airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
- airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
- airbyte_cdk/sources/types.py +2 -4
- airbyte_cdk/sources/utils/transform.py +2 -23
- airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
- airbyte_cdk/utils/mapping_helpers.py +86 -27
- airbyte_cdk/utils/slice_hasher.py +1 -8
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
- airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
- airbyte_cdk/utils/datetime_helpers.py +0 -499
- airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -19,11 +19,7 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
|
|
19
19
|
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
|
-
from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor
|
23
22
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
24
|
-
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
25
|
-
PerPartitionWithGlobalCursor,
|
26
|
-
)
|
27
23
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
28
24
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
29
25
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
@@ -35,9 +31,8 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
35
31
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
36
32
|
ModelToComponentFactory,
|
37
33
|
)
|
38
|
-
from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
|
39
34
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
40
|
-
from airbyte_cdk.sources.declarative.retrievers import
|
35
|
+
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
41
36
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
42
37
|
DeclarativePartitionFactory,
|
43
38
|
StreamSlicerPartitionGenerator,
|
@@ -50,7 +45,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
|
|
50
45
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
51
46
|
AlwaysAvailableAvailabilityStrategy,
|
52
47
|
)
|
53
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import
|
48
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
|
54
49
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
55
50
|
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
56
51
|
|
@@ -71,10 +66,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
71
66
|
component_factory: Optional[ModelToComponentFactory] = None,
|
72
67
|
**kwargs: Any,
|
73
68
|
) -> None:
|
74
|
-
# todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
|
75
|
-
# no longer needs to store the original incoming state. But maybe there's an edge case?
|
76
|
-
self._connector_state_manager = ConnectorStateManager(state=state) # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
|
77
|
-
|
78
69
|
# To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
|
79
70
|
# cursors. We do this by no longer automatically instantiating RFR cursors when converting
|
80
71
|
# the declarative models into runtime components. Concurrent sources will continue to checkpoint
|
@@ -82,17 +73,19 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
82
73
|
component_factory = component_factory or ModelToComponentFactory(
|
83
74
|
emit_connector_builder_messages=emit_connector_builder_messages,
|
84
75
|
disable_resumable_full_refresh=True,
|
85
|
-
connector_state_manager=self._connector_state_manager,
|
86
76
|
)
|
87
77
|
|
88
78
|
super().__init__(
|
89
79
|
source_config=source_config,
|
90
|
-
config=config,
|
91
80
|
debug=debug,
|
92
81
|
emit_connector_builder_messages=emit_connector_builder_messages,
|
93
82
|
component_factory=component_factory,
|
94
83
|
)
|
95
84
|
|
85
|
+
# todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
|
86
|
+
# no longer needs to store the original incoming state. But maybe there's an edge case?
|
87
|
+
self._state = state
|
88
|
+
|
96
89
|
concurrency_level_from_manifest = self._source_config.get("concurrency_level")
|
97
90
|
if concurrency_level_from_manifest:
|
98
91
|
concurrency_level_component = self._constructor.create_component(
|
@@ -182,6 +175,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
182
175
|
concurrent_streams: List[AbstractStream] = []
|
183
176
|
synchronous_streams: List[Stream] = []
|
184
177
|
|
178
|
+
state_manager = ConnectorStateManager(state=self._state) # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
|
179
|
+
|
185
180
|
# Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
|
186
181
|
# and this is validated during the initialization of the source.
|
187
182
|
streams = self._stream_configs(self._source_config) + self._dynamic_stream_configs(
|
@@ -221,52 +216,45 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
221
216
|
if self._is_datetime_incremental_without_partition_routing(
|
222
217
|
declarative_stream, incremental_sync_component_definition
|
223
218
|
):
|
224
|
-
stream_state =
|
219
|
+
stream_state = state_manager.get_stream_state(
|
225
220
|
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
226
221
|
)
|
227
222
|
|
228
|
-
|
223
|
+
cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
224
|
+
state_manager=state_manager,
|
225
|
+
model_type=DatetimeBasedCursorModel,
|
226
|
+
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
227
|
+
stream_name=declarative_stream.name,
|
228
|
+
stream_namespace=declarative_stream.namespace,
|
229
|
+
config=config or {},
|
230
|
+
stream_state=stream_state,
|
231
|
+
)
|
229
232
|
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
233
|
+
retriever = declarative_stream.retriever
|
234
|
+
|
235
|
+
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
236
|
+
# low-code framework because state management is handled through the ConcurrentCursor.
|
237
|
+
if declarative_stream and isinstance(retriever, SimpleRetriever):
|
238
|
+
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
239
|
+
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
240
|
+
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
241
|
+
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
242
|
+
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
243
|
+
if retriever.cursor:
|
244
|
+
retriever.cursor.set_initial_state(stream_state=stream_state)
|
245
|
+
# We zero it out here, but since this is a cursor reference, the state is still properly
|
246
|
+
# instantiated for the other components that reference it
|
247
|
+
retriever.cursor = None
|
234
248
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
declarative_stream.name,
|
245
|
-
declarative_stream.get_json_schema(),
|
246
|
-
retriever,
|
247
|
-
self.message_repository,
|
248
|
-
),
|
249
|
-
stream_slicer=declarative_stream.retriever.stream_slicer,
|
250
|
-
)
|
251
|
-
else:
|
252
|
-
cursor = (
|
253
|
-
self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
254
|
-
model_type=DatetimeBasedCursorModel,
|
255
|
-
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
256
|
-
stream_name=declarative_stream.name,
|
257
|
-
stream_namespace=declarative_stream.namespace,
|
258
|
-
config=config or {},
|
259
|
-
)
|
260
|
-
)
|
261
|
-
partition_generator = StreamSlicerPartitionGenerator(
|
262
|
-
partition_factory=DeclarativePartitionFactory(
|
263
|
-
declarative_stream.name,
|
264
|
-
declarative_stream.get_json_schema(),
|
265
|
-
retriever,
|
266
|
-
self.message_repository,
|
267
|
-
),
|
268
|
-
stream_slicer=cursor,
|
269
|
-
)
|
249
|
+
partition_generator = StreamSlicerPartitionGenerator(
|
250
|
+
DeclarativePartitionFactory(
|
251
|
+
declarative_stream.name,
|
252
|
+
declarative_stream.get_json_schema(),
|
253
|
+
retriever,
|
254
|
+
self.message_repository,
|
255
|
+
),
|
256
|
+
cursor,
|
257
|
+
)
|
270
258
|
|
271
259
|
concurrent_streams.append(
|
272
260
|
DefaultStream(
|
@@ -316,60 +304,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
316
304
|
cursor=final_state_cursor,
|
317
305
|
)
|
318
306
|
)
|
319
|
-
elif (
|
320
|
-
incremental_sync_component_definition
|
321
|
-
and incremental_sync_component_definition.get("type", "")
|
322
|
-
== DatetimeBasedCursorModel.__name__
|
323
|
-
and self._stream_supports_concurrent_partition_processing(
|
324
|
-
declarative_stream=declarative_stream
|
325
|
-
)
|
326
|
-
and hasattr(declarative_stream.retriever, "stream_slicer")
|
327
|
-
and isinstance(
|
328
|
-
declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
|
329
|
-
)
|
330
|
-
):
|
331
|
-
stream_state = self._connector_state_manager.get_stream_state(
|
332
|
-
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
333
|
-
)
|
334
|
-
partition_router = declarative_stream.retriever.stream_slicer._partition_router
|
335
|
-
|
336
|
-
perpartition_cursor = (
|
337
|
-
self._constructor.create_concurrent_cursor_from_perpartition_cursor(
|
338
|
-
state_manager=self._connector_state_manager,
|
339
|
-
model_type=DatetimeBasedCursorModel,
|
340
|
-
component_definition=incremental_sync_component_definition,
|
341
|
-
stream_name=declarative_stream.name,
|
342
|
-
stream_namespace=declarative_stream.namespace,
|
343
|
-
config=config or {},
|
344
|
-
stream_state=stream_state,
|
345
|
-
partition_router=partition_router,
|
346
|
-
)
|
347
|
-
)
|
348
|
-
|
349
|
-
retriever = self._get_retriever(declarative_stream, stream_state)
|
350
|
-
|
351
|
-
partition_generator = StreamSlicerPartitionGenerator(
|
352
|
-
DeclarativePartitionFactory(
|
353
|
-
declarative_stream.name,
|
354
|
-
declarative_stream.get_json_schema(),
|
355
|
-
retriever,
|
356
|
-
self.message_repository,
|
357
|
-
),
|
358
|
-
perpartition_cursor,
|
359
|
-
)
|
360
|
-
|
361
|
-
concurrent_streams.append(
|
362
|
-
DefaultStream(
|
363
|
-
partition_generator=partition_generator,
|
364
|
-
name=declarative_stream.name,
|
365
|
-
json_schema=declarative_stream.get_json_schema(),
|
366
|
-
availability_strategy=AlwaysAvailableAvailabilityStrategy(),
|
367
|
-
primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
|
368
|
-
cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
|
369
|
-
logger=self.logger,
|
370
|
-
cursor=perpartition_cursor,
|
371
|
-
)
|
372
|
-
)
|
373
307
|
else:
|
374
308
|
synchronous_streams.append(declarative_stream)
|
375
309
|
else:
|
@@ -391,10 +325,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
391
325
|
declarative_stream=declarative_stream
|
392
326
|
)
|
393
327
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
394
|
-
and (
|
395
|
-
isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
396
|
-
or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
|
397
|
-
)
|
328
|
+
and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
|
398
329
|
)
|
399
330
|
|
400
331
|
def _stream_supports_concurrent_partition_processing(
|
@@ -463,28 +394,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
463
394
|
return False
|
464
395
|
return True
|
465
396
|
|
466
|
-
@staticmethod
|
467
|
-
def _get_retriever(
|
468
|
-
declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
|
469
|
-
) -> Retriever:
|
470
|
-
retriever = declarative_stream.retriever
|
471
|
-
|
472
|
-
# This is an optimization so that we don't invoke any cursor or state management flows within the
|
473
|
-
# low-code framework because state management is handled through the ConcurrentCursor.
|
474
|
-
if declarative_stream and isinstance(retriever, SimpleRetriever):
|
475
|
-
# Also a temporary hack. In the legacy Stream implementation, as part of the read,
|
476
|
-
# set_initial_state() is called to instantiate incoming state on the cursor. Although we no
|
477
|
-
# longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
|
478
|
-
# like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
|
479
|
-
# still rely on a DatetimeBasedCursor that is properly initialized with state.
|
480
|
-
if retriever.cursor:
|
481
|
-
retriever.cursor.set_initial_state(stream_state=stream_state)
|
482
|
-
# We zero it out here, but since this is a cursor reference, the state is still properly
|
483
|
-
# instantiated for the other components that reference it
|
484
|
-
retriever.cursor = None
|
485
|
-
|
486
|
-
return retriever
|
487
|
-
|
488
397
|
@staticmethod
|
489
398
|
def _select_streams(
|
490
399
|
streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
|
@@ -320,11 +320,6 @@ definitions:
|
|
320
320
|
title: Stream Count
|
321
321
|
description: Numbers of the streams to try reading from when running a check operation.
|
322
322
|
type: integer
|
323
|
-
use_check_availability:
|
324
|
-
title: Use Check Availability
|
325
|
-
description: Enables stream check availability. This field is automatically set by the CDK.
|
326
|
-
type: boolean
|
327
|
-
default: true
|
328
323
|
CompositeErrorHandler:
|
329
324
|
title: Composite Error Handler
|
330
325
|
description: Error handler that sequentially iterates over a list of error handlers.
|
@@ -789,29 +784,6 @@ definitions:
|
|
789
784
|
type:
|
790
785
|
type: string
|
791
786
|
enum: [DatetimeBasedCursor]
|
792
|
-
clamping:
|
793
|
-
title: Date Range Clamping
|
794
|
-
description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
|
795
|
-
type: object
|
796
|
-
required:
|
797
|
-
- target
|
798
|
-
properties:
|
799
|
-
target:
|
800
|
-
title: Target
|
801
|
-
description: The period of time that datetime windows will be clamped by
|
802
|
-
# This should ideally be an enum. However, we don't use an enum because we want to allow for connectors
|
803
|
-
# to support interpolation on the connector config to get the target which is an arbitrary string
|
804
|
-
type: string
|
805
|
-
interpolation_context:
|
806
|
-
- config
|
807
|
-
examples:
|
808
|
-
- "DAY"
|
809
|
-
- "WEEK"
|
810
|
-
- "MONTH"
|
811
|
-
- "{{ config['target'] }}"
|
812
|
-
target_details:
|
813
|
-
type: object
|
814
|
-
additionalProperties: true
|
815
787
|
cursor_field:
|
816
788
|
title: Cursor Field
|
817
789
|
description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
|
@@ -1086,6 +1058,8 @@ definitions:
|
|
1086
1058
|
type: object
|
1087
1059
|
required:
|
1088
1060
|
- type
|
1061
|
+
- client_id
|
1062
|
+
- client_secret
|
1089
1063
|
properties:
|
1090
1064
|
type:
|
1091
1065
|
type: string
|
@@ -1280,15 +1254,6 @@ definitions:
|
|
1280
1254
|
default: []
|
1281
1255
|
examples:
|
1282
1256
|
- ["invalid_grant", "invalid_permissions"]
|
1283
|
-
profile_assertion:
|
1284
|
-
title: Profile Assertion
|
1285
|
-
description: The authenticator being used to authenticate the client authenticator.
|
1286
|
-
"$ref": "#/definitions/JwtAuthenticator"
|
1287
|
-
use_profile_assertion:
|
1288
|
-
title: Use Profile Assertion
|
1289
|
-
description: Enable using profile assertion as a flow for OAuth authorization.
|
1290
|
-
type: boolean
|
1291
|
-
default: false
|
1292
1257
|
$parameters:
|
1293
1258
|
type: object
|
1294
1259
|
additionalProperties: true
|
@@ -1805,19 +1770,6 @@ definitions:
|
|
1805
1770
|
$parameters:
|
1806
1771
|
type: object
|
1807
1772
|
additionalProperties: true
|
1808
|
-
ComplexFieldType:
|
1809
|
-
title: Schema Field Type
|
1810
|
-
description: (This component is experimental. Use at your own risk.) Represents a complex field type.
|
1811
|
-
type: object
|
1812
|
-
required:
|
1813
|
-
- field_type
|
1814
|
-
properties:
|
1815
|
-
field_type:
|
1816
|
-
type: string
|
1817
|
-
items:
|
1818
|
-
anyOf:
|
1819
|
-
- type: string
|
1820
|
-
- "$ref": "#/definitions/ComplexFieldType"
|
1821
1773
|
TypesMap:
|
1822
1774
|
title: Types Map
|
1823
1775
|
description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
|
@@ -1832,7 +1784,6 @@ definitions:
|
|
1832
1784
|
- type: array
|
1833
1785
|
items:
|
1834
1786
|
type: string
|
1835
|
-
- "$ref": "#/definitions/ComplexFieldType"
|
1836
1787
|
current_type:
|
1837
1788
|
anyOf:
|
1838
1789
|
- type: string
|
@@ -2847,25 +2798,35 @@ definitions:
|
|
2847
2798
|
enum: [RequestPath]
|
2848
2799
|
RequestOption:
|
2849
2800
|
title: Request Option
|
2850
|
-
description: Specifies the key field and where in the request a component's value should be injected.
|
2801
|
+
description: Specifies the key field or path and where in the request a component's value should be injected.
|
2851
2802
|
type: object
|
2852
2803
|
required:
|
2853
2804
|
- type
|
2854
|
-
- field_name
|
2855
2805
|
- inject_into
|
2856
2806
|
properties:
|
2857
2807
|
type:
|
2858
2808
|
type: string
|
2859
2809
|
enum: [RequestOption]
|
2860
2810
|
field_name:
|
2861
|
-
title:
|
2862
|
-
description: Configures which key should be used in the location that the descriptor is being injected into
|
2811
|
+
title: Field Name
|
2812
|
+
description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.
|
2863
2813
|
type: string
|
2864
2814
|
examples:
|
2865
2815
|
- segment_id
|
2866
2816
|
interpolation_context:
|
2867
2817
|
- config
|
2868
2818
|
- parameters
|
2819
|
+
field_path:
|
2820
|
+
title: Field Path
|
2821
|
+
description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
|
2822
|
+
type: array
|
2823
|
+
items:
|
2824
|
+
type: string
|
2825
|
+
examples:
|
2826
|
+
- ["data", "viewer", "id"]
|
2827
|
+
interpolation_context:
|
2828
|
+
- config
|
2829
|
+
- parameters
|
2869
2830
|
inject_into:
|
2870
2831
|
title: Inject Into
|
2871
2832
|
description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
|
@@ -138,9 +138,7 @@ class DeclarativeStream(Stream):
|
|
138
138
|
"""
|
139
139
|
:param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state.
|
140
140
|
"""
|
141
|
-
if stream_slice is None or
|
142
|
-
not isinstance(stream_slice, StreamSlice) and stream_slice == {}
|
143
|
-
):
|
141
|
+
if stream_slice is None or stream_slice == {}:
|
144
142
|
# As the parameter is Optional, many would just call `read_records(sync_mode)` during testing without specifying the field
|
145
143
|
# As part of the declarative model without custom components, this should never happen as the CDK would wire up a
|
146
144
|
# SinglePartitionRouter that would create this StreamSlice properly
|
@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
59
59
|
|
60
60
|
def __init__(
|
61
61
|
self,
|
62
|
-
|
62
|
+
date_time_based_cursor: DatetimeBasedCursor,
|
63
|
+
substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
|
63
64
|
**kwargs: Any,
|
64
65
|
):
|
65
66
|
super().__init__(**kwargs)
|
66
|
-
self.
|
67
|
+
self._date_time_based_cursor = date_time_based_cursor
|
68
|
+
self._substream_cursor = substream_cursor
|
67
69
|
|
68
70
|
def filter_records(
|
69
71
|
self,
|
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
75
77
|
records = (
|
76
78
|
record
|
77
79
|
for record in records
|
78
|
-
if self.
|
80
|
+
if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
|
79
81
|
# Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
|
80
82
|
# Record stream name is empty cause it is not used durig the filtering
|
81
83
|
Record(data=record, associated_slice=stream_slice, stream_name="")
|
@@ -2,10 +2,6 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
|
6
|
-
ConcurrentCursorFactory,
|
7
|
-
ConcurrentPerPartitionCursor,
|
8
|
-
)
|
9
5
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
10
6
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
11
7
|
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
@@ -25,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
|
|
25
21
|
|
26
22
|
__all__ = [
|
27
23
|
"CursorFactory",
|
28
|
-
"ConcurrentCursorFactory",
|
29
|
-
"ConcurrentPerPartitionCursor",
|
30
24
|
"DatetimeBasedCursor",
|
31
25
|
"DeclarativeCursor",
|
32
26
|
"GlobalSubstreamCursor",
|
@@ -365,14 +365,15 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
365
365
|
options: MutableMapping[str, Any] = {}
|
366
366
|
if not stream_slice:
|
367
367
|
return options
|
368
|
+
|
368
369
|
if self.start_time_option and self.start_time_option.inject_into == option_type:
|
369
|
-
|
370
|
-
|
371
|
-
|
370
|
+
start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
|
371
|
+
self.start_time_option.inject_into_request(options, start_time_value, self.config)
|
372
|
+
|
372
373
|
if self.end_time_option and self.end_time_option.inject_into == option_type:
|
373
|
-
|
374
|
-
|
375
|
-
|
374
|
+
end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
|
375
|
+
self.end_time_option.inject_into_request(options, end_time_value, self.config)
|
376
|
+
|
376
377
|
return options
|
377
378
|
|
378
379
|
def should_be_synced(self, record: Record) -> bool:
|
@@ -222,8 +222,6 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
222
222
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
223
223
|
) -> Mapping[str, Any]:
|
224
224
|
if stream_slice:
|
225
|
-
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
|
226
|
-
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
|
227
225
|
return self._partition_router.get_request_params( # type: ignore # this always returns a mapping
|
228
226
|
stream_state=stream_state,
|
229
227
|
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
@@ -246,8 +244,6 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
246
244
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
247
245
|
) -> Mapping[str, Any]:
|
248
246
|
if stream_slice:
|
249
|
-
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
|
250
|
-
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
|
251
247
|
return self._partition_router.get_request_headers( # type: ignore # this always returns a mapping
|
252
248
|
stream_state=stream_state,
|
253
249
|
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
@@ -270,8 +266,6 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
270
266
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
271
267
|
) -> Union[Mapping[str, Any], str]:
|
272
268
|
if stream_slice:
|
273
|
-
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
|
274
|
-
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
|
275
269
|
return self._partition_router.get_request_body_data( # type: ignore # this always returns a mapping
|
276
270
|
stream_state=stream_state,
|
277
271
|
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
@@ -294,8 +288,6 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
294
288
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
295
289
|
) -> Mapping[str, Any]:
|
296
290
|
if stream_slice:
|
297
|
-
if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
|
298
|
-
self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
|
299
291
|
return self._partition_router.get_request_body_json( # type: ignore # this always returns a mapping
|
300
292
|
stream_state=stream_state,
|
301
293
|
stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
|
@@ -349,32 +341,8 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
349
341
|
)
|
350
342
|
partition_key = self._to_partition_key(record.associated_slice.partition)
|
351
343
|
if partition_key not in self._cursor_per_partition:
|
352
|
-
|
344
|
+
raise ValueError(
|
345
|
+
"Invalid state as stream slices that are emitted should refer to an existing cursor"
|
346
|
+
)
|
353
347
|
cursor = self._cursor_per_partition[partition_key]
|
354
348
|
return cursor
|
355
|
-
|
356
|
-
def _create_cursor_for_partition(self, partition_key: str) -> None:
|
357
|
-
"""
|
358
|
-
Dynamically creates and initializes a cursor for the specified partition.
|
359
|
-
|
360
|
-
This method is required for `ConcurrentPerPartitionCursor`. For concurrent cursors,
|
361
|
-
stream_slices is executed only for the concurrent cursor, so cursors per partition
|
362
|
-
are not created for the declarative cursor. This method ensures that a cursor is available
|
363
|
-
to create requests for the specified partition. The cursor is initialized
|
364
|
-
with the per-partition state if present in the initial state, or with the global state
|
365
|
-
adjusted by the lookback window, or with the state to migrate from.
|
366
|
-
|
367
|
-
Note:
|
368
|
-
This is a temporary workaround and should be removed once the declarative cursor
|
369
|
-
is decoupled from the concurrent cursor implementation.
|
370
|
-
|
371
|
-
Args:
|
372
|
-
partition_key (str): The unique identifier for the partition for which the cursor
|
373
|
-
needs to be created.
|
374
|
-
"""
|
375
|
-
partition_state = (
|
376
|
-
self._state_to_migrate_from if self._state_to_migrate_from else self._NO_CURSOR_STATE
|
377
|
-
)
|
378
|
-
cursor = self._create_cursor(partition_state)
|
379
|
-
|
380
|
-
self._cursor_per_partition[partition_key] = cursor
|
@@ -7,7 +7,6 @@ import logging
|
|
7
7
|
import pkgutil
|
8
8
|
from copy import deepcopy
|
9
9
|
from importlib import metadata
|
10
|
-
from types import ModuleType
|
11
10
|
from typing import Any, Dict, Iterator, List, Mapping, Optional, Set
|
12
11
|
|
13
12
|
import yaml
|
@@ -26,13 +25,13 @@ from airbyte_cdk.models import (
|
|
26
25
|
from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
|
27
26
|
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
|
28
27
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
28
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29
|
+
CheckStream as CheckStreamModel,
|
30
|
+
)
|
29
31
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
30
32
|
DeclarativeStream as DeclarativeStreamModel,
|
31
33
|
)
|
32
34
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
33
|
-
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
|
34
|
-
get_registered_components_module,
|
35
|
-
)
|
36
35
|
from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
|
37
36
|
ManifestComponentTransformer,
|
38
37
|
)
|
@@ -60,29 +59,22 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
60
59
|
def __init__(
|
61
60
|
self,
|
62
61
|
source_config: ConnectionDefinition,
|
63
|
-
*,
|
64
|
-
config: Mapping[str, Any] | None = None,
|
65
62
|
debug: bool = False,
|
66
63
|
emit_connector_builder_messages: bool = False,
|
67
64
|
component_factory: Optional[ModelToComponentFactory] = None,
|
68
65
|
):
|
69
66
|
"""
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
debug: True if debug mode is enabled.
|
74
|
-
emit_connector_builder_messages: True if messages should be emitted to the connector builder.
|
75
|
-
component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked.
|
67
|
+
:param source_config(Mapping[str, Any]): The manifest of low-code components that describe the source connector
|
68
|
+
:param debug(bool): True if debug mode is enabled
|
69
|
+
:param component_factory(ModelToComponentFactory): optional factory if ModelToComponentFactory's default behaviour needs to be tweaked
|
76
70
|
"""
|
77
71
|
self.logger = logging.getLogger(f"airbyte.{self.name}")
|
72
|
+
|
78
73
|
# For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
|
79
74
|
manifest = dict(source_config)
|
80
75
|
if "type" not in manifest:
|
81
76
|
manifest["type"] = "DeclarativeSource"
|
82
77
|
|
83
|
-
# If custom components are needed, locate and/or register them.
|
84
|
-
self.components_module: ModuleType | None = get_registered_components_module(config=config)
|
85
|
-
|
86
78
|
resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
|
87
79
|
propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
|
88
80
|
"", resolved_source_config, {}
|