airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
  11. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  14. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  15. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  16. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
  18. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
  20. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  21. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  22. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
  23. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
  24. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  25. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  26. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  27. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  28. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  29. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  30. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
  31. airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
  32. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
  33. airbyte_cdk/sources/http_logger.py +1 -1
  34. airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
  35. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
  36. airbyte_cdk/sources/streams/core.py +6 -6
  37. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
  38. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
  39. airbyte_cdk/sources/types.py +2 -4
  40. airbyte_cdk/sources/utils/transform.py +2 -23
  41. airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
  42. airbyte_cdk/utils/mapping_helpers.py +86 -27
  43. airbyte_cdk/utils/slice_hasher.py +1 -8
  44. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
  45. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
  46. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
  47. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
  48. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
  49. airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
  50. airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
  51. airbyte_cdk/utils/datetime_helpers.py +0 -499
  52. airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
  53. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
  54. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/concurrent_declarative_source.py

@@ -19,11 +19,7 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
  from airbyte_cdk.sources.declarative.extractors.record_filter import (
      ClientSideIncrementalRecordFilterDecorator,
  )
- from airbyte_cdk.sources.declarative.incremental import ConcurrentPerPartitionCursor
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
- from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
-     PerPartitionWithGlobalCursor,
- )
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -35,9 +31,8 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
  from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
      ModelToComponentFactory,
  )
- from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
  from airbyte_cdk.sources.declarative.requesters import HttpRequester
- from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
+ from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
  from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
      DeclarativePartitionFactory,
      StreamSlicerPartitionGenerator,
@@ -50,7 +45,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
      AlwaysAvailableAvailabilityStrategy,
  )
- from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
  from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream

@@ -71,10 +66,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
          component_factory: Optional[ModelToComponentFactory] = None,
          **kwargs: Any,
      ) -> None:
-         # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
-         # no longer needs to store the original incoming state. But maybe there's an edge case?
-         self._connector_state_manager = ConnectorStateManager(state=state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
-
          # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
          # cursors. We do this by no longer automatically instantiating RFR cursors when converting
          # the declarative models into runtime components. Concurrent sources will continue to checkpoint
@@ -82,17 +73,19 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
          component_factory = component_factory or ModelToComponentFactory(
              emit_connector_builder_messages=emit_connector_builder_messages,
              disable_resumable_full_refresh=True,
-             connector_state_manager=self._connector_state_manager,
          )

          super().__init__(
              source_config=source_config,
-             config=config,
              debug=debug,
              emit_connector_builder_messages=emit_connector_builder_messages,
              component_factory=component_factory,
          )

+         # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
+         # no longer needs to store the original incoming state. But maybe there's an edge case?
+         self._state = state
+
          concurrency_level_from_manifest = self._source_config.get("concurrency_level")
          if concurrency_level_from_manifest:
              concurrency_level_component = self._constructor.create_component(
@@ -182,6 +175,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
          concurrent_streams: List[AbstractStream] = []
          synchronous_streams: List[Stream] = []

+         state_manager = ConnectorStateManager(state=self._state)  # type: ignore  # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later
+
          # Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
          # and this is validated during the initialization of the source.
          streams = self._stream_configs(self._source_config) + self._dynamic_stream_configs(
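For context, this is roughly how the stored state list is turned into per-stream state before it is handed to the concurrent cursor factory in the next hunk. A minimal sketch with a hypothetical "orders" stream, not code from the package:

# Not part of the diff: illustrates the state flow described above.
from airbyte_cdk.models import (
    AirbyteStateBlob,
    AirbyteStateMessage,
    AirbyteStateType,
    AirbyteStreamState,
    StreamDescriptor,
)
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager

# Hypothetical incoming state for a single "orders" stream.
incoming_state = [
    AirbyteStateMessage(
        type=AirbyteStateType.STREAM,
        stream=AirbyteStreamState(
            stream_descriptor=StreamDescriptor(name="orders"),
            stream_state=AirbyteStateBlob(updated_at="2024-01-01T00:00:00Z"),
        ),
    )
]

# The source now builds the manager lazily inside _group_streams() from self._state.
state_manager = ConnectorStateManager(state=incoming_state)
# Roughly {"updated_at": "2024-01-01T00:00:00Z"}, which is the stream_state value
# passed to create_concurrent_cursor_from_datetime_based_cursor below.
stream_state = state_manager.get_stream_state(stream_name="orders", namespace=None)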
@@ -221,52 +216,45 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                  if self._is_datetime_incremental_without_partition_routing(
                      declarative_stream, incremental_sync_component_definition
                  ):
-                     stream_state = self._connector_state_manager.get_stream_state(
+                     stream_state = state_manager.get_stream_state(
                          stream_name=declarative_stream.name, namespace=declarative_stream.namespace
                      )

-                     retriever = self._get_retriever(declarative_stream, stream_state)
+                     cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
+                         state_manager=state_manager,
+                         model_type=DatetimeBasedCursorModel,
+                         component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
+                         stream_name=declarative_stream.name,
+                         stream_namespace=declarative_stream.namespace,
+                         config=config or {},
+                         stream_state=stream_state,
+                     )

-                     if isinstance(declarative_stream.retriever, AsyncRetriever) and isinstance(
-                         declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter
-                     ):
-                         cursor = declarative_stream.retriever.stream_slicer.stream_slicer
+                     retriever = declarative_stream.retriever
+
+                     # This is an optimization so that we don't invoke any cursor or state management flows within the
+                     # low-code framework because state management is handled through the ConcurrentCursor.
+                     if declarative_stream and isinstance(retriever, SimpleRetriever):
+                         # Also a temporary hack. In the legacy Stream implementation, as part of the read,
+                         # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
+                         # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
+                         # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
+                         # still rely on a DatetimeBasedCursor that is properly initialized with state.
+                         if retriever.cursor:
+                             retriever.cursor.set_initial_state(stream_state=stream_state)
+                         # We zero it out here, but since this is a cursor reference, the state is still properly
+                         # instantiated for the other components that reference it
+                         retriever.cursor = None

-                         if not isinstance(cursor, ConcurrentCursor | ConcurrentPerPartitionCursor):
-                             # This should never happen since we instantiate ConcurrentCursor in
-                             # model_to_component_factory.py
-                             raise ValueError(
-                                 f"Expected AsyncJobPartitionRouter stream_slicer to be of type ConcurrentCursor, but received{cursor.__class__}"
-                             )
-
-                         partition_generator = StreamSlicerPartitionGenerator(
-                             partition_factory=DeclarativePartitionFactory(
-                                 declarative_stream.name,
-                                 declarative_stream.get_json_schema(),
-                                 retriever,
-                                 self.message_repository,
-                             ),
-                             stream_slicer=declarative_stream.retriever.stream_slicer,
-                         )
-                     else:
-                         cursor = (
-                             self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
-                                 model_type=DatetimeBasedCursorModel,
-                                 component_definition=incremental_sync_component_definition,  # type: ignore  # Not None because of the if condition above
-                                 stream_name=declarative_stream.name,
-                                 stream_namespace=declarative_stream.namespace,
-                                 config=config or {},
-                             )
-                         )
-                         partition_generator = StreamSlicerPartitionGenerator(
-                             partition_factory=DeclarativePartitionFactory(
-                                 declarative_stream.name,
-                                 declarative_stream.get_json_schema(),
-                                 retriever,
-                                 self.message_repository,
-                             ),
-                             stream_slicer=cursor,
-                         )
+                     partition_generator = StreamSlicerPartitionGenerator(
+                         DeclarativePartitionFactory(
+                             declarative_stream.name,
+                             declarative_stream.get_json_schema(),
+                             retriever,
+                             self.message_repository,
+                         ),
+                         cursor,
+                     )

                  concurrent_streams.append(
                      DefaultStream(
@@ -316,60 +304,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                              cursor=final_state_cursor,
                          )
                      )
-                 elif (
-                     incremental_sync_component_definition
-                     and incremental_sync_component_definition.get("type", "")
-                     == DatetimeBasedCursorModel.__name__
-                     and self._stream_supports_concurrent_partition_processing(
-                         declarative_stream=declarative_stream
-                     )
-                     and hasattr(declarative_stream.retriever, "stream_slicer")
-                     and isinstance(
-                         declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
-                     )
-                 ):
-                     stream_state = self._connector_state_manager.get_stream_state(
-                         stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                     )
-                     partition_router = declarative_stream.retriever.stream_slicer._partition_router
-
-                     perpartition_cursor = (
-                         self._constructor.create_concurrent_cursor_from_perpartition_cursor(
-                             state_manager=self._connector_state_manager,
-                             model_type=DatetimeBasedCursorModel,
-                             component_definition=incremental_sync_component_definition,
-                             stream_name=declarative_stream.name,
-                             stream_namespace=declarative_stream.namespace,
-                             config=config or {},
-                             stream_state=stream_state,
-                             partition_router=partition_router,
-                         )
-                     )
-
-                     retriever = self._get_retriever(declarative_stream, stream_state)
-
-                     partition_generator = StreamSlicerPartitionGenerator(
-                         DeclarativePartitionFactory(
-                             declarative_stream.name,
-                             declarative_stream.get_json_schema(),
-                             retriever,
-                             self.message_repository,
-                         ),
-                         perpartition_cursor,
-                     )
-
-                     concurrent_streams.append(
-                         DefaultStream(
-                             partition_generator=partition_generator,
-                             name=declarative_stream.name,
-                             json_schema=declarative_stream.get_json_schema(),
-                             availability_strategy=AlwaysAvailableAvailabilityStrategy(),
-                             primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                             cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
-                             logger=self.logger,
-                             cursor=perpartition_cursor,
-                         )
-                     )
                  else:
                      synchronous_streams.append(declarative_stream)
              else:
@@ -391,10 +325,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                  declarative_stream=declarative_stream
              )
              and hasattr(declarative_stream.retriever, "stream_slicer")
-             and (
-                 isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
-                 or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
-             )
+             and isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
          )

      def _stream_supports_concurrent_partition_processing(
@@ -463,28 +394,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                  return False
          return True

-     @staticmethod
-     def _get_retriever(
-         declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
-     ) -> Retriever:
-         retriever = declarative_stream.retriever
-
-         # This is an optimization so that we don't invoke any cursor or state management flows within the
-         # low-code framework because state management is handled through the ConcurrentCursor.
-         if declarative_stream and isinstance(retriever, SimpleRetriever):
-             # Also a temporary hack. In the legacy Stream implementation, as part of the read,
-             # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
-             # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
-             # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
-             # still rely on a DatetimeBasedCursor that is properly initialized with state.
-             if retriever.cursor:
-                 retriever.cursor.set_initial_state(stream_state=stream_state)
-             # We zero it out here, but since this is a cursor reference, the state is still properly
-             # instantiated for the other components that reference it
-             retriever.cursor = None
-
-         return retriever
-
      @staticmethod
      def _select_streams(
          streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
airbyte_cdk/sources/declarative/declarative_component_schema.yaml

@@ -320,11 +320,6 @@ definitions:
          title: Stream Count
          description: Numbers of the streams to try reading from when running a check operation.
          type: integer
-       use_check_availability:
-         title: Use Check Availability
-         description: Enables stream check availability. This field is automatically set by the CDK.
-         type: boolean
-         default: true
    CompositeErrorHandler:
      title: Composite Error Handler
      description: Error handler that sequentially iterates over a list of error handlers.
@@ -789,29 +784,6 @@ definitions:
        type:
          type: string
          enum: [DatetimeBasedCursor]
-       clamping:
-         title: Date Range Clamping
-         description: This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)
-         type: object
-         required:
-           - target
-         properties:
-           target:
-             title: Target
-             description: The period of time that datetime windows will be clamped by
-             # This should ideally be an enum. However, we don't use an enum because we want to allow for connectors
-             # to support interpolation on the connector config to get the target which is an arbitrary string
-             type: string
-             interpolation_context:
-               - config
-             examples:
-               - "DAY"
-               - "WEEK"
-               - "MONTH"
-               - "{{ config['target'] }}"
-           target_details:
-             type: object
-             additionalProperties: true
        cursor_field:
          title: Cursor Field
          description: The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.
@@ -1086,6 +1058,8 @@ definitions:
      type: object
      required:
        - type
+       - client_id
+       - client_secret
      properties:
        type:
          type: string
@@ -1280,15 +1254,6 @@ definitions:
          default: []
          examples:
            - ["invalid_grant", "invalid_permissions"]
-       profile_assertion:
-         title: Profile Assertion
-         description: The authenticator being used to authenticate the client authenticator.
-         "$ref": "#/definitions/JwtAuthenticator"
-       use_profile_assertion:
-         title: Use Profile Assertion
-         description: Enable using profile assertion as a flow for OAuth authorization.
-         type: boolean
-         default: false
        $parameters:
          type: object
          additionalProperties: true
@@ -1805,19 +1770,6 @@ definitions:
      $parameters:
        type: object
        additionalProperties: true
-   ComplexFieldType:
-     title: Schema Field Type
-     description: (This component is experimental. Use at your own risk.) Represents a complex field type.
-     type: object
-     required:
-       - field_type
-     properties:
-       field_type:
-         type: string
-       items:
-         anyOf:
-           - type: string
-           - "$ref": "#/definitions/ComplexFieldType"
    TypesMap:
      title: Types Map
      description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
@@ -1832,7 +1784,6 @@ definitions:
            - type: array
              items:
                type: string
-           - "$ref": "#/definitions/ComplexFieldType"
        current_type:
          anyOf:
            - type: string
@@ -2847,25 +2798,35 @@ definitions:
          enum: [RequestPath]
    RequestOption:
      title: Request Option
-     description: Specifies the key field and where in the request a component's value should be injected.
+     description: Specifies the key field or path and where in the request a component's value should be injected.
      type: object
      required:
        - type
-       - field_name
        - inject_into
      properties:
        type:
          type: string
          enum: [RequestOption]
        field_name:
-         title: Request Option
-         description: Configures which key should be used in the location that the descriptor is being injected into
+         title: Field Name
+         description: Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.
          type: string
          examples:
            - segment_id
          interpolation_context:
            - config
            - parameters
+       field_path:
+         title: Field Path
+         description: Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)
+         type: array
+         items:
+           type: string
+         examples:
+           - ["data", "viewer", "id"]
+         interpolation_context:
+           - config
+           - parameters
        inject_into:
          title: Inject Into
          description: Configures where the descriptor should be set on the HTTP requests. Note that request parameters that are already encoded in the URL path will not be duplicated.
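The new `field_path` option targets nested JSON body structures rather than a single top-level key. The helper below is hypothetical (not CDK code) and only illustrates the body shape the schema describes:

# Hypothetical illustration of what a field_path like ["data", "viewer", "id"]
# is described to do: place the value at a nested location in the JSON body.
from typing import Any, Dict, List


def inject_nested(body: Dict[str, Any], field_path: List[str], value: Any) -> Dict[str, Any]:
    """Write `value` at the nested location named by `field_path`."""
    current = body
    for key in field_path[:-1]:
        current = current.setdefault(key, {})
    current[field_path[-1]] = value
    return body


body_json: Dict[str, Any] = {}
inject_nested(body_json, ["data", "viewer", "id"], "12345")
# body_json == {"data": {"viewer": {"id": "12345"}}}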
airbyte_cdk/sources/declarative/declarative_stream.py

@@ -138,9 +138,7 @@ class DeclarativeStream(Stream):
          """
          :param: stream_state We knowingly avoid using stream_state as we want cursors to manage their own state.
          """
-         if stream_slice is None or (
-             not isinstance(stream_slice, StreamSlice) and stream_slice == {}
-         ):
+         if stream_slice is None or stream_slice == {}:
              # As the parameter is Optional, many would just call `read_records(sync_mode)` during testing without specifying the field
              # As part of the declarative model without custom components, this should never happen as the CDK would wire up a
              # SinglePartitionRouter that would create this StreamSlice properly
airbyte_cdk/sources/declarative/extractors/record_filter.py

@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):

      def __init__(
          self,
-         cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
+         date_time_based_cursor: DatetimeBasedCursor,
+         substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
          **kwargs: Any,
      ):
          super().__init__(**kwargs)
-         self._cursor = cursor
+         self._date_time_based_cursor = date_time_based_cursor
+         self._substream_cursor = substream_cursor

      def filter_records(
          self,
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
          records = (
              record
              for record in records
-             if self._cursor.should_be_synced(
+             if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
                  # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
                  # Record stream name is empty cause it is not used durig the filtering
                  Record(data=record, associated_slice=stream_slice, stream_name="")
airbyte_cdk/sources/declarative/incremental/__init__.py

@@ -2,10 +2,6 @@
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
  #

- from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
-     ConcurrentCursorFactory,
-     ConcurrentPerPartitionCursor,
- )
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
  from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -25,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i

  __all__ = [
      "CursorFactory",
-     "ConcurrentCursorFactory",
-     "ConcurrentPerPartitionCursor",
      "DatetimeBasedCursor",
      "DeclarativeCursor",
      "GlobalSubstreamCursor",
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py

@@ -365,14 +365,15 @@ class DatetimeBasedCursor(DeclarativeCursor):
          options: MutableMapping[str, Any] = {}
          if not stream_slice:
              return options
+
          if self.start_time_option and self.start_time_option.inject_into == option_type:
-             options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore  # field_name is always casted to an interpolated string
-                 self._partition_field_start.eval(self.config)
-             )
+             start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
+             self.start_time_option.inject_into_request(options, start_time_value, self.config)
+
          if self.end_time_option and self.end_time_option.inject_into == option_type:
-             options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
-                 self._partition_field_end.eval(self.config)
-             )
+             end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
+             self.end_time_option.inject_into_request(options, end_time_value, self.config)
+
          return options

      def should_be_synced(self, record: Record) -> bool:
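For a plain `field_name` option injected into request parameters, the new call is expected to behave like the removed inline assignment. A hedged sketch, assuming the runtime RequestOption mirrors the schema above and that inject_into_request keeps the (mapping, value, config) signature shown in this hunk:

# Sketch only: the call pattern used above for a simple start_time option.
from typing import Any, MutableMapping

from airbyte_cdk.sources.declarative.requesters.request_option import (
    RequestOption,
    RequestOptionType,
)

config: dict[str, Any] = {}
start_time_option = RequestOption(
    field_name="start_time",
    inject_into=RequestOptionType.request_parameter,
    parameters={},
)

options: MutableMapping[str, Any] = {}
# For a flat field_name this is expected to be equivalent to the removed
# options[field_name.eval(config)] = value assignment.
start_time_option.inject_into_request(options, "2024-01-01T00:00:00Z", config)
# options == {"start_time": "2024-01-01T00:00:00Z"}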
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py

@@ -64,9 +64,6 @@ class Timer:
          else:
              raise RuntimeError("Global substream cursor timer not started")

-     def is_running(self) -> bool:
-         return self._start is not None
-

  class GlobalSubstreamCursor(DeclarativeCursor):
      """
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py

@@ -222,8 +222,6 @@ class PerPartitionCursor(DeclarativeCursor):
          next_page_token: Optional[Mapping[str, Any]] = None,
      ) -> Mapping[str, Any]:
          if stream_slice:
-             if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
-                 self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
              return self._partition_router.get_request_params(  # type: ignore  # this always returns a mapping
                  stream_state=stream_state,
                  stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -246,8 +244,6 @@ class PerPartitionCursor(DeclarativeCursor):
          next_page_token: Optional[Mapping[str, Any]] = None,
      ) -> Mapping[str, Any]:
          if stream_slice:
-             if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
-                 self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
              return self._partition_router.get_request_headers(  # type: ignore  # this always returns a mapping
                  stream_state=stream_state,
                  stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -270,8 +266,6 @@ class PerPartitionCursor(DeclarativeCursor):
          next_page_token: Optional[Mapping[str, Any]] = None,
      ) -> Union[Mapping[str, Any], str]:
          if stream_slice:
-             if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
-                 self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
              return self._partition_router.get_request_body_data(  # type: ignore  # this always returns a mapping
                  stream_state=stream_state,
                  stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -294,8 +288,6 @@ class PerPartitionCursor(DeclarativeCursor):
          next_page_token: Optional[Mapping[str, Any]] = None,
      ) -> Mapping[str, Any]:
          if stream_slice:
-             if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
-                 self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
              return self._partition_router.get_request_body_json(  # type: ignore  # this always returns a mapping
                  stream_state=stream_state,
                  stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -349,32 +341,8 @@ class PerPartitionCursor(DeclarativeCursor):
              )
          partition_key = self._to_partition_key(record.associated_slice.partition)
          if partition_key not in self._cursor_per_partition:
-             self._create_cursor_for_partition(partition_key)
+             raise ValueError(
+                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
+             )
          cursor = self._cursor_per_partition[partition_key]
          return cursor
-
-     def _create_cursor_for_partition(self, partition_key: str) -> None:
-         """
-         Dynamically creates and initializes a cursor for the specified partition.
-
-         This method is required for `ConcurrentPerPartitionCursor`. For concurrent cursors,
-         stream_slices is executed only for the concurrent cursor, so cursors per partition
-         are not created for the declarative cursor. This method ensures that a cursor is available
-         to create requests for the specified partition. The cursor is initialized
-         with the per-partition state if present in the initial state, or with the global state
-         adjusted by the lookback window, or with the state to migrate from.
-
-         Note:
-             This is a temporary workaround and should be removed once the declarative cursor
-             is decoupled from the concurrent cursor implementation.
-
-         Args:
-             partition_key (str): The unique identifier for the partition for which the cursor
-                 needs to be created.
-         """
-         partition_state = (
-             self._state_to_migrate_from if self._state_to_migrate_from else self._NO_CURSOR_STATE
-         )
-         cursor = self._create_cursor(partition_state)
-
-         self._cursor_per_partition[partition_key] = cursor
airbyte_cdk/sources/declarative/manifest_declarative_source.py

@@ -7,7 +7,6 @@ import logging
  import pkgutil
  from copy import deepcopy
  from importlib import metadata
- from types import ModuleType
  from typing import Any, Dict, Iterator, List, Mapping, Optional, Set

  import yaml
@@ -26,13 +25,13 @@ from airbyte_cdk.models import (
  from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
  from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+     CheckStream as CheckStreamModel,
+ )
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
      DeclarativeStream as DeclarativeStreamModel,
  )
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
- from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
-     get_registered_components_module,
- )
  from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
      ManifestComponentTransformer,
  )
@@ -60,29 +59,22 @@ class ManifestDeclarativeSource(DeclarativeSource):
      def __init__(
          self,
          source_config: ConnectionDefinition,
-         *,
-         config: Mapping[str, Any] | None = None,
          debug: bool = False,
          emit_connector_builder_messages: bool = False,
          component_factory: Optional[ModelToComponentFactory] = None,
      ):
          """
-         Args:
-             config: The provided config dict.
-             source_config: The manifest of low-code components that describe the source connector.
-             debug: True if debug mode is enabled.
-             emit_connector_builder_messages: True if messages should be emitted to the connector builder.
-             component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked.
+         :param source_config(Mapping[str, Any]): The manifest of low-code components that describe the source connector
+         :param debug(bool): True if debug mode is enabled
+         :param component_factory(ModelToComponentFactory): optional factory if ModelToComponentFactory's default behaviour needs to be tweaked
          """
          self.logger = logging.getLogger(f"airbyte.{self.name}")
+
          # For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
          manifest = dict(source_config)
          if "type" not in manifest:
              manifest["type"] = "DeclarativeSource"

-         # If custom components are needed, locate and/or register them.
-         self.components_module: ModuleType | None = get_registered_components_module(config=config)
-
          resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
          propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
              "", resolved_source_config, {}