airbyte-cdk 6.17.1.dev1__py3-none-any.whl → 6.18.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -20,9 +20,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
20
20
  ClientSideIncrementalRecordFilterDecorator,
21
21
  )
22
22
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
23
- from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
24
- PerPartitionWithGlobalCursor,
25
- )
26
23
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
27
24
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
28
25
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -307,72 +304,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
307
304
  cursor=final_state_cursor,
308
305
  )
309
306
  )
310
- elif (
311
- incremental_sync_component_definition
312
- and incremental_sync_component_definition.get("type", "")
313
- == DatetimeBasedCursorModel.__name__
314
- and self._stream_supports_concurrent_partition_processing(
315
- declarative_stream=declarative_stream
316
- )
317
- and hasattr(declarative_stream.retriever, "stream_slicer")
318
- and isinstance(
319
- declarative_stream.retriever.stream_slicer, PerPartitionWithGlobalCursor
320
- )
321
- ):
322
- stream_state = state_manager.get_stream_state(
323
- stream_name=declarative_stream.name, namespace=declarative_stream.namespace
324
- )
325
- partition_router = declarative_stream.retriever.stream_slicer._partition_router
326
-
327
- cursor = self._constructor.create_concurrent_cursor_from_perpartition_cursor(
328
- state_manager=state_manager,
329
- model_type=DatetimeBasedCursorModel,
330
- component_definition=incremental_sync_component_definition,
331
- stream_name=declarative_stream.name,
332
- stream_namespace=declarative_stream.namespace,
333
- config=config or {},
334
- stream_state=stream_state,
335
- partition_router=partition_router,
336
- )
337
-
338
- retriever = declarative_stream.retriever
339
-
340
- # This is an optimization so that we don't invoke any cursor or state management flows within the
341
- # low-code framework because state management is handled through the ConcurrentCursor.
342
- if declarative_stream and isinstance(retriever, SimpleRetriever):
343
- # Also a temporary hack. In the legacy Stream implementation, as part of the read,
344
- # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
345
- # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
346
- # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
347
- # still rely on a DatetimeBasedCursor that is properly initialized with state.
348
- if retriever.cursor:
349
- retriever.cursor.set_initial_state(stream_state=stream_state)
350
- # We zero it out here, but since this is a cursor reference, the state is still properly
351
- # instantiated for the other components that reference it
352
- retriever.cursor = None
353
-
354
- partition_generator = StreamSlicerPartitionGenerator(
355
- DeclarativePartitionFactory(
356
- declarative_stream.name,
357
- declarative_stream.get_json_schema(),
358
- retriever,
359
- self.message_repository,
360
- ),
361
- cursor,
362
- )
363
-
364
- concurrent_streams.append(
365
- DefaultStream(
366
- partition_generator=partition_generator,
367
- name=declarative_stream.name,
368
- json_schema=declarative_stream.get_json_schema(),
369
- availability_strategy=AlwaysAvailableAvailabilityStrategy(),
370
- primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
371
- cursor_field=cursor.cursor_field.cursor_field_key,
372
- logger=self.logger,
373
- cursor=cursor,
374
- )
375
- )
376
307
  else:
377
308
  synchronous_streams.append(declarative_stream)
378
309
  else:
@@ -2977,6 +2977,11 @@ definitions:
2977
2977
  anyOf:
2978
2978
  - "$ref": "#/definitions/CustomRequester"
2979
2979
  - "$ref": "#/definitions/HttpRequester"
2980
+ url_requester:
2981
+ description: Requester component that describes how to prepare HTTP requests to send to the source API to extract the URL from the polling response of the completed async job.
2982
+ anyOf:
2983
+ - "$ref": "#/definitions/CustomRequester"
2984
+ - "$ref": "#/definitions/HttpRequester"
2980
2985
  download_requester:
2981
2986
  description: Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.
2982
2987
  anyOf:
@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
59
59
 
60
60
  def __init__(
61
61
  self,
62
- cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
62
+ date_time_based_cursor: DatetimeBasedCursor,
63
+ substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
63
64
  **kwargs: Any,
64
65
  ):
65
66
  super().__init__(**kwargs)
66
- self._cursor = cursor
67
+ self._date_time_based_cursor = date_time_based_cursor
68
+ self._substream_cursor = substream_cursor
67
69
 
68
70
  def filter_records(
69
71
  self,
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
75
77
  records = (
76
78
  record
77
79
  for record in records
78
- if self._cursor.should_be_synced(
80
+ if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
79
81
  # Record is created on the fly to align with the cursor's interface; stream name is ignored as we don't need it here
80
82
  # Record stream name is empty because it is not used during the filtering
81
83
  Record(data=record, associated_slice=stream_slice, stream_name="")
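
The change above splits the single `cursor` argument of `ClientSideIncrementalRecordFilterDecorator` into a `date_time_based_cursor` and an optional `substream_cursor`, and filtering falls back to the datetime-based cursor only when no substream cursor is configured. A minimal sketch of that fallback, using hypothetical stub classes rather than the real CDK cursor types:

```python
# Sketch only: StubCursor stands in for DatetimeBasedCursor /
# PerPartitionWithGlobalCursor; the real classes carry richer state handling.
from dataclasses import dataclass
from typing import Any, Iterable, List, Mapping, Optional


@dataclass
class StubCursor:
    cutoff: int

    def should_be_synced(self, record: Mapping[str, Any]) -> bool:
        # A real cursor compares the record's cursor field against its state.
        return record.get("updated_at", 0) >= self.cutoff


def filter_records(
    records: Iterable[Mapping[str, Any]],
    date_time_based_cursor: StubCursor,
    substream_cursor: Optional[StubCursor],
) -> List[Mapping[str, Any]]:
    # Prefer the substream cursor when present, otherwise fall back to the
    # datetime-based cursor, mirroring the expression in the hunk above.
    active_cursor = substream_cursor or date_time_based_cursor
    return [record for record in records if active_cursor.should_be_synced(record)]


print(filter_records([{"updated_at": 5}, {"updated_at": 15}], StubCursor(cutoff=10), None))
```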
@@ -2,10 +2,6 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
6
- ConcurrentCursorFactory,
7
- ConcurrentPerPartitionCursor,
8
- )
9
5
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
10
6
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
11
7
  from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -25,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
25
21
 
26
22
  __all__ = [
27
23
  "CursorFactory",
28
- "ConcurrentCursorFactory",
29
- "ConcurrentPerPartitionCursor",
30
24
  "DatetimeBasedCursor",
31
25
  "DeclarativeCursor",
32
26
  "GlobalSubstreamCursor",
@@ -303,21 +303,6 @@ class PerPartitionCursor(DeclarativeCursor):
303
303
  raise ValueError("A partition needs to be provided in order to get request body json")
304
304
 
305
305
  def should_be_synced(self, record: Record) -> bool:
306
- if (
307
- record.associated_slice
308
- and self._to_partition_key(record.associated_slice.partition)
309
- not in self._cursor_per_partition
310
- ):
311
- partition_state = (
312
- self._state_to_migrate_from
313
- if self._state_to_migrate_from
314
- else self._NO_CURSOR_STATE
315
- )
316
- cursor = self._create_cursor(partition_state)
317
-
318
- self._cursor_per_partition[
319
- self._to_partition_key(record.associated_slice.partition)
320
- ] = cursor
321
306
  return self._get_cursor(record).should_be_synced(
322
307
  self._convert_record_to_cursor_record(record)
323
308
  )
@@ -737,33 +737,43 @@ class KeysToSnakeCase(BaseModel):
737
737
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
738
738
 
739
739
 
740
+ class FlattenFields(BaseModel):
741
+ type: Literal["FlattenFields"]
742
+ flatten_lists: Optional[bool] = Field(
743
+ True,
744
+ description="Whether to flatten lists or leave it as is. Default is True.",
745
+ title="Flatten Lists",
746
+ )
747
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
748
+
749
+
740
750
  class KeysReplace(BaseModel):
741
751
  type: Literal["KeysReplace"]
742
752
  old: str = Field(
743
753
  ...,
744
754
  description="Old value to replace.",
745
- examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
755
+ examples=[
756
+ " ",
757
+ "{{ record.id }}",
758
+ "{{ config['id'] }}",
759
+ "{{ stream_slice['id'] }}",
760
+ ],
746
761
  title="Old value",
747
762
  )
748
763
  new: str = Field(
749
764
  ...,
750
765
  description="New value to set.",
751
- examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
766
+ examples=[
767
+ "_",
768
+ "{{ record.id }}",
769
+ "{{ config['id'] }}",
770
+ "{{ stream_slice['id'] }}",
771
+ ],
752
772
  title="New value",
753
773
  )
754
774
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
755
775
 
756
776
 
757
- class FlattenFields(BaseModel):
758
- type: Literal["FlattenFields"]
759
- flatten_lists: Optional[bool] = Field(
760
- True,
761
- description="Whether to flatten lists or leave it as is. Default is True.",
762
- title="Flatten Lists",
763
- )
764
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
765
-
766
-
767
777
  class IterableDecoder(BaseModel):
768
778
  type: Literal["IterableDecoder"]
769
779
 
@@ -2040,6 +2050,10 @@ class AsyncRetriever(BaseModel):
2040
2050
  ...,
2041
2051
  description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
2042
2052
  )
2053
+ url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
2054
+ None,
2055
+ description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
2056
+ )
2043
2057
  download_requester: Union[CustomRequester, HttpRequester] = Field(
2044
2058
  ...,
2045
2059
  description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",
@@ -84,8 +84,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
84
84
  )
85
85
  from airbyte_cdk.sources.declarative.incremental import (
86
86
  ChildPartitionResumableFullRefreshCursor,
87
- ConcurrentCursorFactory,
88
- ConcurrentPerPartitionCursor,
89
87
  CursorFactory,
90
88
  DatetimeBasedCursor,
91
89
  DeclarativeCursor,
@@ -440,7 +438,6 @@ from airbyte_cdk.sources.message import (
440
438
  InMemoryMessageRepository,
441
439
  LogAppenderMessageRepositoryDecorator,
442
440
  MessageRepository,
443
- NoopMessageRepository,
444
441
  )
445
442
  from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
446
443
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -874,8 +871,6 @@ class ModelToComponentFactory:
874
871
  stream_namespace: Optional[str],
875
872
  config: Config,
876
873
  stream_state: MutableMapping[str, Any],
877
- message_repository: Optional[MessageRepository] = None,
878
- runtime_lookback_window: Optional[int] = None,
879
874
  **kwargs: Any,
880
875
  ) -> ConcurrentCursor:
881
876
  component_type = component_definition.get("type")
@@ -933,11 +928,6 @@ class ModelToComponentFactory:
933
928
  if evaluated_lookback_window:
934
929
  lookback_window = parse_duration(evaluated_lookback_window)
935
930
 
936
- if runtime_lookback_window and lookback_window:
937
- lookback_window = max(lookback_window, runtime_lookback_window)
938
- elif runtime_lookback_window:
939
- lookback_window = runtime_lookback_window
940
-
941
931
  connector_state_converter: DateTimeStreamStateConverter
942
932
  connector_state_converter = CustomFormatConcurrentStreamStateConverter(
943
933
  datetime_format=datetime_format,
@@ -1016,7 +1006,7 @@ class ModelToComponentFactory:
1016
1006
  stream_name=stream_name,
1017
1007
  stream_namespace=stream_namespace,
1018
1008
  stream_state=stream_state,
1019
- message_repository=message_repository or self._message_repository,
1009
+ message_repository=self._message_repository,
1020
1010
  connector_state_manager=state_manager,
1021
1011
  connector_state_converter=connector_state_converter,
1022
1012
  cursor_field=cursor_field,
@@ -1028,63 +1018,6 @@ class ModelToComponentFactory:
1028
1018
  cursor_granularity=cursor_granularity,
1029
1019
  )
1030
1020
 
1031
- def create_concurrent_cursor_from_perpartition_cursor(
1032
- self,
1033
- state_manager: ConnectorStateManager,
1034
- model_type: Type[BaseModel],
1035
- component_definition: ComponentDefinition,
1036
- stream_name: str,
1037
- stream_namespace: Optional[str],
1038
- config: Config,
1039
- stream_state: MutableMapping[str, Any],
1040
- partition_router,
1041
- **kwargs: Any,
1042
- ) -> ConcurrentPerPartitionCursor:
1043
- component_type = component_definition.get("type")
1044
- if component_definition.get("type") != model_type.__name__:
1045
- raise ValueError(
1046
- f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
1047
- )
1048
-
1049
- datetime_based_cursor_model = model_type.parse_obj(component_definition)
1050
-
1051
- if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
1052
- raise ValueError(
1053
- f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
1054
- )
1055
-
1056
- interpolated_cursor_field = InterpolatedString.create(
1057
- datetime_based_cursor_model.cursor_field,
1058
- parameters=datetime_based_cursor_model.parameters or {},
1059
- )
1060
- cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
1061
-
1062
- # Create the cursor factory
1063
- cursor_factory = ConcurrentCursorFactory(
1064
- partial(
1065
- self.create_concurrent_cursor_from_datetime_based_cursor,
1066
- state_manager=state_manager,
1067
- model_type=model_type,
1068
- component_definition=component_definition,
1069
- stream_name=stream_name,
1070
- stream_namespace=stream_namespace,
1071
- config=config,
1072
- message_repository=NoopMessageRepository(),
1073
- )
1074
- )
1075
-
1076
- # Return the concurrent cursor and state converter
1077
- return ConcurrentPerPartitionCursor(
1078
- cursor_factory=cursor_factory,
1079
- partition_router=partition_router,
1080
- stream_name=stream_name,
1081
- stream_namespace=stream_namespace,
1082
- stream_state=stream_state,
1083
- message_repository=self._message_repository, # type: ignore
1084
- connector_state_manager=state_manager,
1085
- cursor_field=cursor_field,
1086
- )
1087
-
1088
1021
  @staticmethod
1089
1022
  def create_constant_backoff_strategy(
1090
1023
  model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1367,15 +1300,18 @@ class ModelToComponentFactory:
1367
1300
  raise ValueError(
1368
1301
  "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1369
1302
  )
1370
- cursor = (
1371
- combined_slicers
1372
- if isinstance(
1373
- combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1374
- )
1375
- else self._create_component_from_model(model=model.incremental_sync, config=config)
1376
- )
1377
-
1378
- client_side_incremental_sync = {"cursor": cursor}
1303
+ client_side_incremental_sync = {
1304
+ "date_time_based_cursor": self._create_component_from_model(
1305
+ model=model.incremental_sync, config=config
1306
+ ),
1307
+ "substream_cursor": (
1308
+ combined_slicers
1309
+ if isinstance(
1310
+ combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1311
+ )
1312
+ else None
1313
+ ),
1314
+ }
1379
1315
 
1380
1316
  if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1381
1317
  cursor_model = model.incremental_sync
@@ -2191,7 +2127,7 @@ class ModelToComponentFactory:
2191
2127
  if (
2192
2128
  not isinstance(stream_slicer, DatetimeBasedCursor)
2193
2129
  or type(stream_slicer) is not DatetimeBasedCursor
2194
- ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
2130
+ ):
2195
2131
  # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
2196
2132
  # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
2197
2133
  # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -2351,7 +2287,7 @@ class ModelToComponentFactory:
2351
2287
  extractor=download_extractor,
2352
2288
  name=name,
2353
2289
  record_filter=None,
2354
- transformations=[],
2290
+ transformations=transformations,
2355
2291
  schema_normalization=TypeTransformer(TransformConfig.NoTransform),
2356
2292
  config=config,
2357
2293
  parameters={},
@@ -2388,6 +2324,16 @@ class ModelToComponentFactory:
2388
2324
  if model.delete_requester
2389
2325
  else None
2390
2326
  )
2327
+ url_requester = (
2328
+ self._create_component_from_model(
2329
+ model=model.url_requester,
2330
+ decoder=decoder,
2331
+ config=config,
2332
+ name=f"job extract_url - {name}",
2333
+ )
2334
+ if model.url_requester
2335
+ else None
2336
+ )
2391
2337
  status_extractor = self._create_component_from_model(
2392
2338
  model=model.status_extractor, decoder=decoder, config=config, name=name
2393
2339
  )
@@ -2398,6 +2344,7 @@ class ModelToComponentFactory:
2398
2344
  creation_requester=creation_requester,
2399
2345
  polling_requester=polling_requester,
2400
2346
  download_retriever=download_retriever,
2347
+ url_requester=url_requester,
2401
2348
  abort_requester=abort_requester,
2402
2349
  delete_requester=delete_requester,
2403
2350
  status_extractor=status_extractor,
@@ -0,0 +1,57 @@
1
+ # AsyncHttpJobRepository sequence diagram
2
+
3
+ - Components marked as optional can be omitted.
4
+ - If `url_requester` is not provided, `urls_extractor` will extract the URLs from the `polling_job_response`.
5
+ - The interpolation context, e.g. `create_job_response` or `polling_job_response`, can be obtained from the `stream_slice`.
6
+
7
+
8
+ ```mermaid
9
+ ---
10
+ title: AsyncHttpJobRepository Sequence Diagram
11
+ ---
12
+ sequenceDiagram
13
+ participant AsyncHttpJobRepository as AsyncOrchestrator
14
+ participant CreationRequester as creation_requester
15
+ participant PollingRequester as polling_requester
16
+ participant UrlRequester as url_requester (Optional)
17
+ participant DownloadRetriever as download_retriever
18
+ participant AbortRequester as abort_requester (Optional)
19
+ participant DeleteRequester as delete_requester (Optional)
20
+ participant Reporting Server as Async Reporting Server
21
+
22
+ AsyncHttpJobRepository ->> CreationRequester: Initiate job creation
23
+ CreationRequester ->> Reporting Server: Create job request
24
+ Reporting Server -->> CreationRequester: Job ID response
25
+ CreationRequester -->> AsyncHttpJobRepository: Job ID
26
+
27
+ loop Poll for job status
28
+ AsyncHttpJobRepository ->> PollingRequester: Check job status
29
+ PollingRequester ->> Reporting Server: Status request (interpolation_context: `create_job_response`)
30
+ Reporting Server -->> PollingRequester: Status response
31
+ PollingRequester -->> AsyncHttpJobRepository: Job status
32
+ end
33
+
34
+ alt Status: Ready
35
+ AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
36
+ UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_job_response`)
37
+ Reporting Server -->> UrlRequester: Download URLs
38
+ UrlRequester -->> AsyncHttpJobRepository: Download URLs
39
+
40
+ AsyncHttpJobRepository ->> DownloadRetriever: Download reports
41
+ DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `url`)
42
+ Reporting Server -->> DownloadRetriever: Report data
43
+ DownloadRetriever -->> AsyncHttpJobRepository: Report data
44
+ else Status: Failed
45
+ AsyncHttpJobRepository ->> AbortRequester: Send abort request
46
+ AbortRequester ->> Reporting Server: Abort job
47
+ Reporting Server -->> AbortRequester: Abort confirmation
48
+ AbortRequester -->> AsyncHttpJobRepository: Confirmation
49
+ end
50
+
51
+ AsyncHttpJobRepository ->> DeleteRequester: Send delete job request
52
+ DeleteRequester ->> Reporting Server: Delete job
53
+ Reporting Server -->> DeleteRequester: Deletion confirmation
54
+ DeleteRequester -->> AsyncHttpJobRepository: Confirmation
55
+
56
+
57
+ ```
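
A minimal, self-contained sketch of the decision described in the notes above, with plain callables standing in for the real Requester and RecordExtractor components (the response shape and field names are illustrative only): when no `url_requester` is configured, the URLs are extracted straight from the polling response; otherwise an extra request is sent with the polling response exposed as interpolation context.

```python
# Hypothetical stand-ins; not the real CDK components.
from typing import Any, Callable, Dict, Iterable, List, Optional


def get_download_urls(
    polling_job_response: Dict[str, Any],
    urls_extractor: Callable[[Dict[str, Any]], List[str]],
    url_requester: Optional[Callable[[Dict[str, Any]], Dict[str, Any]]] = None,
) -> Iterable[str]:
    if url_requester is None:
        # No url_requester: urls_extractor reads the polling response directly.
        url_source = polling_job_response
    else:
        # The polling response is handed to the requester as interpolation context.
        url_source = url_requester({"polling_job_response": polling_job_response})
    yield from urls_extractor(url_source)


polling = {"job": {"status": "ready", "urls": ["https://example.com/report-1.csv"]}}
print(list(get_download_urls(polling, lambda response: response["job"]["urls"])))
```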
@@ -31,6 +31,10 @@ LOGGER = logging.getLogger("airbyte")
31
31
 
32
32
  @dataclass
33
33
  class AsyncHttpJobRepository(AsyncJobRepository):
34
+ """
35
+ See the README file for more details about the flow.
36
+ """
37
+
34
38
  creation_requester: Requester
35
39
  polling_requester: Requester
36
40
  download_retriever: SimpleRetriever
@@ -44,6 +48,9 @@ class AsyncHttpJobRepository(AsyncJobRepository):
44
48
  record_extractor: RecordExtractor = field(
45
49
  init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
46
50
  )
51
+ url_requester: Optional[Requester] = (
52
+ None # use this in case the polling_requester provides some <id> and an extra request is needed to obtain the list of URLs to download from
53
+ )
47
54
 
48
55
  def __post_init__(self) -> None:
49
56
  self._create_job_response_by_id: Dict[str, Response] = {}
@@ -186,10 +193,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
186
193
 
187
194
  """
188
195
 
189
- for url in self.urls_extractor.extract_records(
190
- self._polling_job_response_by_id[job.api_job_id()]
191
- ):
192
- stream_slice: StreamSlice = StreamSlice(partition={"url": url}, cursor_slice={})
196
+ for url in self._get_download_url(job):
197
+ job_slice = job.job_parameters()
198
+ stream_slice = StreamSlice(
199
+ partition=job_slice.partition,
200
+ cursor_slice=job_slice.cursor_slice,
201
+ extra_fields={**job_slice.extra_fields, "url": url},
202
+ )
193
203
  for message in self.download_retriever.read_records({}, stream_slice):
194
204
  if isinstance(message, Record):
195
205
  yield message.data
@@ -226,3 +236,22 @@ class AsyncHttpJobRepository(AsyncJobRepository):
226
236
  cursor_slice={},
227
237
  )
228
238
  return stream_slice
239
+
240
+ def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
241
+ if not self.url_requester:
242
+ url_response = self._polling_job_response_by_id[job.api_job_id()]
243
+ else:
244
+ stream_slice: StreamSlice = StreamSlice(
245
+ partition={
246
+ "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
247
+ },
248
+ cursor_slice={},
249
+ )
250
+ url_response = self.url_requester.send_request(stream_slice=stream_slice) # type: ignore # we expect url_requester to always be present; otherwise raise an exception as we cannot proceed with the report
251
+ if not url_response:
252
+ raise AirbyteTracedException(
253
+ internal_message="Always expect a response or an exception from url_requester",
254
+ failure_type=FailureType.system_error,
255
+ )
256
+
257
+ yield from self.urls_extractor.extract_records(url_response) # type: ignore # we expect urls_extractor to always return a list of strings
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
160
160
  stream_slice,
161
161
  next_page_token,
162
162
  self._paginator.get_request_headers,
163
- self.request_option_provider.get_request_headers,
163
+ self.stream_slicer.get_request_headers,
164
164
  )
165
165
  if isinstance(headers, str):
166
166
  raise ValueError("Request headers cannot be a string")
@@ -196,9 +196,7 @@ class ConcurrentCursor(Cursor):
196
196
 
197
197
  @property
198
198
  def state(self) -> MutableMapping[str, Any]:
199
- return self._connector_state_converter.convert_to_state_message(
200
- self.cursor_field, self._concurrent_state
201
- )
199
+ return self._concurrent_state
202
200
 
203
201
  @property
204
202
  def cursor_field(self) -> CursorField:
@@ -243,10 +241,10 @@ class ConcurrentCursor(Cursor):
243
241
  return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
244
242
 
245
243
  def close_partition(self, partition: Partition) -> None:
246
- slice_count_before = len(self._concurrent_state.get("slices", []))
244
+ slice_count_before = len(self.state.get("slices", []))
247
245
  self._add_slice_to_state(partition)
248
246
  if slice_count_before < len(
249
- self._concurrent_state["slices"]
247
+ self.state["slices"]
250
248
  ): # only emit if at least one slice has been processed
251
249
  self._merge_partitions()
252
250
  self._emit_state_message()
@@ -258,11 +256,11 @@ class ConcurrentCursor(Cursor):
258
256
  )
259
257
 
260
258
  if self._slice_boundary_fields:
261
- if "slices" not in self._concurrent_state:
259
+ if "slices" not in self.state:
262
260
  raise RuntimeError(
263
261
  f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
264
262
  )
265
- self._concurrent_state["slices"].append(
263
+ self.state["slices"].append(
266
264
  {
267
265
  self._connector_state_converter.START_KEY: self._extract_from_slice(
268
266
  partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -290,7 +288,7 @@ class ConcurrentCursor(Cursor):
290
288
  "expected. Please contact the Airbyte team."
291
289
  )
292
290
 
293
- self._concurrent_state["slices"].append(
291
+ self.state["slices"].append(
294
292
  {
295
293
  self._connector_state_converter.START_KEY: self.start,
296
294
  self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -302,7 +300,9 @@ class ConcurrentCursor(Cursor):
302
300
  self._connector_state_manager.update_state_for_stream(
303
301
  self._stream_name,
304
302
  self._stream_namespace,
305
- self.state,
303
+ self._connector_state_converter.convert_to_state_message(
304
+ self._cursor_field, self.state
305
+ ),
306
306
  )
307
307
  state_message = self._connector_state_manager.create_state_message(
308
308
  self._stream_name, self._stream_namespace
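
The hunks above move the state-format conversion out of the `state` property: the property now returns the raw concurrent-format state, and `convert_to_state_message` is applied only when the state message is emitted. A simplified sketch of that split, with stub stand-ins for the converter and cursor (not the real `ConcurrentCursor` API):

```python
# Simplified stand-ins for illustration only.
from typing import Any, Dict


class StubStateConverter:
    def convert_to_state_message(self, cursor_field: str, concurrent_state: Dict[str, Any]) -> Dict[str, Any]:
        # A real converter serializes the internal slice format into the
        # connector state message format; here we only tag the conversion.
        return {"cursor_field": cursor_field, "converted": True, **concurrent_state}


class StubCursor:
    def __init__(self) -> None:
        self._cursor_field = "updated_at"
        self._concurrent_state: Dict[str, Any] = {"slices": []}
        self._converter = StubStateConverter()

    @property
    def state(self) -> Dict[str, Any]:
        # Raw internal representation; no conversion on access anymore.
        return self._concurrent_state

    def emit_state_message(self) -> Dict[str, Any]:
        # Conversion happens only when a state message is produced.
        return self._converter.convert_to_state_message(self._cursor_field, self.state)


cursor = StubCursor()
print(cursor.state)                 # {'slices': []}
print(cursor.emit_state_message())  # {'cursor_field': 'updated_at', 'converted': True, 'slices': []}
```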
@@ -310,9 +310,7 @@ class ConcurrentCursor(Cursor):
310
310
  self._message_repository.emit_message(state_message)
311
311
 
312
312
  def _merge_partitions(self) -> None:
313
- self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
314
- self._concurrent_state["slices"]
315
- )
313
+ self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
316
314
 
317
315
  def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
318
316
  try:
@@ -349,42 +347,36 @@ class ConcurrentCursor(Cursor):
349
347
  if self._start is not None and self._is_start_before_first_slice():
350
348
  yield from self._split_per_slice_range(
351
349
  self._start,
352
- self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
350
+ self.state["slices"][0][self._connector_state_converter.START_KEY],
353
351
  False,
354
352
  )
355
353
 
356
- if len(self._concurrent_state["slices"]) == 1:
354
+ if len(self.state["slices"]) == 1:
357
355
  yield from self._split_per_slice_range(
358
356
  self._calculate_lower_boundary_of_last_slice(
359
- self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
357
+ self.state["slices"][0][self._connector_state_converter.END_KEY]
360
358
  ),
361
359
  self._end_provider(),
362
360
  True,
363
361
  )
364
- elif len(self._concurrent_state["slices"]) > 1:
365
- for i in range(len(self._concurrent_state["slices"]) - 1):
362
+ elif len(self.state["slices"]) > 1:
363
+ for i in range(len(self.state["slices"]) - 1):
366
364
  if self._cursor_granularity:
367
365
  yield from self._split_per_slice_range(
368
- self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
366
+ self.state["slices"][i][self._connector_state_converter.END_KEY]
369
367
  + self._cursor_granularity,
370
- self._concurrent_state["slices"][i + 1][
371
- self._connector_state_converter.START_KEY
372
- ],
368
+ self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
373
369
  False,
374
370
  )
375
371
  else:
376
372
  yield from self._split_per_slice_range(
377
- self._concurrent_state["slices"][i][
378
- self._connector_state_converter.END_KEY
379
- ],
380
- self._concurrent_state["slices"][i + 1][
381
- self._connector_state_converter.START_KEY
382
- ],
373
+ self.state["slices"][i][self._connector_state_converter.END_KEY],
374
+ self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
383
375
  False,
384
376
  )
385
377
  yield from self._split_per_slice_range(
386
378
  self._calculate_lower_boundary_of_last_slice(
387
- self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
379
+ self.state["slices"][-1][self._connector_state_converter.END_KEY]
388
380
  ),
389
381
  self._end_provider(),
390
382
  True,
@@ -395,8 +387,7 @@ class ConcurrentCursor(Cursor):
395
387
  def _is_start_before_first_slice(self) -> bool:
396
388
  return (
397
389
  self._start is not None
398
- and self._start
399
- < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
390
+ and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
400
391
  )
401
392
 
402
393
  def _calculate_lower_boundary_of_last_slice(
@@ -152,3 +152,6 @@ class StreamSlice(Mapping[str, Any]):
152
152
 
153
153
  def __hash__(self) -> int:
154
154
  return hash(orjson.dumps(self._stream_slice, option=orjson.OPT_SORT_KEYS))
155
+
156
+ def __bool__(self) -> bool:
157
+ return bool(self._stream_slice) or bool(self._extra_fields)
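
With the `__bool__` override added above, a `StreamSlice` whose partition and cursor slice are both empty still evaluates as truthy when it carries `extra_fields` (such as the download `url` set in `http_job_repository.py`). A small stand-in class illustrating the semantics (not the real `StreamSlice`):

```python
# Stand-in for illustration; the real StreamSlice is a Mapping with more behavior.
from typing import Any, Dict


class SliceLike:
    def __init__(self, stream_slice: Dict[str, Any], extra_fields: Dict[str, Any]) -> None:
        self._stream_slice = stream_slice
        self._extra_fields = extra_fields

    def __bool__(self) -> bool:
        return bool(self._stream_slice) or bool(self._extra_fields)


print(bool(SliceLike({}, {})))                              # False
print(bool(SliceLike({}, {"url": "https://example.com"})))  # True
```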
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.17.1.dev1
3
+ Version: 6.18.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=VfDvff6ionjGScMbEpMGlZ0TfOyIQpMUZiuV6pkI9Os,26557
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=tSTCSmyMCu1qoGsne1Ooz3c1da-8EDZk6Suiy2gIq9Q,22475
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=GfZlk9EvYQiWDx3AipNLf1us1e986q2mgqcbHbeZU0k,133172
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=2t3_QVXWOImPcH-apR_Xd8qNl6K_URFwBbQ47YHcjXg,133490
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
@@ -81,16 +81,15 @@ airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrq
81
81
  airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
82
82
  airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
83
83
  airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
84
- airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
84
+ airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzxYOeIrDy1GINb1zH9MBy6suC5tm2LSk,3545
85
85
  airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD-TGCScXvW95ThNKyPGcx2SiWbG1-H-sc,6552
86
86
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
87
87
  airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
88
- airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
89
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=vU6bcVgjDFou7szl5UKxv2-theKSsV78oSME84-C78A,15043
88
+ airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=huRz3KQJSUFmJCg5GPE9TckEBsB5TMsCa_THhJAhPVI,1037
90
89
  airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
91
90
  airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
92
91
  airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
93
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=_FSJjAwL4Zu-i2CngnhTtx8j-NPVSBKj5LwDSPta3Cg,16305
92
+ airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
94
93
  airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py,sha256=2YBOA2NnwAeIKlIhSwUB_W-FaGnPcmrG_liY7b4mV2Y,8365
95
94
  airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py,sha256=10LFv1QPM-agVKl6eaANmEBOfd7gZgBrkoTcMggsieQ,4809
96
95
  airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=tjUJkn3B-iZ-p7RP2c3dVZejrGiQeooGmS5ibWTuUL4,437
@@ -107,12 +106,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
107
106
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
108
107
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
109
108
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
110
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=IZFT1m4d-zp5hQ0ayU06Vdxm6r3MEq-X2sOCo9SuG-k,93270
109
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=3xWpeDNDGOw_I2pQ1LDiUhNBEWEvNAtd-HCi_1aklSQ,93666
111
110
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
112
111
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
113
112
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
114
113
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
115
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lgFqJ8DP-cRizmvFKRd4Oy_ebgoT_AceMKIpuqoFm3c,112097
114
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=dpRWxZaPghPcE5vGkI4swKDaXyLWLMAbvDoazuNSobU,109709
116
115
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
117
116
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
118
117
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -120,6 +119,7 @@ airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha25
120
119
  airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
121
120
  airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
122
121
  airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=5bgXoJfBg_6i53krQMptAGb50XB5XoVfqQxKQhlLtBA,15383
122
+ airbyte_cdk/sources/declarative/requesters/README.md,sha256=WabtHlwHg_J34aL1Kwm8vboYqBaSgsFjq10qR-P2sx8,2658
123
123
  airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
124
124
  airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
125
125
  airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py,sha256=1WZdpFmWL6W_Dko0qjflTaKIWeqt8jHT-D6HcujIp3s,884
@@ -134,7 +134,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.
134
134
  airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
135
135
  airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
136
136
  airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=vhWsEKNTYEzZ4gerhHqnDNKu4wGIP485NAzpSQ5DRZg,7941
137
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=o0520AmHMb7SAoeokVNwoOzuZzIAT6ryx9uFYGSOrs0,8664
137
+ airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=3GtOefPH08evlSUxaILkiKLTHbIspFY4qd5B3ZqNE60,10063
138
138
  airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
139
139
  airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
140
140
  airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
@@ -163,7 +163,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
163
163
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
164
164
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
165
165
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
166
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=kgnhVQxRlFqJs2-rDu2-QH-p-GzQU3nKmSp6_aq8u0s,24550
166
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
167
167
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
168
168
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
169
169
  airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
@@ -257,7 +257,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIM
257
257
  airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
258
258
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=QP_64kQo-b3sRNHZA5aqrgCJqAhIVegRM3vJ8jGyuSY,15213
259
259
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
260
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=suObbNi24so8Wcj0Wm32OkJAcuvODAOwp373YBmUPp0,21213
260
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Hke6CpD8Sq1FS4g1Xuht39UN7hKkGy1mvOxvQrm1lLM,20810
261
261
  airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
262
262
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
263
263
  airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
@@ -293,7 +293,7 @@ airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py,sha256=Y
293
293
  airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=ka-bBRWvIv09LmZNYl49p2lK9nd_Tvi2g0lIp3OkU40,14872
294
294
  airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=h5PTzcdH-RQLeCg7xZ45w_484OPUDSwNWl_iMJQmZoI,2526
295
295
  airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
296
- airbyte_cdk/sources/types.py,sha256=WWVigI7ZSoQU2TBCzDsHJtoX4Ima9v--lcLyYwUG_cE,4904
296
+ airbyte_cdk/sources/types.py,sha256=nLPkTpyfGV4E6e99qcBWX4r8C3fE4I8Fvgx2EjvT9ic,5005
297
297
  airbyte_cdk/sources/utils/__init__.py,sha256=TTN6VUxVy6Is8BhYQZR5pxJGQh8yH4duXh4O1TiMiEY,118
298
298
  airbyte_cdk/sources/utils/casing.py,sha256=QC-gV1O4e8DR4-bhdXieUPKm_JamzslVyfABLYYRSXA,256
299
299
  airbyte_cdk/sources/utils/record_helper.py,sha256=jeB0mucudzna7Zvj-pCBbwFrbLJ36SlAWZTh5O4Fb9Y,2168
@@ -343,8 +343,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
343
343
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
344
344
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
345
345
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
346
- airbyte_cdk-6.17.1.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
347
- airbyte_cdk-6.17.1.dev1.dist-info/METADATA,sha256=8TVLQbLq6-v0qkRHb8X4P9x2sYTe9EUjwdvMb2NVOpA,6005
348
- airbyte_cdk-6.17.1.dev1.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
349
- airbyte_cdk-6.17.1.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
350
- airbyte_cdk-6.17.1.dev1.dist-info/RECORD,,
346
+ airbyte_cdk-6.18.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
347
+ airbyte_cdk-6.18.0.dist-info/METADATA,sha256=RvVkgbg-LBbS5eGTntO-mp34yRIDMuPYZ26VRmSkhCA,6000
348
+ airbyte_cdk-6.18.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
349
+ airbyte_cdk-6.18.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
350
+ airbyte_cdk-6.18.0.dist-info/RECORD,,
@@ -1,340 +0,0 @@
1
- import copy
2
- import logging
3
-
4
- #
5
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
6
- #
7
- import threading
8
- from collections import OrderedDict
9
- from copy import deepcopy
10
- from datetime import timedelta
11
- from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
12
-
13
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
14
- from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
15
- from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
16
- Timer,
17
- iterate_with_last_flag_and_state,
18
- )
19
- from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
20
- from airbyte_cdk.sources.message import MessageRepository
21
- from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
22
- PerPartitionKeySerializer,
23
- )
24
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, CursorField
25
- from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
26
- from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
27
-
28
- logger = logging.getLogger("airbyte")
29
-
30
-
31
- class ConcurrentCursorFactory:
32
- def __init__(self, create_function: Callable[..., Cursor]):
33
- self._create_function = create_function
34
-
35
- def create(self, stream_state: Mapping[str, Any], runtime_lookback_window: Any) -> Cursor:
36
- return self._create_function(
37
- stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
38
- )
39
-
40
-
41
- class ConcurrentPerPartitionCursor(Cursor):
42
- """
43
- Manages state per partition when a stream has many partitions, to prevent data loss or duplication.
44
-
45
- **Partition Limitation and Limit Reached Logic**
46
-
47
- - **DEFAULT_MAX_PARTITIONS_NUMBER**: The maximum number of partitions to keep in memory (default is 10,000).
48
- - **_cursor_per_partition**: An ordered dictionary that stores cursors for each partition.
49
- - **_over_limit**: A counter that increments each time an oldest partition is removed when the limit is exceeded.
50
-
51
- The class ensures that the number of partitions tracked does not exceed the `DEFAULT_MAX_PARTITIONS_NUMBER` to prevent excessive memory usage.
52
-
53
- - When the number of partitions exceeds the limit, the oldest partitions are removed from `_cursor_per_partition`, and `_over_limit` is incremented accordingly.
54
- - The `limit_reached` method returns `True` when `_over_limit` exceeds `DEFAULT_MAX_PARTITIONS_NUMBER`, indicating that the global cursor should be used instead of per-partition cursors.
55
-
56
- This approach avoids unnecessary switching to a global cursor due to temporary spikes in partition counts, ensuring that switching is only done when a sustained high number of partitions is observed.
57
- """
58
-
59
- DEFAULT_MAX_PARTITIONS_NUMBER = 10000
60
- _NO_STATE: Mapping[str, Any] = {}
61
- _NO_CURSOR_STATE: Mapping[str, Any] = {}
62
- _KEY = 0
63
- _VALUE = 1
64
-
65
- def __init__(
66
- self,
67
- cursor_factory: ConcurrentCursorFactory,
68
- partition_router: PartitionRouter,
69
- stream_name: str,
70
- stream_namespace: Optional[str],
71
- stream_state: Any,
72
- message_repository: MessageRepository,
73
- connector_state_manager: ConnectorStateManager,
74
- cursor_field: CursorField,
75
- ) -> None:
76
- self._global_cursor: Mapping[str, Any] = {}
77
- self._stream_name = stream_name
78
- self._stream_namespace = stream_namespace
79
- self._message_repository = message_repository
80
- self._connector_state_manager = connector_state_manager
81
- self._cursor_field = cursor_field
82
-
83
- self._cursor_factory = cursor_factory
84
- self._partition_router = partition_router
85
-
86
- # The dict is ordered to ensure that once the maximum number of partitions is reached,
87
- # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
88
- self._cursor_per_partition: OrderedDict[str, Cursor] = OrderedDict()
89
- self._state = {"states": []}
90
- self._semaphore_per_partition = OrderedDict()
91
- self._finished_partitions = set()
92
- self._lock = threading.Lock()
93
- self._timer = Timer()
94
- self._new_global_cursor = None
95
- self._lookback_window = 0
96
- self._parent_state = None
97
- self._over_limit = 0
98
- self._partition_serializer = PerPartitionKeySerializer()
99
-
100
- self._set_initial_state(stream_state)
101
-
102
- @property
103
- def cursor_field(self) -> CursorField:
104
- return self._cursor_field
105
-
106
- @property
107
- def state(self) -> MutableMapping[str, Any]:
108
- states = []
109
- for partition_tuple, cursor in self._cursor_per_partition.items():
110
- if cursor.state:
111
- states.append(
112
- {
113
- "partition": self._to_dict(partition_tuple),
114
- "cursor": copy.deepcopy(cursor.state),
115
- }
116
- )
117
- state: dict[str, Any] = {"states": states}
118
-
119
- if self._global_cursor:
120
- state["state"] = self._global_cursor
121
- if self._lookback_window is not None:
122
- state["lookback_window"] = self._lookback_window
123
- if self._parent_state is not None:
124
- state["parent_state"] = self._parent_state
125
- return state
126
-
127
- def close_partition(self, partition: Partition) -> None:
128
- self._cursor_per_partition[
129
- self._to_partition_key(partition._stream_slice.partition)
130
- ].close_partition(partition=partition)
131
- with self._lock:
132
- self._semaphore_per_partition[
133
- self._to_partition_key(partition._stream_slice.partition)
134
- ].acquire()
135
- cursor = self._cursor_per_partition[
136
- self._to_partition_key(partition._stream_slice.partition)
137
- ]
138
- if (
139
- self._to_partition_key(partition._stream_slice.partition)
140
- in self._finished_partitions
141
- and self._semaphore_per_partition[
142
- self._to_partition_key(partition._stream_slice.partition)
143
- ]._value
144
- == 0
145
- ):
146
- if (
147
- self._new_global_cursor is None
148
- or self._new_global_cursor[self.cursor_field.cursor_field_key]
149
- < cursor.state[self.cursor_field.cursor_field_key]
150
- ):
151
- self._new_global_cursor = copy.deepcopy(cursor.state)
152
-
153
- def ensure_at_least_one_state_emitted(self) -> None:
154
- """
155
- The platform expect to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
156
- called.
157
- """
158
- if not any(
159
- semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
160
- ):
161
- self._global_cursor = self._new_global_cursor
162
- self._lookback_window = self._timer.finish()
163
- self._parent_state = self._partition_router.get_stream_state()
164
- self._emit_state_message()
165
-
166
- def _emit_state_message(self) -> None:
167
- self._connector_state_manager.update_state_for_stream(
168
- self._stream_name,
169
- self._stream_namespace,
170
- self.state,
171
- )
172
- state_message = self._connector_state_manager.create_state_message(
173
- self._stream_name, self._stream_namespace
174
- )
175
- self._message_repository.emit_message(state_message)
176
-
177
- def stream_slices(self) -> Iterable[StreamSlice]:
178
- slices = self._partition_router.stream_slices()
179
- self._timer.start()
180
- for partition in slices:
181
- yield from self.generate_slices_from_partition(partition)
182
-
183
- def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
184
- # Ensure the maximum number of partitions is not exceeded
185
- self._ensure_partition_limit()
186
-
187
- cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
188
- if not cursor:
189
- partition_state = self._global_cursor if self._global_cursor else self._NO_CURSOR_STATE
190
- cursor = self._create_cursor(partition_state)
191
- self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
192
- self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
193
- threading.Semaphore(0)
194
- )
195
-
196
- for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
197
- cursor.stream_slices(),
198
- lambda: None,
199
- ):
200
- self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
201
- if is_last_slice:
202
- self._finished_partitions.add(self._to_partition_key(partition.partition))
203
- yield StreamSlice(
204
- partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
205
- )
206
-
207
- def _ensure_partition_limit(self) -> None:
208
- """
209
- Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
210
- """
211
- while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
212
- self._over_limit += 1
213
- oldest_partition = self._cursor_per_partition.popitem(last=False)[
214
- 0
215
- ] # Remove the oldest partition
216
- logger.warning(
217
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
218
- )
219
-
220
- def limit_reached(self) -> bool:
221
- return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
222
-
223
- def _set_initial_state(self, stream_state: StreamState) -> None:
224
- """
225
- Set the initial state for the cursors.
226
-
227
- This method initializes the state for each partition cursor using the provided stream state.
228
- If a partition state is provided in the stream state, it will update the corresponding partition cursor with this state.
229
-
230
- Additionally, it sets the parent state for partition routers that are based on parent streams. If a partition router
231
- does not have parent streams, this step will be skipped due to the default PartitionRouter implementation.
232
-
233
- Args:
234
- stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
235
- {
236
- "states": [
237
- {
238
- "partition": {
239
- "partition_key": "value"
240
- },
241
- "cursor": {
242
- "last_updated": "2023-05-27T00:00:00Z"
243
- }
244
- }
245
- ],
246
- "parent_state": {
247
- "parent_stream_name": {
248
- "last_updated": "2023-05-27T00:00:00Z"
249
- }
250
- }
251
- }
252
- """
253
- if not stream_state:
254
- return
255
-
256
- if "states" not in stream_state:
257
- # We assume that `stream_state` is in a global format that can be applied to all partitions.
258
- # Example: {"global_state_format_key": "global_state_format_value"}
259
- self._global_cursor = deepcopy(stream_state)
260
- self._new_global_cursor = deepcopy(stream_state)
261
-
262
- else:
263
- self._lookback_window = stream_state.get("lookback_window")
264
-
265
- for state in stream_state["states"]:
266
- self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
267
- self._create_cursor(
268
- state["cursor"], runtime_lookback_window=self._lookback_window
269
- )
270
- )
271
- self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
272
- threading.Semaphore(0)
273
- )
274
-
275
- # set default state for missing partitions if it is per partition with fallback to global
276
- if "state" in stream_state:
277
- self._global_cursor = deepcopy(stream_state["state"])
278
- self._new_global_cursor = deepcopy(stream_state["state"])
279
-
280
- # Set parent state for partition routers based on parent streams
281
- self._partition_router.set_initial_state(stream_state)
282
-
283
- def observe(self, record: Record) -> None:
284
- self._cursor_per_partition[
285
- self._to_partition_key(record.associated_slice.partition)
286
- ].observe(record)
287
-
288
- def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
289
- return self._partition_serializer.to_partition_key(partition)
290
-
291
- def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
292
- return self._partition_serializer.to_partition(partition_key)
293
-
294
- def _create_cursor(self, cursor_state: Any, runtime_lookback_window: Any = None) -> Cursor:
295
- if runtime_lookback_window:
296
- runtime_lookback_window = timedelta(seconds=runtime_lookback_window)
297
- cursor = self._cursor_factory.create(
298
- stream_state=deepcopy(cursor_state), runtime_lookback_window=runtime_lookback_window
299
- )
300
- return cursor
301
-
302
- def should_be_synced(self, record: Record) -> bool:
303
- return self._get_cursor(record).should_be_synced(record)
304
-
305
- def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
306
- if not first.associated_slice or not second.associated_slice:
307
- raise ValueError(
308
- f"Both records should have an associated slice but got {first.associated_slice} and {second.associated_slice}"
309
- )
310
- if first.associated_slice.partition != second.associated_slice.partition:
311
- raise ValueError(
312
- f"To compare records, partition should be the same but got {first.associated_slice.partition} and {second.associated_slice.partition}"
313
- )
314
-
315
- return self._get_cursor(first).is_greater_than_or_equal(
316
- self._convert_record_to_cursor_record(first),
317
- self._convert_record_to_cursor_record(second),
318
- )
319
-
320
- @staticmethod
321
- def _convert_record_to_cursor_record(record: Record) -> Record:
322
- return Record(
323
- record.data,
324
- StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
325
- if record.associated_slice
326
- else None,
327
- )
328
-
329
- def _get_cursor(self, record: Record) -> Cursor:
330
- if not record.associated_slice:
331
- raise ValueError(
332
- "Invalid state as stream slices that are emitted should refer to an existing cursor"
333
- )
334
- partition_key = self._to_partition_key(record.associated_slice.partition)
335
- if partition_key not in self._cursor_per_partition:
336
- raise ValueError(
337
- "Invalid state as stream slices that are emitted should refer to an existing cursor"
338
- )
339
- cursor = self._cursor_per_partition[partition_key]
340
- return cursor