airbyte-cdk 7.3.1__py3-none-any.whl → 7.3.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2279,12 +2279,14 @@ definitions:
  - FAIL
  - RETRY
  - IGNORE
+ - RESET_PAGINATION
  - RATE_LIMITED
  examples:
  - SUCCESS
  - FAIL
  - RETRY
  - IGNORE
+ - RESET_PAGINATION
  - RATE_LIMITED
  failure_type:
  title: Failure Type
@@ -3707,6 +3709,9 @@ definitions:
  anyOf:
  - "$ref": "#/definitions/DefaultPaginator"
  - "$ref": "#/definitions/NoPagination"
+ pagination_reset:
+ description: Describes what triggers pagination reset and how to handle it.
+ "$ref": "#/definitions/PaginationReset"
  ignore_stream_slicer_parameters_on_paginated_requests:
  description: If true, the partition router and incremental request options will be ignored when paginating requests. Request options set directly on the requester will not be ignored.
  type: boolean
@@ -3730,6 +3735,36 @@ definitions:
  $parameters:
  type: object
  additionalProperties: true
+ PaginationReset:
+ title: Pagination Reset
+ description: Describes what triggers pagination reset and how to handle it. If SPLIT_USING_CURSOR, the connector developer is accountable for ensuring that the records are returned in ascending order.
+ type: object
+ required:
+ - type
+ - action
+ properties:
+ type:
+ type: string
+ enum: [ PaginationReset ]
+ action:
+ type: string
+ enum:
+ - SPLIT_USING_CURSOR
+ - RESET
+ limits:
+ "$ref": "#/definitions/PaginationResetLimits"
+ PaginationResetLimits:
+ title: Pagination Reset Limits
+ description: Describes the limits that trigger pagination reset
+ type: object
+ required:
+ - type
+ properties:
+ type:
+ type: string
+ enum: [ PaginationResetLimits ]
+ number_of_records:
+ type: integer
  GzipDecoder:
  title: gzip
  description: Select 'gzip' for response data that is compressed with gzip. Requires specifying an inner data type/decoder to parse the decompressed data.
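For orientation, here is a minimal sketch of how a retriever in a declarative manifest might opt into these new definitions, written as a Python dict so it stays in the same language as the rest of the examples in this diff. Only the pagination_reset keys come from the schema above; the surrounding stream and paginator settings are placeholders.

```python
# Illustrative manifest fragment (as a Python dict) using the new PaginationReset definition.
# Everything except the pagination_reset block is placeholder configuration.
retriever_fragment = {
    "type": "SimpleRetriever",
    "paginator": {"type": "DefaultPaginator"},  # remaining paginator config omitted
    "pagination_reset": {
        "type": "PaginationReset",
        "action": "SPLIT_USING_CURSOR",  # or "RESET"
        "limits": {
            "type": "PaginationResetLimits",
            "number_of_records": 50_000,  # arbitrary example limit
        },
    },
}
```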
@@ -151,7 +151,7 @@ class ConcurrentPerPartitionCursor(Cursor):
  self._connector_state_converter = connector_state_converter
  self._cursor_field = cursor_field

- self._cursor_factory = cursor_factory
+ self._cursor_factory = cursor_factory # self._cursor_factory is flagged as private but is used in model_to_component_factory to ease pagination reset instantiation
  self._partition_router = partition_router

  # The dict is ordered to ensure that once the maximum number of partitions is reached,
@@ -539,6 +539,7 @@ class Action(Enum):
  FAIL = "FAIL"
  RETRY = "RETRY"
  IGNORE = "IGNORE"
+ RESET_PAGINATION = "RESET_PAGINATION"
  RATE_LIMITED = "RATE_LIMITED"


@@ -553,7 +554,14 @@ class HttpResponseFilter(BaseModel):
  action: Optional[Action] = Field(
  None,
  description="Action to execute if a response matches the filter.",
- examples=["SUCCESS", "FAIL", "RETRY", "IGNORE", "RATE_LIMITED"],
+ examples=[
+ "SUCCESS",
+ "FAIL",
+ "RETRY",
+ "IGNORE",
+ "RESET_PAGINATION",
+ "RATE_LIMITED",
+ ],
  title="Action",
  )
  failure_type: Optional[FailureType] = Field(
@@ -1173,6 +1181,16 @@ class LegacySessionTokenAuthenticator(BaseModel):
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+ class Action1(Enum):
+ SPLIT_USING_CURSOR = "SPLIT_USING_CURSOR"
+ RESET = "RESET"
+
+
+ class PaginationResetLimits(BaseModel):
+ type: Literal["PaginationResetLimits"]
+ number_of_records: Optional[int] = None
+
+
  class CsvDecoder(BaseModel):
  type: Literal["CsvDecoder"]
  encoding: Optional[str] = "utf-8"
@@ -2054,6 +2072,12 @@ class RecordSelector(BaseModel):
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+ class PaginationReset(BaseModel):
+ type: Literal["PaginationReset"]
+ action: Action1
+ limits: Optional[PaginationResetLimits] = None
+
+
  class GzipDecoder(BaseModel):
  type: Literal["GzipDecoder"]
  decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder]
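These generated models mirror the YAML definitions added earlier in this diff. A minimal sketch of constructing them directly follows; the numeric limit is illustrative, and in practice the values are parsed from a connector manifest rather than built by hand.

```python
# Illustrative construction of the new pagination-reset models; 10_000 is an arbitrary limit.
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    Action1,
    PaginationReset,
    PaginationResetLimits,
)

pagination_reset = PaginationReset(
    type="PaginationReset",
    action=Action1.SPLIT_USING_CURSOR,
    limits=PaginationResetLimits(type="PaginationResetLimits", number_of_records=10_000),
)
assert pagination_reset.limits is not None
assert pagination_reset.limits.number_of_records == 10_000
```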
@@ -2822,6 +2846,10 @@ class SimpleRetriever(BaseModel):
  None,
  description="Paginator component that describes how to navigate through the API's pages.",
  )
+ pagination_reset: Optional[PaginationReset] = Field(
+ None,
+ description="Describes what triggers pagination reset and how to handle it.",
+ )
  ignore_stream_slicer_parameters_on_paginated_requests: Optional[bool] = Field(
  False,
  description="If true, the partition router and incremental request options will be ignored when paginating requests. Request options set directly on the requester will not be ignored.",
@@ -116,6 +116,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
  )
  from airbyte_cdk.sources.declarative.models import (
  CustomStateMigration,
+ PaginationResetLimits,
  )
  from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import (
  DEPRECATION_LOGS_TAG,
@@ -358,6 +359,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
  PageIncrement as PageIncrementModel,
  )
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+ PaginationReset as PaginationResetModel,
+ )
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
  ParametrizedComponentsResolver as ParametrizedComponentsResolverModel,
  )
@@ -529,6 +533,7 @@ from airbyte_cdk.sources.declarative.retrievers.file_uploader import (
  LocalFileSystemFileWriter,
  NoopFileWriter,
  )
+ from airbyte_cdk.sources.declarative.retrievers.pagination_tracker import PaginationTracker
  from airbyte_cdk.sources.declarative.schema import (
  ComplexFieldType,
  DefaultSchemaLoader,
@@ -644,6 +649,8 @@ _NO_STREAM_SLICING = SinglePartitionRouter(parameters={})
  # this would be a circular import
  MAX_SLICES = 5

+ LOGGER = logging.getLogger(f"airbyte.model_to_component_factory")
+

  class ModelToComponentFactory:
  EPOCH_DATETIME_FORMAT = "%s"
@@ -2043,6 +2050,7 @@ class ModelToComponentFactory:
  if isinstance(concurrent_cursor, FinalStateCursor)
  else concurrent_cursor
  )
+
  retriever = self._create_component_from_model(
  model=model.retriever,
  config=config,
@@ -2051,12 +2059,9 @@ class ModelToComponentFactory:
  request_options_provider=request_options_provider,
  stream_slicer=stream_slicer,
  partition_router=partition_router,
- stop_condition_cursor=concurrent_cursor
- if self._is_stop_condition_on_cursor(model)
- else None,
- client_side_incremental_sync={"cursor": concurrent_cursor}
- if self._is_client_side_filtering_enabled(model)
- else None,
+ has_stop_condition_cursor=self._is_stop_condition_on_cursor(model),
+ is_client_side_incremental_sync=self._is_client_side_filtering_enabled(model),
+ cursor=concurrent_cursor,
  transformations=transformations,
  file_uploader=file_uploader,
  incremental_sync=model.incremental_sync,
@@ -3050,7 +3055,7 @@ class ModelToComponentFactory:
  name: str,
  transformations: List[RecordTransformation] | None = None,
  decoder: Decoder | None = None,
- client_side_incremental_sync: Dict[str, Any] | None = None,
+ client_side_incremental_sync_cursor: Optional[Cursor] = None,
  file_uploader: Optional[DefaultFileUploader] = None,
  **kwargs: Any,
  ) -> RecordSelector:
@@ -3066,14 +3071,14 @@ class ModelToComponentFactory:
  transform_before_filtering = (
  False if model.transform_before_filtering is None else model.transform_before_filtering
  )
- if client_side_incremental_sync:
+ if client_side_incremental_sync_cursor:
  record_filter = ClientSideIncrementalRecordFilterDecorator(
  config=config,
  parameters=model.parameters,
  condition=model.record_filter.condition
  if (model.record_filter and hasattr(model.record_filter, "condition"))
  else None,
- **client_side_incremental_sync,
+ cursor=client_side_incremental_sync_cursor,
  )
  transform_before_filtering = (
  True
@@ -3151,8 +3156,9 @@ class ModelToComponentFactory:
  name: str,
  primary_key: Optional[Union[str, List[str], List[List[str]]]],
  request_options_provider: Optional[RequestOptionsProvider] = None,
- stop_condition_cursor: Optional[Cursor] = None,
- client_side_incremental_sync: Optional[Dict[str, Any]] = None,
+ cursor: Optional[Cursor] = None,
+ has_stop_condition_cursor: bool = False,
+ is_client_side_incremental_sync: bool = False,
  transformations: List[RecordTransformation],
  file_uploader: Optional[DefaultFileUploader] = None,
  incremental_sync: Optional[
@@ -3182,6 +3188,9 @@ class ModelToComponentFactory:

  return _url or _url_base

+ if cursor is None:
+ cursor = FinalStateCursor(name, None, self._message_repository)
+
  decoder = (
  self._create_component_from_model(model=model.decoder, config=config)
  if model.decoder
@@ -3193,7 +3202,7 @@ class ModelToComponentFactory:
  config=config,
  decoder=decoder,
  transformations=transformations,
- client_side_incremental_sync=client_side_incremental_sync,
+ client_side_incremental_sync_cursor=cursor if is_client_side_incremental_sync else None,
  file_uploader=file_uploader,
  )

@@ -3270,7 +3279,7 @@ class ModelToComponentFactory:
  url_base=_get_url(requester),
  extractor_model=model.record_selector.extractor,
  decoder=decoder,
- cursor_used_for_stop_condition=stop_condition_cursor or None,
+ cursor_used_for_stop_condition=cursor if has_stop_condition_cursor else None,
  )
  if model.paginator
  else NoPagination(parameters={})
@@ -3319,6 +3328,13 @@ class ModelToComponentFactory:
  parameters=model.parameters or {},
  )

+ if (
+ model.record_selector.record_filter
+ and model.pagination_reset
+ and model.pagination_reset.limits
+ ):
+ raise ValueError("PaginationResetLimits are not supported when a record filter is configured.")
+
  return SimpleRetriever(
  name=name,
  paginator=paginator,
@@ -3332,9 +3348,34 @@ class ModelToComponentFactory:
  ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
  additional_query_properties=query_properties,
  log_formatter=self._get_log_formatter(log_formatter, name),
+ pagination_tracker_factory=self._create_pagination_tracker_factory(
+ model.pagination_reset, cursor
+ ),
  parameters=model.parameters or {},
  )

+ def _create_pagination_tracker_factory(
+ self, model: Optional[PaginationResetModel], cursor: Cursor
+ ) -> Callable[[], PaginationTracker]:
+ if model is None:
+ return lambda: PaginationTracker()
+
+ # Until we figure out a way to use any cursor for PaginationTracker, we will have to have this cursor selector logic
+ cursor_for_pagination_tracking = None
+ if isinstance(cursor, ConcurrentCursor):
+ cursor_for_pagination_tracking = cursor
+ elif isinstance(cursor, ConcurrentPerPartitionCursor):
+ cursor_for_pagination_tracking = cursor._cursor_factory.create( # type: ignore # if this becomes a problem, we would need to extract the cursor_factory instantiation logic and make it accessible here
+ {}, datetime.timedelta(0)
+ )
+ elif not isinstance(cursor, FinalStateCursor):
+ LOGGER.warning(
+ "Unknown cursor for PaginationTracker. Pagination resets might not work properly"
+ )
+
+ limit = model.limits.number_of_records if model and model.limits else None
+ return lambda: PaginationTracker(cursor_for_pagination_tracking, limit)
+
  def _get_log_formatter(
  self, log_formatter: Callable[[Response], Any] | None, name: str
  ) -> Callable[[Response], Any] | None:
@@ -0,0 +1,64 @@
+ from typing import Optional
+
+ from airbyte_cdk.sources.declarative.models import FailureType
+ from airbyte_cdk.sources.declarative.types import Record, StreamSlice
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor
+ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
+
+
+ class PaginationTracker:
+ _record_count: int
+ _number_of_attempt_with_same_slice: int
+
+ def __init__(
+ self, cursor: Optional[ConcurrentCursor] = None, max_number_of_records: Optional[int] = None
+ ) -> None:
+ """
+ Ideally, we would have passed the `Cursor` interface here instead of `ConcurrentCursor`, but not all
+ implementations of `Cursor` can support this use case. For example, if the `ConcurrentPerPartitionCursor`
+ switches to global state, we stop keeping track of the state per partition and therefore can't get an accurate
+ view for a specific stream_slice. In order to solve that, we decided to scope this feature to use only
+ ConcurrentCursor, which is the only "leaf" cursor that actually emits stream slices with `cursor_partition`.
+ """
+ self._cursor = cursor
+ self._limit = max_number_of_records
+ self.reset()
+
+ """
+ Given we have a cursor, we do not allow the same slice to be processed twice because otherwise we would
+ end up processing the same slice over and over.
+
+ Given no cursor, we assume that the pagination reset is for retrying purposes and we allow a single retry.
+ """
+ self._allowed_number_of_attempt_with_same_slice = 1 if self._cursor else 2
+ self._number_of_attempt_with_same_slice = 0
+
+ def observe(self, record: Record) -> None:
+ self._record_count += 1
+ if self._cursor:
+ self._cursor.observe(record)
+
+ def has_reached_limit(self) -> bool:
+ return self._limit is not None and self._record_count >= self._limit
+
+ def reset(self) -> None:
+ self._record_count = 0
+ self._number_of_attempt_with_same_slice = 0
+
+ def reduce_slice_range_if_possible(self, stream_slice: StreamSlice) -> StreamSlice:
+ new_slice = self._cursor.reduce_slice_range(stream_slice) if self._cursor else stream_slice
+
+ if new_slice == stream_slice:
+ self._number_of_attempt_with_same_slice += 1
+ if (
+ self._number_of_attempt_with_same_slice
+ >= self._allowed_number_of_attempt_with_same_slice
+ ):
+ raise AirbyteTracedException(
+ internal_message=f"There were {self._number_of_attempt_with_same_slice} attempts with the same slice already while the max allowed is {self._allowed_number_of_attempt_with_same_slice}",
+ failure_type=FailureType.system_error,
+ )
+ else:
+ self._number_of_attempt_with_same_slice = 0
+
+ return new_slice
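A quick sketch of how the tracker behaves in isolation, covering the record-limit path and the no-cursor retry path. The record and slice values are made up, and `reduce_slice_range_if_possible` simply returns the same slice here because no cursor is provided.

```python
# Illustrative use of PaginationTracker without a cursor; values are made up.
from airbyte_cdk.sources.declarative.retrievers.pagination_tracker import PaginationTracker
from airbyte_cdk.sources.types import Record, StreamSlice

tracker = PaginationTracker(max_number_of_records=2)
stream_slice = StreamSlice(partition={}, cursor_slice={})

for data in ({"id": 1}, {"id": 2}):
    tracker.observe(Record(data=data, stream_name="users", associated_slice=stream_slice))

assert tracker.has_reached_limit()  # two records observed, limit is two

tracker.reset()
assert not tracker.has_reached_limit()

# Without a cursor the slice cannot be narrowed: one retry with the same slice is allowed,
# a second attempt without an intervening reset raises an AirbyteTracedException.
same_slice = tracker.reduce_slice_range_if_possible(stream_slice)
assert same_slice == stream_slice
```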
@@ -3,10 +3,10 @@
  #

  import json
+ import logging
  from collections import defaultdict
  from dataclasses import InitVar, dataclass, field
  from functools import partial
- from itertools import islice
  from typing import (
  Any,
  Callable,
@@ -39,14 +39,20 @@ from airbyte_cdk.sources.declarative.requesters.request_options import (
  RequestOptionsProvider,
  )
  from airbyte_cdk.sources.declarative.requesters.requester import Requester
+ from airbyte_cdk.sources.declarative.retrievers.pagination_tracker import PaginationTracker
  from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
  from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
  from airbyte_cdk.sources.source import ExperimentalClassWarning
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
  from airbyte_cdk.sources.streams.core import StreamData
+ from airbyte_cdk.sources.streams.http.pagination_reset_exception import (
+ PaginationResetRequiredException,
+ )
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
  from airbyte_cdk.utils.mapping_helpers import combine_mappings

  FULL_REFRESH_SYNC_COMPLETE_KEY = "__ab_full_refresh_sync_complete"
+ LOGGER = logging.getLogger("airbyte")


  @dataclass
@@ -92,8 +98,14 @@ class SimpleRetriever(Retriever):
  ignore_stream_slicer_parameters_on_paginated_requests: bool = False
  additional_query_properties: Optional[QueryProperties] = None
  log_formatter: Optional[Callable[[requests.Response], Any]] = None
+ pagination_tracker_factory: Callable[[], PaginationTracker] = field(
+ default_factory=lambda: lambda: PaginationTracker()
+ )

  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+ # While changing `ModelToComponentFactory.create_simple_retriever` to accept a cursor, sources implementing
+ # a CustomRetriever inheriting from SimpleRetriever needed the following validation added.
+ self.cursor = None if isinstance(self.cursor, Cursor) else self.cursor
  self._paginator = self.paginator or NoPagination(parameters=parameters)
  self._parameters = parameters
  self._name = (
@@ -362,90 +374,98 @@ class SimpleRetriever(Retriever):
  stream_state: Mapping[str, Any],
  stream_slice: StreamSlice,
  ) -> Iterable[Record]:
- pagination_complete = False
- initial_token = self._paginator.get_initial_token()
- next_page_token: Optional[Mapping[str, Any]] = (
- {"next_page_token": initial_token} if initial_token is not None else None
- )
- while not pagination_complete:
- property_chunks: List[List[str]] = (
- list(
- self.additional_query_properties.get_request_property_chunks(
- stream_slice=stream_slice
- )
- )
- if self.additional_query_properties
- else [
- []
- ] # A single empty property chunk represents the case where property chunking is not configured
- )
-
+ pagination_tracker = self.pagination_tracker_factory()
+ reset_pagination = False
+ next_page_token = self._get_initial_next_page_token()
+ while True:
  merged_records: MutableMapping[str, Any] = defaultdict(dict)
  last_page_size = 0
  last_record: Optional[Record] = None
- response: Optional[requests.Response] = None
- for properties in property_chunks:
- if len(properties) > 0:
- stream_slice = StreamSlice(
- partition=stream_slice.partition or {},
- cursor_slice=stream_slice.cursor_slice or {},
- extra_fields={"query_properties": properties},
- )
-
- response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
- for current_record in records_generator_fn(response):
- if (
- current_record
- and self.additional_query_properties
- and self.additional_query_properties.property_chunking
+
+ response = None
+ try:
+ if (
+ self.additional_query_properties
+ and self.additional_query_properties.property_chunking
+ ):
+ for properties in self.additional_query_properties.get_request_property_chunks(
+ stream_slice=stream_slice
  ):
- merge_key = (
- self.additional_query_properties.property_chunking.get_merge_key(
- current_record
+ stream_slice = StreamSlice(
+ partition=stream_slice.partition or {},
+ cursor_slice=stream_slice.cursor_slice or {},
+ extra_fields={"query_properties": properties},
+ )
+ response = self._fetch_next_page(
+ stream_state, stream_slice, next_page_token
+ )
+
+ for current_record in records_generator_fn(response):
+ merge_key = (
+ self.additional_query_properties.property_chunking.get_merge_key(
+ current_record
+ )
  )
+ if merge_key:
+ _deep_merge(merged_records[merge_key], current_record)
+ else:
+ # We should still emit records even if the record did not have a merge key
+ pagination_tracker.observe(current_record)
+ last_page_size += 1
+ last_record = current_record
+ yield current_record
+
+ for merged_record in merged_records.values():
+ record = Record(
+ data=merged_record, stream_name=self.name, associated_slice=stream_slice
  )
- if merge_key:
- _deep_merge(merged_records[merge_key], current_record)
- else:
- # We should still emit records even if the record did not have a merge key
- last_page_size += 1
- last_record = current_record
- yield current_record
- else:
+ pagination_tracker.observe(record)
+ last_page_size += 1
+ last_record = record
+ yield record
+ else:
+ response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
+ for current_record in records_generator_fn(response):
+ pagination_tracker.observe(current_record)
  last_page_size += 1
  last_record = current_record
  yield current_record
-
- if (
- self.additional_query_properties
- and self.additional_query_properties.property_chunking
- ):
- for merged_record in merged_records.values():
- record = Record(
- data=merged_record, stream_name=self.name, associated_slice=stream_slice
- )
- last_page_size += 1
- last_record = record
- yield record
-
- if not response:
- pagination_complete = True
+ except PaginationResetRequiredException:
+ reset_pagination = True
+ else:
+ if not response:
+ break
+
+ if reset_pagination or pagination_tracker.has_reached_limit():
+ pagination_tracker.reset()
+ next_page_token = self._get_initial_next_page_token()
+ previous_slice = stream_slice
+ stream_slice = pagination_tracker.reduce_slice_range_if_possible(stream_slice)
+ LOGGER.info(
+ f"Hitting PaginationReset event. StreamSlice used will go from {previous_slice} to {stream_slice}"
+ )
+ reset_pagination = False
  else:
  last_page_token_value = (
  next_page_token.get("next_page_token") if next_page_token else None
  )
  next_page_token = self._next_page_token(
- response=response,
+ response=response, # type:ignore # we are breaking from the loop on the try/else if there is no response so this should be fine
  last_page_size=last_page_size,
  last_record=last_record,
  last_page_token_value=last_page_token_value,
  )
  if not next_page_token:
- pagination_complete = True
+ break

  # Always return an empty generator just in case no records were ever yielded
  yield from []

+ def _get_initial_next_page_token(self) -> Optional[Mapping[str, Any]]:
+ initial_token = self._paginator.get_initial_token()
+ next_page_token = {"next_page_token": initial_token} if initial_token is not None else None
+ return next_page_token
+
  def _read_single_page(
  self,
  records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
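Distilled from the hunk above: pages are fetched inside a try block, and when the error handler signals RESET_PAGINATION (surfaced as PaginationResetRequiredException) or the tracker reaches its record limit, the retriever goes back to the initial page token on a possibly narrowed slice instead of failing. Below is a simplified, self-contained restatement of that control flow; `fetch_page`, `extract_records` and `next_token_fn` are placeholders for the retriever's real paginator and record selection, not CDK APIs.

```python
# Simplified sketch of the reset-aware pagination loop above. The callables are
# hypothetical stand-ins; only PaginationResetRequiredException is a real CDK class.
from airbyte_cdk.sources.streams.http.pagination_reset_exception import (
    PaginationResetRequiredException,
)


def read_pages_with_reset(tracker, initial_token, fetch_page, extract_records, next_token_fn, stream_slice):
    next_page_token = initial_token
    while True:
        reset_pagination = False
        try:
            response = fetch_page(stream_slice, next_page_token)
            for record in extract_records(response):
                tracker.observe(record)
                yield record
        except PaginationResetRequiredException:
            reset_pagination = True
        else:
            if not response:
                break

        if reset_pagination or tracker.has_reached_limit():
            tracker.reset()
            next_page_token = initial_token
            # May narrow the slice to start at the last observed cursor value.
            stream_slice = tracker.reduce_slice_range_if_possible(stream_slice)
        else:
            next_page_token = next_token_fn(response)
            if not next_page_token:
                break
```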
@@ -3,15 +3,13 @@
  #

  import logging
- import time
  from abc import ABC, abstractmethod
  from datetime import datetime
  from enum import Enum
  from io import IOBase
  from os import makedirs, path
- from typing import Any, Iterable, List, MutableMapping, Optional, Set, Tuple
+ from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple

- from airbyte_protocol_dataclasses.models import FailureType
  from wcmatch.glob import GLOBSTAR, globmatch

  from airbyte_cdk.models import AirbyteRecordMessageFileReference
@@ -21,9 +19,8 @@ from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import
  preserve_directory_structure,
  use_file_transfer,
  )
- from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
  from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
- from airbyte_cdk.sources.file_based.remote_file import RemoteFile, UploadableRemoteFile
+ from airbyte_cdk.sources.file_based.remote_file import RemoteFile


  class FileReadMode(Enum):
@@ -37,7 +34,6 @@ class AbstractFileBasedStreamReader(ABC):
  FILE_NAME = "file_name"
  LOCAL_FILE_PATH = "local_file_path"
  FILE_FOLDER = "file_folder"
- FILE_SIZE_LIMIT = 1_500_000_000

  def __init__(self) -> None:
  self._config = None
@@ -117,6 +113,16 @@ class AbstractFileBasedStreamReader(ABC):
  seen.add(file.uri)
  yield file

+ @abstractmethod
+ def file_size(self, file: RemoteFile) -> int:
+ """Utility method to get size of the remote file.
+
+ This is required for connectors that will support writing to
+ files. If the connector does not support writing files, then the
+ subclass can simply `return 0`.
+ """
+ ...
+
  @staticmethod
  def file_matches_globs(file: RemoteFile, globs: List[str]) -> bool:
  # Use the GLOBSTAR flag to enable recursive ** matching
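Since `file_size` is now abstract, every concrete stream reader has to provide it. A minimal sketch for a hypothetical reader whose URIs are plain local paths follows; the class is illustrative and omits the other abstract members, and a connector that does not support file transfer can simply `return 0` as the docstring notes.

```python
# Hypothetical subclass for illustration only; other abstract members are omitted.
import os

from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
from airbyte_cdk.sources.file_based.remote_file import RemoteFile


class LocalPathStreamReader(AbstractFileBasedStreamReader):
    def file_size(self, file: RemoteFile) -> int:
        # For this illustrative reader a URI is a local path, so the size comes straight from disk.
        return os.path.getsize(file.uri)
```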
@@ -147,8 +153,9 @@ class AbstractFileBasedStreamReader(ABC):
  return include_identities_stream(self.config)
  return False

+ @abstractmethod
  def upload(
- self, file: UploadableRemoteFile, local_directory: str, logger: logging.Logger
+ self, file: RemoteFile, local_directory: str, logger: logging.Logger
  ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
  """
  This is required for connectors that will support writing to
@@ -166,53 +173,7 @@ class AbstractFileBasedStreamReader(ABC):
  - file_size_bytes (int): The size of the referenced file in bytes.
  - source_file_relative_path (str): The relative path to the referenced file in source.
  """
- if not isinstance(file, UploadableRemoteFile):
- raise TypeError(f"Expected UploadableRemoteFile, got {type(file)}")
-
- file_size = file.size
-
- if file_size > self.FILE_SIZE_LIMIT:
- message = f"File size exceeds the {self.FILE_SIZE_LIMIT / 1e9} GB limit."
- raise FileSizeLimitError(
- message=message, internal_message=message, failure_type=FailureType.config_error
- )
-
- file_paths = self._get_file_transfer_paths(
- source_file_relative_path=file.source_file_relative_path,
- staging_directory=local_directory,
- )
- local_file_path = file_paths[self.LOCAL_FILE_PATH]
- file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
- file_name = file_paths[self.FILE_NAME]
-
- logger.info(
- f"Starting to download the file {file.file_uri_for_logging} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)"
- )
- start_download_time = time.time()
-
- file.download_to_local_directory(local_file_path)
-
- write_duration = time.time() - start_download_time
- logger.info(
- f"Finished downloading the file {file.file_uri_for_logging} and saved to {local_file_path} in {write_duration:,.2f} seconds."
- )
-
- file_record_data = FileRecordData(
- folder=file_paths[self.FILE_FOLDER],
- file_name=file_name,
- bytes=file_size,
- id=file.id,
- mime_type=file.mime_type,
- created_at=file.created_at,
- updated_at=file.updated_at,
- source_uri=file.uri,
- )
- file_reference = AirbyteRecordMessageFileReference(
- staging_file_url=local_file_path,
- source_file_relative_path=file_relative_path,
- file_size_bytes=file_size,
- )
- return file_record_data, file_reference
+ ...

  def _get_file_transfer_paths(
  self, source_file_relative_path: str, staging_directory: str
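With the default body removed, `upload` is abstract as well and each reader supplies its own download logic. A rough sketch of what a concrete implementation might look like for the same hypothetical local-path reader follows; `_get_file_transfer_paths` and the field names on `FileRecordData` and `AirbyteRecordMessageFileReference` mirror the base implementation deleted above, while the copy step, the omission of optional metadata fields, and everything else is illustrative.

```python
# Continued hypothetical sketch; assumes FileRecordData's optional metadata fields can be omitted.
import logging
import shutil
from typing import Tuple

from airbyte_cdk.models import AirbyteRecordMessageFileReference
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
from airbyte_cdk.sources.file_based.remote_file import RemoteFile


class LocalPathStreamReader(AbstractFileBasedStreamReader):
    def upload(
        self, file: RemoteFile, local_directory: str, logger: logging.Logger
    ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
        file_size = self.file_size(file)
        paths = self._get_file_transfer_paths(
            source_file_relative_path=file.uri, staging_directory=local_directory
        )
        local_file_path = paths[self.LOCAL_FILE_PATH]
        shutil.copyfile(file.uri, local_file_path)  # the "download" is a plain copy for local paths

        file_record_data = FileRecordData(
            folder=paths[self.FILE_FOLDER],
            file_name=paths[self.FILE_NAME],
            bytes=file_size,
            mime_type=file.mime_type,
            source_uri=file.uri,
        )
        file_reference = AirbyteRecordMessageFileReference(
            staging_file_url=local_file_path,
            source_file_relative_path=paths[self.FILE_RELATIVE_PATH],
            file_size_bytes=file_size,
        )
        return file_record_data, file_reference
```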
@@ -7,7 +7,7 @@ from typing import Iterable, Tuple
  from airbyte_cdk.models import AirbyteRecordMessageFileReference
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
  from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
- from airbyte_cdk.sources.file_based.remote_file import UploadableRemoteFile
+ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
  from airbyte_cdk.sources.utils.files_directory import get_files_directory


@@ -17,7 +17,7 @@ class FileTransfer:

  def upload(
  self,
- file: UploadableRemoteFile,
+ file: RemoteFile,
  stream_reader: AbstractFileBasedStreamReader,
  logger: logging.Logger,
  ) -> Iterable[Tuple[FileRecordData, AirbyteRecordMessageFileReference]]:
@@ -1,7 +1,7 @@
  #
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
  #
- from abc import ABC, abstractmethod
+
  from datetime import datetime
  from typing import Optional

@@ -16,42 +16,3 @@ class RemoteFile(BaseModel):
  uri: str
  last_modified: datetime
  mime_type: Optional[str] = None
-
-
- class UploadableRemoteFile(RemoteFile, ABC):
- """
- A file in a file-based stream that supports uploading(file transferring).
- """
-
- id: Optional[str] = None
- created_at: Optional[str] = None
- updated_at: Optional[str] = None
-
- @property
- @abstractmethod
- def size(self) -> int:
- """
- Returns the file size in bytes.
- """
- ...
-
- @abstractmethod
- def download_to_local_directory(self, local_file_path: str) -> None:
- """
- Download the file from remote source to local storage.
- """
- ...
-
- @property
- def source_file_relative_path(self) -> str:
- """
- Returns the relative path of the source file.
- """
- return self.uri
-
- @property
- def file_uri_for_logging(self) -> str:
- """
- Returns the URI for the file being logged.
- """
- return self.uri
@@ -41,7 +41,7 @@ class CursorField:
  def __init__(self, cursor_field_key: str) -> None:
  self.cursor_field_key = cursor_field_key

- def extract_value(self, record: Record) -> CursorValueType:
+ def extract_value(self, record: Record) -> Any:
  cursor_value = record.data.get(self.cursor_field_key)
  if cursor_value is None:
  raise ValueError(f"Could not find cursor field {self.cursor_field_key} in record")
@@ -174,6 +174,7 @@ class ConcurrentCursor(Cursor):
  # Flag to track if the logger has been triggered (per stream)
  self._should_be_synced_logger_triggered = False
  self._clamping_strategy = clamping_strategy
+ self._is_ascending_order = True

  # A lock is required when closing a partition because updating the cursor's concurrent_state is
  # not thread safe. When multiple partitions are being closed by the cursor at the same time, it is
@@ -245,6 +246,8 @@ class ConcurrentCursor(Cursor):

  if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
  self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value
+ elif most_recent_cursor_value > cursor_value:
+ self._is_ascending_order = False
  except ValueError:
  self._log_for_record_without_cursor_value()

@@ -516,3 +519,26 @@ class ConcurrentCursor(Cursor):
  f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record for stream {self._stream_name}. The incremental sync will assume it needs to be synced"
  )
  self._should_be_synced_logger_triggered = True
+
+ def reduce_slice_range(self, stream_slice: StreamSlice) -> StreamSlice:
+ # In theory, we might be more flexible here meaning that it doesn't need to be in ascending order but it just
+ # needs to be ordered. For now though, we will only support ascending order.
+ if not self._is_ascending_order:
+ LOGGER.warning(
+ "Attempting to reduce slice while records are not returned in incremental order might lead to missing records"
+ )
+
+ return StreamSlice(
+ partition=stream_slice.partition,
+ cursor_slice={
+ self._slice_boundary_fields_wrapper[
+ self._START_BOUNDARY
+ ]: self._connector_state_converter.output_format(
+ self._most_recent_cursor_value_per_partition[stream_slice]
+ ),
+ self._slice_boundary_fields_wrapper[self._END_BOUNDARY]: stream_slice.cursor_slice[
+ self._slice_boundary_fields_wrapper[self._END_BOUNDARY]
+ ],
+ },
+ extra_fields=stream_slice.extra_fields,
+ )
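Conceptually, `reduce_slice_range` moves a slice's start boundary up to the most recent cursor value observed for that slice while keeping the end boundary, so a restarted pagination only re-reads what was not yet covered. An illustrative before/after with made-up boundary field names and dates; the real field names come from the cursor's configured slice boundary fields.

```python
# Illustrative only: boundary field names and values are invented.
original_slice = {
    "partition": {"account_id": "42"},
    "cursor_slice": {"start_time": "2024-01-01", "end_time": "2024-12-31"},
}
# Suppose the records observed so far (in ascending order) went up to 2024-06-15.
most_recent_observed = "2024-06-15"

reduced_slice = {
    "partition": original_slice["partition"],  # unchanged
    "cursor_slice": {
        "start_time": most_recent_observed,  # start moves forward past what was already read
        "end_time": original_slice["cursor_slice"]["end_time"],  # end stays the same
    },
}
```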
@@ -16,6 +16,7 @@ class ResponseAction(Enum):
  RETRY = "RETRY"
  FAIL = "FAIL"
  IGNORE = "IGNORE"
+ RESET_PAGINATION = "RESET_PAGINATION"
  RATE_LIMITED = "RATE_LIMITED"


@@ -42,6 +42,9 @@ from airbyte_cdk.sources.streams.http.exceptions import (
  RequestBodyException,
  UserDefinedBackoffException,
  )
+ from airbyte_cdk.sources.streams.http.pagination_reset_exception import (
+ PaginationResetRequiredException,
+ )
  from airbyte_cdk.sources.streams.http.rate_limiting import (
  http_client_default_backoff_handler,
  rate_limit_default_backoff_handler,
@@ -428,6 +431,9 @@ class HttpClient:
  if error_resolution.response_action not in self._ACTIONS_TO_RETRY_ON:
  self._evict_key(request)

+ if error_resolution.response_action == ResponseAction.RESET_PAGINATION:
+ raise PaginationResetRequiredException()
+
  # Emit stream status RUNNING with the reason RATE_LIMITED to log that the rate limit has been reached
  if error_resolution.response_action == ResponseAction.RATE_LIMITED:
  # TODO: Update to handle with message repository when concurrent message repository is ready
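Putting the pieces together: an error handler that resolves a response to ResponseAction.RESET_PAGINATION causes HttpClient (as shown just above) to raise PaginationResetRequiredException, which SimpleRetriever catches to restart pagination. A hedged sketch of a resolution function for a hypothetical API that rejects deep offsets; the 400-plus-message condition is invented, and the ErrorResolution construction assumes its usual response_action field.

```python
# Hedged sketch: the condition is invented for illustration and the ErrorResolution
# construction assumes its usual response_action field.
from typing import Optional, Union

import requests

from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
    ErrorResolution,
    ResponseAction,
)


def interpret(response_or_exception: Optional[Union[requests.Response, Exception]]) -> Optional[ErrorResolution]:
    if (
        isinstance(response_or_exception, requests.Response)
        and response_or_exception.status_code == 400
        and "offset too deep" in response_or_exception.text
    ):
        return ErrorResolution(response_action=ResponseAction.RESET_PAGINATION)
    return None  # defer to the default error handling otherwise
```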
@@ -0,0 +1,2 @@
+ class PaginationResetRequiredException(Exception):
+ pass
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: airbyte-cdk
- Version: 7.3.1
+ Version: 7.3.2.dev0
  Summary: A framework for writing Airbyte Connectors.
  Home-page: https://airbyte.com
  License: MIT
@@ -130,7 +130,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=z0AgJ6AZ
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=D5WeK1Iw_T0ZxLKCmKLowyO7GwxnwfhYom-sd1W14uQ,187793
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=0rHsucvsnGfcDVZ3hn9mBU9bhiHVC979LfXuZfoqaUk,188875
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
  airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=qB4lRUrCXLTE-a3VlpOLaazHiC7RIF_FIVJesuz7ebw,8078
  airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=1PeKwuMK8x9dsA2zqUjSVinEWVSEgYcUS6npiW3aC2c,855
@@ -150,7 +150,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=vCpwX1PVRFP
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=WJyA2OYIEgFpVP5Y3o0tIj69AV6IKkn9B16MeXaEItI,6513
  airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
  airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=_y8H65KgdmVNpwQAzXtXzi-t9mY6bmIIAWtRAbpHfEo,295
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=vaynWCXmScAuVnrbJ2T7M1Y4RSZO7ctAej-kzZJYifk,27868
+ airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=ldroIGnz1rie9cNZF-Jsl3J6yAqNe7KYS7PNE342Eqs,27995
  airbyte_cdk/sources/declarative/interpolation/__init__.py,sha256=Kh7FxhfetyNVDnAQ9zSxNe4oUbb8CvoW7Mqz7cs2iPg,437
  airbyte_cdk/sources/declarative/interpolation/filters.py,sha256=cYap5zzOxIJWCLIfbkNlpyfUhjZ8FklLroIG4WGzYVs,5537
  airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py,sha256=8F3ntT_Mfo8cO9n6dCq8rTfJIpfKmzRCsVtVdhzaoGc,1964
@@ -165,14 +165,14 @@ airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migrati
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
  airbyte_cdk/sources/declarative/models/base_model_with_deprecations.py,sha256=Imnj3yef0aqRdLfaUxkIYISUb8YkiPrRH_wBd-x8HjM,5999
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=t2td-7swgXY3RJc1VDBFUYI2Blc55j5TDFdg90aHwlU,132123
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=oLHcK2TVRgzahkDPuPvZ-6OqXS_DQU2gcBGq3SRpKsY,132793
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
  airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9Rbu5OexXSDN9QWDo8YU4tT9M2LDVOgGA,802
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=la9Ulpc0lQewiBLKJ0FpsWxyU5XISv-ulmFRHJLJ1Pc,11292
  airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lMjcNJKqU3HcOjNZyR4_Ln1v32HK-7iNcFq4nmFjxSE,183622
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=AZGtNkPHxL8WbJMOknSHYmJxSPZP4x0pq6xqQYiNdaM,185641
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=ocm4hZ4k-tEGs5HLrtI8ecWSK0hGqNH0Rvz2byx_HZk,6927
@@ -240,8 +240,9 @@ airbyte_cdk/sources/declarative/retrievers/file_uploader/file_uploader.py,sha256
  airbyte_cdk/sources/declarative/retrievers/file_uploader/file_writer.py,sha256=V8gAFjQXkhX5mwj1NafdcUrMfMBNF1hi0mrdXIl5qEc,359
  airbyte_cdk/sources/declarative/retrievers/file_uploader/local_file_system_file_writer.py,sha256=jLpdonre1UHfbjGSD5AK_T0codLABJByTvbqepDZtEQ,422
  airbyte_cdk/sources/declarative/retrievers/file_uploader/noop_file_writer.py,sha256=1yfimzxm09d2j605cu_HhiYVDNVL1rUMi3vs_jYlIyY,330
+ airbyte_cdk/sources/declarative/retrievers/pagination_tracker.py,sha256=7IDpP9MwwMdiL-ZiHUMlopFbQ1P04RCtDmFojqNx4tc,2893
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=os5psYh8z7ZdCAvbfZeTpmjvPa7Qpx0mblpKf47ZaZM,1876
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=sa8xS8FTStqgp1kkc_ObJjO_b1Q4Nek3XdJ7KODLqQw,28136
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=HVxD66NqLqUPmCKRGAi-z9NM9ZlcCsmwHAdZMQZ8Uc4,29686
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
  airbyte_cdk/sources/declarative/schema/composite_schema_loader.py,sha256=ymGbvxS_QyGc4nnjEyRo5ch8bVedELO41PAUxKXZyMw,1113
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=UnbzlExmwoQiVV8zDg4lhAEaqA_0pRfwbMRe8yqOuWk,1834
@@ -298,18 +299,18 @@ airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha2
  airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
  airbyte_cdk/sources/file_based/file_based_source.py,sha256=Xg8OYWnGc-OcVBglvS08uwAWGWHBhEqsBnyODIkOK-4,20051
  airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py,sha256=4e7FXqQ9hueacexC0SyrZyjF8oREYHza8pKF9CgKbD8,5050
- airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=Yg9KRXpyAtElBrUOO8oX4WHQH6k6Lk7keklrZmB5Klg,9614
+ airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=rwz8AhEIqYB9gBF7uW9eR--eUiHOntzuwLH8jFHNacE,7854
  airbyte_cdk/sources/file_based/file_record_data.py,sha256=Vkr5AyZzlsOezjVCLhFrm_WpymlQdolWCnFAwqLJ9Iw,453
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=USEYqiICXBWpDV443VtNOCmUA-GINzY_Zah74_5w3qQ,10860
  airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
  airbyte_cdk/sources/file_based/file_types/excel_parser.py,sha256=BeplCq0hmojELU6bZCvvpRLpQ9us81TqbGYwrhd3INo,7188
- airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=rFxWaqItBux9tPf4xU03LT6b-wDZf1QolM92mP8Diuk,1120
+ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=5l2Jo6bp6neDmgM427PrZMZeqU0hCIZVWnzUZ_7BT10,1100
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
  airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=2TYOQl62FQPCa8otLbkDIk_j01EP3oWaKSfXGhCjCHg,19492
- airbyte_cdk/sources/file_based/remote_file.py,sha256=1Afzr2WFWwjiUz8R2vNFepeI192UNeHOZAXIGTWOzOM,1248
+ airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=dKXAOTmMI3YmC5u7PeHC9AaZmlL6ft7CYSFQKCg0sXw,9911
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
  airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=kjvX7nOmUALYd7HuZHilUzgJPZ-MnZ08mtvuBnt2tQ0,618
@@ -352,7 +353,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCB
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=h4ZewhWn2PzPTt0lZZjcUL4rrpW9E_of7prnI3bm-c4,14004
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=M0XmvF3vjlr4GbCM0XH1hAj7udiAONM9SnmXjqufzLM,1035
  airbyte_cdk/sources/streams/concurrent/clamping.py,sha256=i26GVyui2ScEXSP-IP_61K2HaTp1-6lTlYHsZVYpuZA,3240
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=Dxjx4IAHZ6HHyfJ-B5SUTTYgdb1ZiiBKsZm3pYUquzk,23411
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=ujGZIKhOD24a76mqo00EKrNDh2oMHkFZFU_MWTW-ZmY,24668
  airbyte_cdk/sources/streams/concurrent/cursor_types.py,sha256=ZyWLPpeLX1qXcP5MwS-wxK11IBMsnVPCw9zx8gA2_Ro,843
  airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=SSufbo5f7OOYS8DZaABXeJVvodcfp9wb8J9lT5Xik3s,4744
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
@@ -379,10 +380,11 @@ airbyte_cdk/sources/streams/http/error_handlers/error_handler.py,sha256=GuqP7U1e
  airbyte_cdk/sources/streams/http/error_handlers/error_message_parser.py,sha256=xC93uB5BJd3iOnAXCrYLJTitWeGZlqzwe55VtsZqNnE,456
  airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py,sha256=2gqececTxxUqO6aIkVNNXADg48Px5EHUwnXHL9KiPT8,4188
  airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha256=GW5rkBQLLTj7MEaDdbpG7DHxTQVRrDOg1ehLLxjqiM4,1828
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
+ airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=4uE83yZfBe8s_81U3yZRcY1eRhvJnz9NanCp7G2PY-k,2278
  airbyte_cdk/sources/streams/http/exceptions.py,sha256=TTUpWq_qLPtdvXqYPpMhtYbFVQ7dGtajDVfjb6KQ8z8,2099
  airbyte_cdk/sources/streams/http/http.py,sha256=0uariNq8OFnlX7iqOHwBhecxA-Hfd5hSY8_XCEgn3jI,28499
- airbyte_cdk/sources/streams/http/http_client.py,sha256=7VzR4Cm5Sqm79SdFg26WXPzQC_-RclPHAFKiyCjoFs8,25442
+ airbyte_cdk/sources/streams/http/http_client.py,sha256=Afa4bPJrazMmSG15y-Hum194LnlpkaNrfgjLQk1SzqM,25690
+ airbyte_cdk/sources/streams/http/pagination_reset_exception.py,sha256=M5zUi9OREH5DnHHoZUDbKf9uTdIBta8je5p52U9gu94,60
  airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
  airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
  airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=aWrBmJ8AhUtvtHhHq5JGVZFXjDa7jG8DZePG4gEs9VY,19800
@@ -457,9 +459,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=9YDJmnIGFsT51CVQf2tSSvTapGimITjEFGbUTSZAGTI,963
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
- airbyte_cdk-7.3.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
- airbyte_cdk-7.3.1.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
- airbyte_cdk-7.3.1.dist-info/METADATA,sha256=_n29oKSyO6A6mUMN1c6YqHvrJRFQrXOIhhAh1E0PuXo,6798
- airbyte_cdk-7.3.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
- airbyte_cdk-7.3.1.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
- airbyte_cdk-7.3.1.dist-info/RECORD,,
+ airbyte_cdk-7.3.2.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+ airbyte_cdk-7.3.2.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
+ airbyte_cdk-7.3.2.dev0.dist-info/METADATA,sha256=KkRSWb9jIulH2Hl6GmxMQNFstPn9tmri4oOLEeipXAY,6803
+ airbyte_cdk-7.3.2.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
+ airbyte_cdk-7.3.2.dev0.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
+ airbyte_cdk-7.3.2.dev0.dist-info/RECORD,,