airbyte-cdk 6.14.0.dev1__py3-none-any.whl → 6.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (20)
  1. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +51 -27
  2. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +45 -0
  3. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +19 -0
  4. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -5
  5. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +35 -52
  6. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +7 -10
  7. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +4 -9
  8. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +6 -11
  9. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +13 -13
  10. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +13 -14
  11. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +8 -7
  12. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -10
  13. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +64 -71
  14. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +3 -3
  15. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +61 -0
  16. {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.1.dist-info}/METADATA +1 -1
  17. {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.1.dist-info}/RECORD +20 -19
  18. {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.1.dist-info}/LICENSE.txt +0 -0
  19. {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.1.dist-info}/WHEEL +0 -0
  20. {airbyte_cdk-6.14.0.dev1.dist-info → airbyte_cdk-6.15.1.dist-info}/entry_points.txt +0 -0
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  import logging
6
- from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple
6
+ from typing import Any, Callable, Generic, Iterator, List, Mapping, Optional, Tuple, Union
7
7
 
8
8
  from airbyte_cdk.models import (
9
9
  AirbyteCatalog,
@@ -28,11 +28,15 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
28
28
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
29
29
  DatetimeBasedCursor as DatetimeBasedCursorModel,
30
30
  )
31
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
32
+ DeclarativeStream as DeclarativeStreamModel,
33
+ )
31
34
  from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
35
+ ComponentDefinition,
32
36
  ModelToComponentFactory,
33
37
  )
34
38
  from airbyte_cdk.sources.declarative.requesters import HttpRequester
35
- from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
39
+ from airbyte_cdk.sources.declarative.retrievers import Retriever, SimpleRetriever
36
40
  from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
37
41
  DeclarativePartitionFactory,
38
42
  StreamSlicerPartitionGenerator,
@@ -48,6 +52,7 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
48
52
  from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
49
53
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
50
54
  from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
55
+ from airbyte_cdk.sources.types import Config, StreamState
51
56
 
52
57
 
53
58
  class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
@@ -189,11 +194,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
189
194
  # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
190
195
  # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
191
196
  # so we need to treat them as synchronous
192
- if isinstance(declarative_stream, DeclarativeStream) and (
193
- name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
197
+ if (
198
+ isinstance(declarative_stream, DeclarativeStream)
199
+ and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
194
200
  == "SimpleRetriever"
195
- or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
196
- == "AsyncRetriever"
197
201
  ):
198
202
  incremental_sync_component_definition = name_to_stream_mapping[
199
203
  declarative_stream.name
@@ -213,11 +217,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
213
217
  and not incremental_sync_component_definition
214
218
  )
215
219
 
216
- is_async_job_stream = (
217
- name_to_stream_mapping[declarative_stream.name].get("retriever", {}).get("type")
218
- == "AsyncRetriever"
219
- )
220
-
221
220
  if self._is_datetime_incremental_without_partition_routing(
222
221
  declarative_stream, incremental_sync_component_definition
223
222
  ):
@@ -235,25 +234,15 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
235
234
  stream_state=stream_state,
236
235
  )
237
236
 
238
- retriever = declarative_stream.retriever
239
-
240
- # This is an optimization so that we don't invoke any cursor or state management flows within the
241
- # low-code framework because state management is handled through the ConcurrentCursor.
242
- if declarative_stream and isinstance(retriever, SimpleRetriever):
243
- # Also a temporary hack. In the legacy Stream implementation, as part of the read,
244
- # set_initial_state() is called to instantiate incoming state on the cursor. Although we no
245
- # longer rely on the legacy low-code cursor for concurrent checkpointing, low-code components
246
- # like StopConditionPaginationStrategyDecorator and ClientSideIncrementalRecordFilterDecorator
247
- # still rely on a DatetimeBasedCursor that is properly initialized with state.
248
- if retriever.cursor:
249
- retriever.cursor.set_initial_state(stream_state=stream_state)
250
- retriever.cursor = None
251
-
252
237
  partition_generator = StreamSlicerPartitionGenerator(
253
238
  DeclarativePartitionFactory(
254
239
  declarative_stream.name,
255
240
  declarative_stream.get_json_schema(),
256
- retriever,
241
+ self._retriever_factory(
242
+ name_to_stream_mapping[declarative_stream.name],
243
+ config,
244
+ stream_state,
245
+ ),
257
246
  self.message_repository,
258
247
  ),
259
248
  cursor,
@@ -283,7 +272,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
283
272
  DeclarativePartitionFactory(
284
273
  declarative_stream.name,
285
274
  declarative_stream.get_json_schema(),
286
- declarative_stream.retriever,
275
+ self._retriever_factory(
276
+ name_to_stream_mapping[declarative_stream.name],
277
+ config,
278
+ {},
279
+ ),
287
280
  self.message_repository,
288
281
  ),
289
282
  declarative_stream.retriever.stream_slicer,
@@ -422,3 +415,34 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
422
415
  if stream.stream.name not in concurrent_stream_names
423
416
  ]
424
417
  )
418
+
419
+ def _retriever_factory(
420
+ self, stream_config: ComponentDefinition, source_config: Config, stream_state: StreamState
421
+ ) -> Callable[[], Retriever]:
422
+ def _factory_method() -> Retriever:
423
+ declarative_stream: DeclarativeStream = self._constructor.create_component(
424
+ DeclarativeStreamModel,
425
+ stream_config,
426
+ source_config,
427
+ emit_connector_builder_messages=self._emit_connector_builder_messages,
428
+ )
429
+
430
+ # This is an optimization so that we don't invoke any cursor or state management flows within the
431
+ # low-code framework because state management is handled through the ConcurrentCursor.
432
+ if (
433
+ declarative_stream
434
+ and declarative_stream.retriever
435
+ and isinstance(declarative_stream.retriever, SimpleRetriever)
436
+ ):
437
+ # Also a temporary hack. In the legacy Stream implementation, as part of the read, set_initial_state() is
438
+ # called to instantiate incoming state on the cursor. Although we no longer rely on the legacy low-code cursor
439
+ # for concurrent checkpointing, low-code components like StopConditionPaginationStrategyDecorator and
440
+ # ClientSideIncrementalRecordFilterDecorator still rely on a DatetimeBasedCursor that is properly initialized
441
+ # with state.
442
+ if declarative_stream.retriever.cursor:
443
+ declarative_stream.retriever.cursor.set_initial_state(stream_state=stream_state)
444
+ declarative_stream.retriever.cursor = None
445
+
446
+ return declarative_stream.retriever
447
+
448
+ return _factory_method
@@ -1241,6 +1241,7 @@ definitions:
1241
1241
  - "$ref": "#/definitions/KeysToLower"
1242
1242
  - "$ref": "#/definitions/KeysToSnakeCase"
1243
1243
  - "$ref": "#/definitions/FlattenFields"
1244
+ - "$ref": "#/definitions/KeysReplace"
1244
1245
  state_migrations:
1245
1246
  title: State Migrations
1246
1247
  description: Array of state migrations to be applied on the input state
@@ -1785,6 +1786,7 @@ definitions:
1785
1786
  - "$ref": "#/definitions/KeysToLower"
1786
1787
  - "$ref": "#/definitions/KeysToSnakeCase"
1787
1788
  - "$ref": "#/definitions/FlattenFields"
1789
+ - "$ref": "#/definitions/KeysReplace"
1788
1790
  schema_type_identifier:
1789
1791
  "$ref": "#/definitions/SchemaTypeIdentifier"
1790
1792
  $parameters:
@@ -1883,6 +1885,49 @@ definitions:
1883
1885
  $parameters:
1884
1886
  type: object
1885
1887
  additionalProperties: true
1888
+ KeysReplace:
1889
+ title: Keys Replace
1890
+ description: A transformation that replaces symbols in keys.
1891
+ type: object
1892
+ required:
1893
+ - type
1894
+ - old
1895
+ - new
1896
+ properties:
1897
+ type:
1898
+ type: string
1899
+ enum: [KeysReplace]
1900
+ old:
1901
+ type: string
1902
+ title: Old value
1903
+ description: Old value to replace.
1904
+ examples:
1905
+ - " "
1906
+ - "{{ record.id }}"
1907
+ - "{{ config['id'] }}"
1908
+ - "{{ stream_slice['id'] }}"
1909
+ interpolation_context:
1910
+ - config
1911
+ - record
1912
+ - stream_state
1913
+ - stream_slice
1914
+ new:
1915
+ type: string
1916
+ title: New value
1917
+ description: New value to set.
1918
+ examples:
1919
+ - "_"
1920
+ - "{{ record.id }}"
1921
+ - "{{ config['id'] }}"
1922
+ - "{{ stream_slice['id'] }}"
1923
+ interpolation_context:
1924
+ - config
1925
+ - record
1926
+ - stream_state
1927
+ - stream_slice
1928
+ $parameters:
1929
+ type: object
1930
+ additionalProperties: true
1886
1931
  IterableDecoder:
1887
1932
  title: Iterable Decoder
1888
1933
  description: Use this if the response consists of strings separated by new lines (`\n`). The Decoder will wrap each row into a JSON object with the `record` key.
@@ -721,6 +721,23 @@ class KeysToSnakeCase(BaseModel):
721
721
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
722
722
 
723
723
 
724
+ class KeysReplace(BaseModel):
725
+ type: Literal["KeysReplace"]
726
+ old: str = Field(
727
+ ...,
728
+ description="Old value to replace.",
729
+ examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
730
+ title="Old value",
731
+ )
732
+ new: str = Field(
733
+ ...,
734
+ description="New value to set.",
735
+ examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
736
+ title="New value",
737
+ )
738
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
739
+
740
+
724
741
  class FlattenFields(BaseModel):
725
742
  type: Literal["FlattenFields"]
726
743
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -1701,6 +1718,7 @@ class DeclarativeStream(BaseModel):
1701
1718
  KeysToLower,
1702
1719
  KeysToSnakeCase,
1703
1720
  FlattenFields,
1721
+ KeysReplace,
1704
1722
  ]
1705
1723
  ]
1706
1724
  ] = Field(
@@ -1875,6 +1893,7 @@ class DynamicSchemaLoader(BaseModel):
1875
1893
  KeysToLower,
1876
1894
  KeysToSnakeCase,
1877
1895
  FlattenFields,
1896
+ KeysReplace,
1878
1897
  ]
1879
1898
  ]
1880
1899
  ] = Field(
@@ -254,6 +254,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
254
254
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
255
255
  JwtPayload as JwtPayloadModel,
256
256
  )
257
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
258
+ KeysReplace as KeysReplaceModel,
259
+ )
257
260
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
258
261
  KeysToLower as KeysToLowerModel,
259
262
  )
@@ -417,6 +420,9 @@ from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFiel
417
420
  from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
418
421
  FlattenFields,
419
422
  )
423
+ from airbyte_cdk.sources.declarative.transformations.keys_replace_transformation import (
424
+ KeysReplaceTransformation,
425
+ )
420
426
  from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
421
427
  KeysToLowerTransformation,
422
428
  )
@@ -509,6 +515,7 @@ class ModelToComponentFactory:
509
515
  GzipParserModel: self.create_gzip_parser,
510
516
  KeysToLowerModel: self.create_keys_to_lower_transformation,
511
517
  KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
518
+ KeysReplaceModel: self.create_keys_replace_transformation,
512
519
  FlattenFieldsModel: self.create_flatten_fields,
513
520
  IterableDecoderModel: self.create_iterable_decoder,
514
521
  XmlDecoderModel: self.create_xml_decoder,
@@ -630,6 +637,13 @@ class ModelToComponentFactory:
630
637
  ) -> KeysToSnakeCaseTransformation:
631
638
  return KeysToSnakeCaseTransformation()
632
639
 
640
+ def create_keys_replace_transformation(
641
+ self, model: KeysReplaceModel, config: Config, **kwargs: Any
642
+ ) -> KeysReplaceTransformation:
643
+ return KeysReplaceTransformation(
644
+ old=model.old, new=model.new, parameters=model.parameters or {}
645
+ )
646
+
633
647
  def create_flatten_fields(
634
648
  self, model: FlattenFieldsModel, config: Config, **kwargs: Any
635
649
  ) -> FlattenFields:
@@ -1560,7 +1574,12 @@ class ModelToComponentFactory:
1560
1574
  )
1561
1575
 
1562
1576
  def create_http_requester(
1563
- self, model: HttpRequesterModel, decoder: Decoder, config: Config, *, name: str
1577
+ self,
1578
+ model: HttpRequesterModel,
1579
+ config: Config,
1580
+ decoder: Decoder = JsonDecoder(parameters={}),
1581
+ *,
1582
+ name: str,
1564
1583
  ) -> HttpRequester:
1565
1584
  authenticator = (
1566
1585
  self._create_component_from_model(
@@ -1976,9 +1995,9 @@ class ModelToComponentFactory:
1976
1995
  config: Config,
1977
1996
  *,
1978
1997
  name: str,
1979
- transformations: List[RecordTransformation],
1980
- decoder: Optional[Decoder] = None,
1981
- client_side_incremental_sync: Optional[Dict[str, Any]] = None,
1998
+ transformations: List[RecordTransformation] | None = None,
1999
+ decoder: Decoder | None = None,
2000
+ client_side_incremental_sync: Dict[str, Any] | None = None,
1982
2001
  **kwargs: Any,
1983
2002
  ) -> RecordSelector:
1984
2003
  assert model.schema_normalization is not None # for mypy
@@ -2008,7 +2027,7 @@ class ModelToComponentFactory:
2008
2027
  name=name,
2009
2028
  config=config,
2010
2029
  record_filter=record_filter,
2011
- transformations=transformations,
2030
+ transformations=transformations or [],
2012
2031
  schema_normalization=schema_normalization,
2013
2032
  parameters=model.parameters or {},
2014
2033
  )
@@ -112,39 +112,27 @@ class DefaultPaginator(Paginator):
112
112
  )
113
113
  if isinstance(self.url_base, str):
114
114
  self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
115
-
116
- def get_initial_token(self) -> Optional[Any]:
117
- """
118
- Return the page token that should be used for the first request of a stream
119
-
120
- WARNING: get_initial_token() should not be used by streams that use RFR that perform checkpointing
121
- of state using page numbers. Because paginators are stateless
122
- """
123
- return self.pagination_strategy.initial_token
115
+ self._token: Optional[Any] = self.pagination_strategy.initial_token
124
116
 
125
117
  def next_page_token(
126
- self,
127
- response: requests.Response,
128
- last_page_size: int,
129
- last_record: Optional[Record],
130
- last_page_token_value: Optional[Any] = None,
118
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
131
119
  ) -> Optional[Mapping[str, Any]]:
132
- next_page_token = self.pagination_strategy.next_page_token(
133
- response=response,
134
- last_page_size=last_page_size,
135
- last_record=last_record,
136
- last_page_token_value=last_page_token_value,
120
+ self._token = self.pagination_strategy.next_page_token(
121
+ response, last_page_size, last_record
137
122
  )
138
- if next_page_token:
139
- return {"next_page_token": next_page_token}
123
+ if self._token:
124
+ return {"next_page_token": self._token}
140
125
  else:
141
126
  return None
142
127
 
143
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
144
- token = next_page_token.get("next_page_token") if next_page_token else None
145
- if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
128
+ def path(self) -> Optional[str]:
129
+ if (
130
+ self._token
131
+ and self.page_token_option
132
+ and isinstance(self.page_token_option, RequestPath)
133
+ ):
146
134
  # Replace url base to only return the path
147
- return str(token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
135
+ return str(self._token).replace(self.url_base.eval(self.config), "") # type: ignore # url_base is casted to a InterpolatedString in __post_init__
148
136
  else:
149
137
  return None
150
138
 
@@ -155,7 +143,7 @@ class DefaultPaginator(Paginator):
155
143
  stream_slice: Optional[StreamSlice] = None,
156
144
  next_page_token: Optional[Mapping[str, Any]] = None,
157
145
  ) -> MutableMapping[str, Any]:
158
- return self._get_request_options(RequestOptionType.request_parameter, next_page_token)
146
+ return self._get_request_options(RequestOptionType.request_parameter)
159
147
 
160
148
  def get_request_headers(
161
149
  self,
@@ -164,7 +152,7 @@ class DefaultPaginator(Paginator):
164
152
  stream_slice: Optional[StreamSlice] = None,
165
153
  next_page_token: Optional[Mapping[str, Any]] = None,
166
154
  ) -> Mapping[str, str]:
167
- return self._get_request_options(RequestOptionType.header, next_page_token)
155
+ return self._get_request_options(RequestOptionType.header)
168
156
 
169
157
  def get_request_body_data(
170
158
  self,
@@ -173,7 +161,7 @@ class DefaultPaginator(Paginator):
173
161
  stream_slice: Optional[StreamSlice] = None,
174
162
  next_page_token: Optional[Mapping[str, Any]] = None,
175
163
  ) -> Mapping[str, Any]:
176
- return self._get_request_options(RequestOptionType.body_data, next_page_token)
164
+ return self._get_request_options(RequestOptionType.body_data)
177
165
 
178
166
  def get_request_body_json(
179
167
  self,
@@ -182,21 +170,25 @@ class DefaultPaginator(Paginator):
182
170
  stream_slice: Optional[StreamSlice] = None,
183
171
  next_page_token: Optional[Mapping[str, Any]] = None,
184
172
  ) -> Mapping[str, Any]:
185
- return self._get_request_options(RequestOptionType.body_json, next_page_token)
173
+ return self._get_request_options(RequestOptionType.body_json)
186
174
 
187
- def _get_request_options(
188
- self, option_type: RequestOptionType, next_page_token: Optional[Mapping[str, Any]]
189
- ) -> MutableMapping[str, Any]:
175
+ def reset(self, reset_value: Optional[Any] = None) -> None:
176
+ if reset_value:
177
+ self.pagination_strategy.reset(reset_value=reset_value)
178
+ else:
179
+ self.pagination_strategy.reset()
180
+ self._token = self.pagination_strategy.initial_token
181
+
182
+ def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping[str, Any]:
190
183
  options = {}
191
184
 
192
- token = next_page_token.get("next_page_token") if next_page_token else None
193
185
  if (
194
186
  self.page_token_option
195
- and token is not None
187
+ and self._token is not None
196
188
  and isinstance(self.page_token_option, RequestOption)
197
189
  and self.page_token_option.inject_into == option_type
198
190
  ):
199
- options[self.page_token_option.field_name.eval(config=self.config)] = token # type: ignore # field_name is always cast to an interpolated string
191
+ options[self.page_token_option.field_name.eval(config=self.config)] = self._token # type: ignore # field_name is always cast to an interpolated string
200
192
  if (
201
193
  self.page_size_option
202
194
  and self.pagination_strategy.get_page_size()
@@ -212,9 +204,6 @@ class PaginatorTestReadDecorator(Paginator):
212
204
  """
213
205
  In some cases, we want to limit the number of requests that are made to the backend source. This class allows for limiting the number of
214
206
  pages that are queried throughout a read command.
215
-
216
- WARNING: This decorator is not currently thread-safe like the rest of the low-code framework because it has
217
- an internal state to track the current number of pages counted so that it can exit early during a test read
218
207
  """
219
208
 
220
209
  _PAGE_COUNT_BEFORE_FIRST_NEXT_CALL = 1
@@ -228,27 +217,17 @@ class PaginatorTestReadDecorator(Paginator):
228
217
  self._decorated = decorated
229
218
  self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
230
219
 
231
- def get_initial_token(self) -> Optional[Any]:
232
- self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
233
- return self._decorated.get_initial_token()
234
-
235
220
  def next_page_token(
236
- self,
237
- response: requests.Response,
238
- last_page_size: int,
239
- last_record: Optional[Record],
240
- last_page_token_value: Optional[Any] = None,
221
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
241
222
  ) -> Optional[Mapping[str, Any]]:
242
223
  if self._page_count >= self._maximum_number_of_pages:
243
224
  return None
244
225
 
245
226
  self._page_count += 1
246
- return self._decorated.next_page_token(
247
- response, last_page_size, last_record, last_page_token_value
248
- )
227
+ return self._decorated.next_page_token(response, last_page_size, last_record)
249
228
 
250
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
251
- return self._decorated.path(next_page_token)
229
+ def path(self) -> Optional[str]:
230
+ return self._decorated.path()
252
231
 
253
232
  def get_request_params(
254
233
  self,
@@ -293,3 +272,7 @@ class PaginatorTestReadDecorator(Paginator):
293
272
  return self._decorated.get_request_body_json(
294
273
  stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
295
274
  )
275
+
276
+ def reset(self, reset_value: Optional[Any] = None) -> None:
277
+ self._decorated.reset()
278
+ self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
@@ -19,7 +19,7 @@ class NoPagination(Paginator):
19
19
 
20
20
  parameters: InitVar[Mapping[str, Any]]
21
21
 
22
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
22
+ def path(self) -> Optional[str]:
23
23
  return None
24
24
 
25
25
  def get_request_params(
@@ -58,14 +58,11 @@ class NoPagination(Paginator):
58
58
  ) -> Mapping[str, Any]:
59
59
  return {}
60
60
 
61
- def get_initial_token(self) -> Optional[Any]:
62
- return None
63
-
64
61
  def next_page_token(
65
- self,
66
- response: requests.Response,
67
- last_page_size: int,
68
- last_record: Optional[Record],
69
- last_page_token_value: Optional[Any],
70
- ) -> Optional[Mapping[str, Any]]:
62
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
63
+ ) -> Mapping[str, Any]:
71
64
  return {}
65
+
66
+ def reset(self, reset_value: Optional[Any] = None) -> None:
67
+ # No state to reset
68
+ pass
@@ -24,18 +24,14 @@ class Paginator(ABC, RequestOptionsProvider):
24
24
  """
25
25
 
26
26
  @abstractmethod
27
- def get_initial_token(self) -> Optional[Any]:
27
+ def reset(self, reset_value: Optional[Any] = None) -> None:
28
28
  """
29
- Get the page token that should be included in the request to get the first page of records
29
+ Reset the pagination's inner state
30
30
  """
31
31
 
32
32
  @abstractmethod
33
33
  def next_page_token(
34
- self,
35
- response: requests.Response,
36
- last_page_size: int,
37
- last_record: Optional[Record],
38
- last_page_token_value: Optional[Any],
34
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
39
35
  ) -> Optional[Mapping[str, Any]]:
40
36
  """
41
37
  Returns the next_page_token to use to fetch the next page of records.
@@ -43,13 +39,12 @@ class Paginator(ABC, RequestOptionsProvider):
43
39
  :param response: the response to process
44
40
  :param last_page_size: the number of records read from the response
45
41
  :param last_record: the last record extracted from the response
46
- :param last_page_token_value: The current value of the page token made on the last request
47
42
  :return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
48
43
  """
49
44
  pass
50
45
 
51
46
  @abstractmethod
52
- def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
47
+ def path(self) -> Optional[str]:
53
48
  """
54
49
  Returns the URL path to hit to fetch the next page of records
55
50
 
@@ -43,6 +43,7 @@ class CursorPaginationStrategy(PaginationStrategy):
43
43
  )
44
44
 
45
45
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
46
+ self._initial_cursor = None
46
47
  if isinstance(self.cursor_value, str):
47
48
  self._cursor_value = InterpolatedString.create(self.cursor_value, parameters=parameters)
48
49
  else:
@@ -56,19 +57,10 @@ class CursorPaginationStrategy(PaginationStrategy):
56
57
 
57
58
  @property
58
59
  def initial_token(self) -> Optional[Any]:
59
- """
60
- CursorPaginationStrategy does not have an initial value because the next cursor is typically included
61
- in the response of the first request. For Resumable Full Refresh streams that checkpoint the page
62
- cursor, the next cursor should be read from the state or stream slice object.
63
- """
64
- return None
60
+ return self._initial_cursor
65
61
 
66
62
  def next_page_token(
67
- self,
68
- response: requests.Response,
69
- last_page_size: int,
70
- last_record: Optional[Record],
71
- last_page_token_value: Optional[Any] = None,
63
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
72
64
  ) -> Optional[Any]:
73
65
  decoded_response = next(self.decoder.decode(response))
74
66
 
@@ -95,5 +87,8 @@ class CursorPaginationStrategy(PaginationStrategy):
95
87
  )
96
88
  return token if token else None
97
89
 
90
+ def reset(self, reset_value: Optional[Any] = None) -> None:
91
+ self._initial_cursor = reset_value
92
+
98
93
  def get_page_size(self) -> Optional[int]:
99
94
  return self.page_size
@@ -52,6 +52,7 @@ class OffsetIncrement(PaginationStrategy):
52
52
  inject_on_first_request: bool = False
53
53
 
54
54
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
55
+ self._offset = 0
55
56
  page_size = str(self.page_size) if isinstance(self.page_size, int) else self.page_size
56
57
  if page_size:
57
58
  self._page_size: Optional[InterpolatedString] = InterpolatedString(
@@ -63,15 +64,11 @@ class OffsetIncrement(PaginationStrategy):
63
64
  @property
64
65
  def initial_token(self) -> Optional[Any]:
65
66
  if self.inject_on_first_request:
66
- return 0
67
+ return self._offset
67
68
  return None
68
69
 
69
70
  def next_page_token(
70
- self,
71
- response: requests.Response,
72
- last_page_size: int,
73
- last_record: Optional[Record],
74
- last_page_token_value: Optional[Any] = None,
71
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
75
72
  ) -> Optional[Any]:
76
73
  decoded_response = next(self.decoder.decode(response))
77
74
 
@@ -81,16 +78,19 @@ class OffsetIncrement(PaginationStrategy):
81
78
  and last_page_size < self._page_size.eval(self.config, response=decoded_response)
82
79
  ) or last_page_size == 0:
83
80
  return None
84
- elif last_page_token_value is None:
85
- # If the OffsetIncrement strategy does not inject on the first request, the incoming last_page_token_value
86
- # will be None. For this case, we assume that None was the first page and progress to the next offset
87
- return 0 + last_page_size
88
- elif not isinstance(last_page_token_value, int):
81
+ else:
82
+ self._offset += last_page_size
83
+ return self._offset
84
+
85
+ def reset(self, reset_value: Optional[Any] = 0) -> None:
86
+ if reset_value is None:
87
+ self._offset = 0
88
+ elif not isinstance(reset_value, int):
89
89
  raise ValueError(
90
- f"Last page token value {last_page_token_value} for OffsetIncrement pagination strategy was not an integer"
90
+ f"Reset value {reset_value} for OffsetIncrement pagination strategy was not an integer"
91
91
  )
92
92
  else:
93
- return last_page_token_value + last_page_size
93
+ self._offset = reset_value
94
94
 
95
95
  def get_page_size(self) -> Optional[int]:
96
96
  if self._page_size:
@@ -31,6 +31,7 @@ class PageIncrement(PaginationStrategy):
31
31
  inject_on_first_request: bool = False
32
32
 
33
33
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
34
+ self._page = self.start_from_page
34
35
  if isinstance(self.page_size, int) or (self.page_size is None):
35
36
  self._page_size = self.page_size
36
37
  else:
@@ -42,30 +43,28 @@ class PageIncrement(PaginationStrategy):
42
43
  @property
43
44
  def initial_token(self) -> Optional[Any]:
44
45
  if self.inject_on_first_request:
45
- return self.start_from_page
46
+ return self._page
46
47
  return None
47
48
 
48
49
  def next_page_token(
49
- self,
50
- response: requests.Response,
51
- last_page_size: int,
52
- last_record: Optional[Record],
53
- last_page_token_value: Optional[Any],
50
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
54
51
  ) -> Optional[Any]:
55
52
  # Stop paginating when there are fewer records than the page size or the current page has no records
56
53
  if (self._page_size and last_page_size < self._page_size) or last_page_size == 0:
57
54
  return None
58
- elif last_page_token_value is None:
59
- # If the PageIncrement strategy does not inject on the first request, the incoming last_page_token_value
60
- # may be None. When this is the case, we assume we've already requested the first page specified by
61
- # start_from_page and must now get the next page
62
- return self.start_from_page + 1
63
- elif not isinstance(last_page_token_value, int):
55
+ else:
56
+ self._page += 1
57
+ return self._page
58
+
59
+ def reset(self, reset_value: Optional[Any] = None) -> None:
60
+ if reset_value is None:
61
+ self._page = self.start_from_page
62
+ elif not isinstance(reset_value, int):
64
63
  raise ValueError(
65
- f"Last page token value {last_page_token_value} for PageIncrement pagination strategy was not an integer"
64
+ f"Reset value {reset_value} for PageIncrement pagination strategy was not an integer"
66
65
  )
67
66
  else:
68
- return last_page_token_value + 1
67
+ self._page = reset_value
69
68
 
70
69
  def get_page_size(self) -> Optional[int]:
71
70
  return self._page_size
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import abstractmethod
6
6
  from dataclasses import dataclass
7
- from typing import Any, Mapping, Optional
7
+ from typing import Any, Optional
8
8
 
9
9
  import requests
10
10
 
@@ -26,21 +26,22 @@ class PaginationStrategy:
26
26
 
27
27
  @abstractmethod
28
28
  def next_page_token(
29
- self,
30
- response: requests.Response,
31
- last_page_size: int,
32
- last_record: Optional[Record],
33
- last_page_token_value: Optional[Any],
29
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
34
30
  ) -> Optional[Any]:
35
31
  """
36
32
  :param response: response to process
37
33
  :param last_page_size: the number of records read from the response
38
34
  :param last_record: the last record extracted from the response
39
- :param last_page_token_value: The current value of the page token made on the last request
40
35
  :return: next page token. Returns None if there are no more pages to fetch
41
36
  """
42
37
  pass
43
38
 
39
+ @abstractmethod
40
+ def reset(self, reset_value: Optional[Any] = None) -> None:
41
+ """
42
+ Reset the pagination's inner state
43
+ """
44
+
44
45
  @abstractmethod
45
46
  def get_page_size(self) -> Optional[int]:
46
47
  """
@@ -44,19 +44,19 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
44
44
  self._stop_condition = stop_condition
45
45
 
46
46
  def next_page_token(
47
- self,
48
- response: requests.Response,
49
- last_page_size: int,
50
- last_record: Optional[Record],
51
- last_page_token_value: Optional[Any] = None,
47
+ self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
52
48
  ) -> Optional[Any]:
53
- # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure
54
- # will return records in descending order. In terms of performance/memory, we return the records lazily
49
+ # We evaluate in reverse order because the assumption is that most of the APIs using data feed structure will return records in
50
+ # descending order. In terms of performance/memory, we return the records lazily
55
51
  if last_record and self._stop_condition.is_met(last_record):
56
52
  return None
57
- return self._delegate.next_page_token(
58
- response, last_page_size, last_record, last_page_token_value
59
- )
53
+ return self._delegate.next_page_token(response, last_page_size, last_record)
54
+
55
+ def reset(self, reset_value: Optional[Any] = None) -> None:
56
+ if reset_value:
57
+ self._delegate.reset(reset_value)
58
+ else:
59
+ self._delegate.reset()
60
60
 
61
61
  def get_page_size(self) -> Optional[int]:
62
62
  return self._delegate.get_page_size()
@@ -6,7 +6,18 @@ import json
6
6
  from dataclasses import InitVar, dataclass, field
7
7
  from functools import partial
8
8
  from itertools import islice
9
- from typing import Any, Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union
9
+ from typing import (
10
+ Any,
11
+ Callable,
12
+ Iterable,
13
+ List,
14
+ Mapping,
15
+ MutableMapping,
16
+ Optional,
17
+ Set,
18
+ Tuple,
19
+ Union,
20
+ )
10
21
 
11
22
  import requests
12
23
 
@@ -79,6 +90,9 @@ class SimpleRetriever(Retriever):
79
90
 
80
91
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
81
92
  self._paginator = self.paginator or NoPagination(parameters=parameters)
93
+ self._last_response: Optional[requests.Response] = None
94
+ self._last_page_size: int = 0
95
+ self._last_record: Optional[Record] = None
82
96
  self._parameters = parameters
83
97
  self._name = (
84
98
  InterpolatedString(self._name, parameters=parameters)
@@ -86,6 +100,10 @@ class SimpleRetriever(Retriever):
86
100
  else self._name
87
101
  )
88
102
 
103
+ # This mapping is used during resumable full refresh syncs to indicate whether a partition has started syncing
104
+ # records. Partitions serve as the key and map to True if they already began processing records
105
+ self._partition_started: MutableMapping[Any, bool] = dict()
106
+
89
107
  @property # type: ignore
90
108
  def name(self) -> str:
91
109
  """
@@ -233,13 +251,17 @@ class SimpleRetriever(Retriever):
233
251
  raise ValueError("Request body json cannot be a string")
234
252
  return body_json
235
253
 
236
- def _paginator_path(self, next_page_token: Optional[Mapping[str, Any]] = None) -> Optional[str]:
254
+ def _paginator_path(
255
+ self,
256
+ ) -> Optional[str]:
237
257
  """
238
258
  If the paginator points to a path, follow it, else return nothing so the requester is used.
259
+ :param stream_state:
260
+ :param stream_slice:
239
261
  :param next_page_token:
240
262
  :return:
241
263
  """
242
- return self._paginator.path(next_page_token=next_page_token)
264
+ return self._paginator.path()
243
265
 
244
266
  def _parse_response(
245
267
  self,
@@ -250,15 +272,22 @@ class SimpleRetriever(Retriever):
250
272
  next_page_token: Optional[Mapping[str, Any]] = None,
251
273
  ) -> Iterable[Record]:
252
274
  if not response:
275
+ self._last_response = None
253
276
  yield from []
254
277
  else:
255
- yield from self.record_selector.select_records(
278
+ self._last_response = response
279
+ record_generator = self.record_selector.select_records(
256
280
  response=response,
257
281
  stream_state=stream_state,
258
282
  records_schema=records_schema,
259
283
  stream_slice=stream_slice,
260
284
  next_page_token=next_page_token,
261
285
  )
286
+ self._last_page_size = 0
287
+ for record in record_generator:
288
+ self._last_page_size += 1
289
+ self._last_record = record
290
+ yield record
262
291
 
263
292
  @property # type: ignore
264
293
  def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -270,13 +299,7 @@ class SimpleRetriever(Retriever):
270
299
  if not isinstance(value, property):
271
300
  self._primary_key = value
272
301
 
273
- def _next_page_token(
274
- self,
275
- response: requests.Response,
276
- last_page_size: int,
277
- last_record: Optional[Record],
278
- last_page_token_value: Optional[Any],
279
- ) -> Optional[Mapping[str, Any]]:
302
+ def _next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
280
303
  """
281
304
  Specifies a pagination strategy.
282
305
 
@@ -284,12 +307,7 @@ class SimpleRetriever(Retriever):
284
307
 
285
308
  :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
286
309
  """
287
- return self._paginator.next_page_token(
288
- response=response,
289
- last_page_size=last_page_size,
290
- last_record=last_record,
291
- last_page_token_value=last_page_token_value,
292
- )
310
+ return self._paginator.next_page_token(response, self._last_page_size, self._last_record)
293
311
 
294
312
  def _fetch_next_page(
295
313
  self,
@@ -298,7 +316,7 @@ class SimpleRetriever(Retriever):
298
316
  next_page_token: Optional[Mapping[str, Any]] = None,
299
317
  ) -> Optional[requests.Response]:
300
318
  return self.requester.send_request(
301
- path=self._paginator_path(next_page_token=next_page_token),
319
+ path=self._paginator_path(),
302
320
  stream_state=stream_state,
303
321
  stream_slice=stream_slice,
304
322
  next_page_token=next_page_token,
@@ -327,37 +345,20 @@ class SimpleRetriever(Retriever):
327
345
  # This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well.
328
346
  def _read_pages(
329
347
  self,
330
- records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
348
+ records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
331
349
  stream_state: Mapping[str, Any],
332
350
  stream_slice: StreamSlice,
333
- ) -> Iterable[Record]:
351
+ ) -> Iterable[StreamData]:
334
352
  pagination_complete = False
335
- initial_token = self._paginator.get_initial_token()
336
- next_page_token: Optional[Mapping[str, Any]] = (
337
- {"next_page_token": initial_token} if initial_token else None
338
- )
353
+ next_page_token = None
339
354
  while not pagination_complete:
340
355
  response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
341
-
342
- last_page_size = 0
343
- last_record: Optional[Record] = None
344
- for record in records_generator_fn(response):
345
- last_page_size += 1
346
- last_record = record
347
- yield record
356
+ yield from records_generator_fn(response)
348
357
 
349
358
  if not response:
350
359
  pagination_complete = True
351
360
  else:
352
- last_page_token_value = (
353
- next_page_token.get("next_page_token") if next_page_token else None
354
- )
355
- next_page_token = self._next_page_token(
356
- response=response,
357
- last_page_size=last_page_size,
358
- last_record=last_record,
359
- last_page_token_value=last_page_token_value,
360
- )
361
+ next_page_token = self._next_page_token(response)
361
362
  if not next_page_token:
362
363
  pagination_complete = True
363
364
 
@@ -366,38 +367,19 @@ class SimpleRetriever(Retriever):
366
367
 
367
368
  def _read_single_page(
368
369
  self,
369
- records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
370
+ records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
370
371
  stream_state: Mapping[str, Any],
371
372
  stream_slice: StreamSlice,
372
373
  ) -> Iterable[StreamData]:
373
- initial_token = stream_state.get("next_page_token")
374
- if initial_token is None:
375
- initial_token = self._paginator.get_initial_token()
376
- next_page_token: Optional[Mapping[str, Any]] = (
377
- {"next_page_token": initial_token} if initial_token else None
378
- )
379
-
380
- response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
381
-
382
- last_page_size = 0
383
- last_record: Optional[Record] = None
384
- for record in records_generator_fn(response):
385
- last_page_size += 1
386
- last_record = record
387
- yield record
374
+ response = self._fetch_next_page(stream_state, stream_slice)
375
+ yield from records_generator_fn(response)
388
376
 
389
377
  if not response:
390
- next_page_token = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
378
+ next_page_token: Mapping[str, Any] = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
391
379
  else:
392
- last_page_token_value = (
393
- next_page_token.get("next_page_token") if next_page_token else None
394
- )
395
- next_page_token = self._next_page_token(
396
- response=response,
397
- last_page_size=last_page_size,
398
- last_record=last_record,
399
- last_page_token_value=last_page_token_value,
400
- ) or {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
380
+ next_page_token = self._next_page_token(response) or {
381
+ FULL_REFRESH_SYNC_COMPLETE_KEY: True
382
+ }
401
383
 
402
384
  if self.cursor:
403
385
  self.cursor.close_slice(
@@ -432,14 +414,25 @@ class SimpleRetriever(Retriever):
432
414
  if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
433
415
  stream_state = self.state
434
416
 
435
- # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
436
- # fetch more records. The platform deletes stream state for full refresh streams before starting a
437
- # new job, so we don't need to worry about this value existing for the initial attempt
417
+ # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to fetch more records.
418
+ # The platform deletes stream state for full refresh streams before starting a new job, so we don't need to worry about
419
+ # this value existing for the initial attempt
438
420
  if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
439
421
  return
422
+ cursor_value = stream_state.get("next_page_token")
423
+
424
+ # The first attempt to read a page for the current partition should reset the paginator to the current
425
+ # cursor state which is initially assigned to the incoming state from the platform
426
+ partition_key = self._to_partition_key(_slice.partition)
427
+ if partition_key not in self._partition_started:
428
+ self._partition_started[partition_key] = True
429
+ self._paginator.reset(reset_value=cursor_value)
440
430
 
441
431
  yield from self._read_single_page(record_generator, stream_state, _slice)
442
432
  else:
433
+ # Fixing paginator types has a long tail of dependencies
434
+ self._paginator.reset()
435
+
443
436
  for stream_data in self._read_pages(record_generator, self.state, _slice):
444
437
  current_record = self._extract_record(stream_data, _slice)
445
438
  if self.cursor and current_record:
@@ -525,7 +518,7 @@ class SimpleRetriever(Retriever):
525
518
  stream_state: Mapping[str, Any],
526
519
  records_schema: Mapping[str, Any],
527
520
  stream_slice: Optional[StreamSlice],
528
- ) -> Iterable[Record]:
521
+ ) -> Iterable[StreamData]:
529
522
  yield from self._parse_response(
530
523
  response,
531
524
  stream_slice=stream_slice,
@@ -569,7 +562,7 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
569
562
  next_page_token: Optional[Mapping[str, Any]] = None,
570
563
  ) -> Optional[requests.Response]:
571
564
  return self.requester.send_request(
572
- path=self._paginator_path(next_page_token=next_page_token),
565
+ path=self._paginator_path(),
573
566
  stream_state=stream_state,
574
567
  stream_slice=stream_slice,
575
568
  next_page_token=next_page_token,
@@ -16,7 +16,7 @@ class DeclarativePartitionFactory:
16
16
  self,
17
17
  stream_name: str,
18
18
  json_schema: Mapping[str, Any],
19
- retriever: Retriever,
19
+ retriever_factory: Callable[[], Retriever],
20
20
  message_repository: MessageRepository,
21
21
  ) -> None:
22
22
  """
@@ -26,14 +26,14 @@ class DeclarativePartitionFactory:
26
26
  """
27
27
  self._stream_name = stream_name
28
28
  self._json_schema = json_schema
29
- self._retriever = retriever
29
+ self._retriever_factory = retriever_factory
30
30
  self._message_repository = message_repository
31
31
 
32
32
  def create(self, stream_slice: StreamSlice) -> Partition:
33
33
  return DeclarativePartition(
34
34
  self._stream_name,
35
35
  self._json_schema,
36
- self._retriever,
36
+ self._retriever_factory(),
37
37
  self._message_repository,
38
38
  stream_slice,
39
39
  )
@@ -0,0 +1,61 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from dataclasses import InitVar, dataclass
6
+ from typing import Any, Dict, Mapping, Optional
7
+
8
+ from airbyte_cdk import InterpolatedString
9
+ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
10
+ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
11
+
12
+
13
+ @dataclass
14
+ class KeysReplaceTransformation(RecordTransformation):
15
+ """
16
+ Transformation that replaces a substring in the names of a record's keys.
17
+
18
+ Example usage:
19
+ - type: KeysReplace
20
+ old: " "
21
+ new: "_"
22
+ Result:
23
+ from: {"created time": ..., "customer id": ..., "user id": ...}
24
+ to: {"created_time": ..., "customer_id": ..., "user_id": ...}
25
+ """
26
+
27
+ old: str
28
+ new: str
29
+ parameters: InitVar[Mapping[str, Any]]
30
+
31
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
32
+ self._old = InterpolatedString.create(self.old, parameters=parameters)
33
+ self._new = InterpolatedString.create(self.new, parameters=parameters)
34
+
35
+ def transform(
36
+ self,
37
+ record: Dict[str, Any],
38
+ config: Optional[Config] = None,
39
+ stream_state: Optional[StreamState] = None,
40
+ stream_slice: Optional[StreamSlice] = None,
41
+ ) -> None:
42
+ if config is None:
43
+ config = {}
44
+
45
+ kwargs = {"record": record, "stream_state": stream_state, "stream_slice": stream_slice}
46
+ old_key = str(self._old.eval(config, **kwargs))
47
+ new_key = str(self._new.eval(config, **kwargs))
48
+
49
+ def _transform(data: Dict[str, Any]) -> Dict[str, Any]:
50
+ result = {}
51
+ for key, value in data.items():
52
+ updated_key = key.replace(old_key, new_key)
53
+ if isinstance(value, dict):
54
+ result[updated_key] = _transform(value)
55
+ else:
56
+ result[updated_key] = value
57
+ return result
58
+
59
+ transformed_record = _transform(record)
60
+ record.clear()
61
+ record.update(transformed_record)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.14.0.dev1
3
+ Version: 6.15.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=v61HsAm_TmkhxbvOQS7Qvo4sNou-n9GtUT8thams6i0,22480
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p2686wsf1gjsumGKnh2o2Jjnrqg8QLGijEIrp-A,23412
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=w1WkfTgZN8znoC1X2XQCv6RhHdTxAO9O0pmFzQRL52k,130843
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=W8H8rYMEJihZBY3VgGUo-lo4OfCze9Rli2NorehDr38,131973
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
@@ -105,12 +105,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
105
105
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
106
106
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
107
107
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
108
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=1EdKXjEKoXVs7BuF4H9mBMSbVsDIy5uAv1Txboict7Q,91733
108
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=U64qHqBut90L29EuUJ2_4OdY6eCMZIL2MH4DqGYhifQ,92340
109
109
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
110
110
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
111
111
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
112
112
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
113
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=NYTa2KqBHq-JU2oKp0J79WADw8JgR2lgQS_unTB0bl4,107945
113
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=qoVwCMDpAxBSxG0vbXkjvw_nd_qW5wAeb1zYzzBUSj8,108622
114
114
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
115
115
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
116
116
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -135,15 +135,15 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.p
135
135
  airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=o0520AmHMb7SAoeokVNwoOzuZzIAT6ryx9uFYGSOrs0,8664
136
136
  airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=RqYPkgJFAWfcZBTc-JBcGHPm4JL1ZQOhs9GKU4MP2eE,14723
137
137
  airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
138
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=FnSl3qPvv5wD6ieAI2Ic5c4dqBk-3fRe4tCaWzq3YwM,11840
139
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=j6j9QRPaTbKQ2N661RFVKthhkWiodEp6ut0tKeEd0Ng,2019
140
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=OlN-y0PEOMzlUNUh3pzonoTpIJpGwkP4ibFengvpLVU,2230
138
+ airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=LxTq1hieznRWlYlfODdZbMDUml-g6NyBkdwVI2mCNMM,10910
139
+ airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=-P-QOlefFhEe99bsB2y3yTvA8c8kCCbfBaTS6qPvF6I,1927
140
+ airbyte_cdk/sources/declarative/requesters/paginators/paginator.py,sha256=ZgyvH7DOrASQ5K__J5SRAXH3REUW2n3yPHnFW9xq4NU,1972
141
141
  airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py,sha256=2gly8fuZpDNwtu1Qg6oE2jBLGqQRdzSLJdnpk_iDV6I,767
142
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=yLzzK5YIRTkXd2Z-BS__AZXuTd6HXjJIxq05K-lQoxI,3898
143
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=WvGt_DTFcAgTR-NHrlrR7B71yG-L6jmfW-Gwm9iYzjY,3624
144
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=Z2i6a-oKMmOTxHxsTVSnyaShkJ3u8xZw1xIJdx2yxss,2731
145
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=UiHQI2lsRDPqM4nMvKMnmsXA3gFg5BFE4lCPEBhuCTs,1317
146
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=LoKXdUbSgHEtSwtA8DFrnX6SpQbRVVwreY8NguTKTcI,2229
142
+ airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py,sha256=vFzpNv8BdgXrYO5qhi2_Un4x4y-EAQWxinZtEPWz5KI,3654
143
+ airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py,sha256=TKG4Mp1t8MfmFJDeHtXmxCp_ibRK03J5O04N5HVtBvE,3430
144
+ airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py,sha256=kQGpfr-dOwarxTIf2S4sHVulBzm8zSwQXBM7rOhkafA,2491
145
+ airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py,sha256=ABpO4t0UUziBZnyml8UT_NhlF6loekhQji57TpKnaiY,1290
146
+ airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py,sha256=2b005ulACvHgIL8ktTWwposu4umowyu0iGV2mGOb_Tg,2290
147
147
  airbyte_cdk/sources/declarative/requesters/request_option.py,sha256=_qmv8CLQQ3fERt6BuMZeRu6tZXscPoeARx1VJdWMQ_M,1055
148
148
  airbyte_cdk/sources/declarative/requesters/request_options/__init__.py,sha256=WCwpKqM4wKqy-DHJaCHbKAlFqRVOqMi9K5qonxIfi_Y,809
149
149
  airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py,sha256=FLkg0uzC9bc-zFnALWr0FLYpKsz8iK2xQsd4UOyeW08,3706
@@ -161,7 +161,7 @@ airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=Aio
161
161
  airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
162
162
  airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=_-d3MvHh-4r46i4wjQikD4ZygKA7TvuDu2i04qqULEg,3731
163
163
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
164
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=jxQ_9xcVD07r9PKhofitAqMkdX1k8ZNyy50qz5NwkFs,24540
164
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
165
165
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=HztgVVaZdil5UfgUZcv_Hyy84r89_EKRwyO2hoewNVg,749
166
166
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
167
167
  airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=H6A3NQ6kPPM-cUNPmdvDPc9xNzR1rQNrK95GbgCW334,8822
@@ -171,11 +171,12 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
171
171
  airbyte_cdk/sources/declarative/spec/__init__.py,sha256=H0UwoRhgucbKBIzg85AXrifybVmfpwWpPdy22vZKVuo,141
172
172
  airbyte_cdk/sources/declarative/spec/spec.py,sha256=ODSNUgkDOhnLQnwLjgSaME6R3kNeywjROvbNrWEnsgU,1876
173
173
  airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=sI9vhc95RwJYOnA0VKjcbtKgFcmAbWjhdWBXFbAijOs,176
174
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=ldmfzOhkA8yMPQKDOHO-bO8zUYJ0oVAs8BIZ-O57exk,3415
174
+ airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=E7feZ5xkHwFHODq8FSjwdGe291RZoCMCRHT1rWnQ1lI,3463
175
175
  airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
176
176
  airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
177
177
  airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=r4YdAuAk2bQtNWJMztIIy2CC-NglD9NeK1s1TeO9wkw,5027
178
178
  airbyte_cdk/sources/declarative/transformations/flatten_fields.py,sha256=ti9fLVk-EpMeDY7ImduvQq1YGounLYmH9dHzp7MIRxk,1703
179
+ airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py,sha256=vbIn6ump-Ut6g20yMub7PFoPBhOKVtrHSAUdcOUdLfw,1999
179
180
  airbyte_cdk/sources/declarative/transformations/keys_to_lower_transformation.py,sha256=RTs5KX4V3hM7A6QN1WlGF21YccTIyNH6qQI9IMb__hw,670
180
181
  airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,sha256=43zwe6_F5ba5C4eY0RgXxPz7ndPKZfXGChHepFn-2lk,2263
181
182
  airbyte_cdk/sources/declarative/transformations/remove_fields.py,sha256=EwUP0SZ2p4GRJ6Q8CUzlz9dcUeEidEFDlI2IBye2tlc,2745
@@ -340,8 +341,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
340
341
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
341
342
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
342
343
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
343
- airbyte_cdk-6.14.0.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
344
- airbyte_cdk-6.14.0.dev1.dist-info/METADATA,sha256=IE3J33y4yRYF6vsHR7l-BFHI4_K1LVq5S431ivv5Sos,5993
345
- airbyte_cdk-6.14.0.dev1.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
346
- airbyte_cdk-6.14.0.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
347
- airbyte_cdk-6.14.0.dev1.dist-info/RECORD,,
344
+ airbyte_cdk-6.15.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
345
+ airbyte_cdk-6.15.1.dist-info/METADATA,sha256=PZrqiNEmT_X9GWDFF9GMQZzyedYwkoKJ3JcjAGIYnFg,5988
346
+ airbyte_cdk-6.15.1.dist-info/WHEEL,sha256=RaoafKOydTQ7I_I3JTrPCg6kUmTgtm4BornzOqyEfJ8,88
347
+ airbyte_cdk-6.15.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
348
+ airbyte_cdk-6.15.1.dist-info/RECORD,,