airbyte-cdk 6.12.4.dev0__py3-none-any.whl → 6.13.0.dev0__py3-none-any.whl

This diff compares the contents of two package versions as published to their respective public registries. It is provided for informational purposes only.
Files changed (60)
  1. airbyte_cdk/__init__.py +93 -34
  2. airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -1
  3. airbyte_cdk/models/__init__.py +10 -11
  4. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +1 -1
  5. airbyte_cdk/sources/declarative/auth/__init__.py +2 -5
  6. airbyte_cdk/sources/declarative/auth/oauth.py +27 -12
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +25 -65
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +78 -1
  9. airbyte_cdk/sources/declarative/decoders/__init__.py +21 -3
  10. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
  11. airbyte_cdk/sources/declarative/extractors/__init__.py +10 -2
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +10 -6
  14. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -14
  15. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +49 -2
  16. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +96 -80
  17. airbyte_cdk/sources/declarative/partition_routers/__init__.py +23 -5
  18. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
  19. airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
  20. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
  21. airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
  22. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +52 -35
  23. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +10 -7
  24. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +9 -4
  25. airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
  26. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +11 -6
  27. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +16 -5
  28. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +14 -13
  29. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +7 -8
  30. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +10 -7
  31. airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +12 -3
  32. airbyte_cdk/sources/declarative/resolvers/__init__.py +31 -8
  33. airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
  34. airbyte_cdk/sources/declarative/retrievers/__init__.py +5 -2
  35. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +9 -32
  36. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +72 -65
  37. airbyte_cdk/sources/declarative/schema/__init__.py +14 -2
  38. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
  39. airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
  40. airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
  41. airbyte_cdk/sources/file_based/file_types/__init__.py +12 -3
  42. airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
  43. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
  44. airbyte_cdk/sources/message/__init__.py +7 -1
  45. airbyte_cdk/sources/streams/__init__.py +1 -1
  46. airbyte_cdk/sources/streams/checkpoint/__init__.py +2 -3
  47. airbyte_cdk/sources/streams/concurrent/cursor.py +0 -1
  48. airbyte_cdk/sources/streams/http/__init__.py +2 -2
  49. airbyte_cdk/sources/streams/http/error_handlers/__init__.py +2 -2
  50. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +8 -3
  51. airbyte_cdk/test/mock_http/__init__.py +1 -1
  52. airbyte_cdk/test/mock_http/mocker.py +3 -1
  53. airbyte_cdk/test/mock_http/response_builder.py +1 -1
  54. airbyte_cdk/utils/__init__.py +1 -1
  55. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/METADATA +2 -2
  56. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/RECORD +59 -58
  57. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -344
  58. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/LICENSE.txt +0 -0
  59. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/WHEEL +0 -0
  60. {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/resolvers/__init__.py
@@ -2,17 +2,40 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 #
 
-from airbyte_cdk.sources.declarative.resolvers.components_resolver import ComponentsResolver, ComponentMappingDefinition, ResolvedComponentMappingDefinition
-from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import HttpComponentsResolver
-from airbyte_cdk.sources.declarative.resolvers.config_components_resolver import ConfigComponentsResolver, StreamConfig
-from airbyte_cdk.sources.declarative.models import HttpComponentsResolver as HttpComponentsResolverModel
-from airbyte_cdk.sources.declarative.models import ConfigComponentsResolver as ConfigComponentsResolverModel
-from pydantic.v1 import BaseModel
 from typing import Mapping
 
+from pydantic.v1 import BaseModel
+
+from airbyte_cdk.sources.declarative.models import (
+    ConfigComponentsResolver as ConfigComponentsResolverModel,
+)
+from airbyte_cdk.sources.declarative.models import (
+    HttpComponentsResolver as HttpComponentsResolverModel,
+)
+from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
+    ComponentMappingDefinition,
+    ComponentsResolver,
+    ResolvedComponentMappingDefinition,
+)
+from airbyte_cdk.sources.declarative.resolvers.config_components_resolver import (
+    ConfigComponentsResolver,
+    StreamConfig,
+)
+from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import (
+    HttpComponentsResolver,
+)
+
 COMPONENTS_RESOLVER_TYPE_MAPPING: Mapping[str, type[BaseModel]] = {
     "HttpComponentsResolver": HttpComponentsResolverModel,
-    "ConfigComponentsResolver": ConfigComponentsResolverModel
+    "ConfigComponentsResolver": ConfigComponentsResolverModel,
 }
 
-__all__ = ["ComponentsResolver", "HttpComponentsResolver", "ComponentMappingDefinition", "ResolvedComponentMappingDefinition", "StreamConfig", "ConfigComponentsResolver", "COMPONENTS_RESOLVER_TYPE_MAPPING"]
+__all__ = [
+    "ComponentsResolver",
+    "HttpComponentsResolver",
+    "ComponentMappingDefinition",
+    "ResolvedComponentMappingDefinition",
+    "StreamConfig",
+    "ConfigComponentsResolver",
+    "COMPONENTS_RESOLVER_TYPE_MAPPING",
+]
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py
@@ -88,19 +88,25 @@ class HttpComponentsResolver(ComponentsResolver):
         """
         kwargs = {"stream_template_config": stream_template_config}
 
-        for components_values in self.retriever.read_records({}):
-            updated_config = deepcopy(stream_template_config)
-            kwargs["components_values"] = components_values  # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
-
-            for resolved_component in self._resolved_components:
-                valid_types = (
-                    (resolved_component.value_type,) if resolved_component.value_type else None
-                )
-                value = resolved_component.value.eval(
-                    self.config, valid_types=valid_types, **kwargs
-                )
+        for stream_slice in self.retriever.stream_slices():
+            for components_values in self.retriever.read_records(
+                records_schema={}, stream_slice=stream_slice
+            ):
+                updated_config = deepcopy(stream_template_config)
+                kwargs["components_values"] = components_values  # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
+                kwargs["stream_slice"] = stream_slice  # type: ignore[assignment] # stream_slice will always be of type Mapping[str, Any]
+
+                for resolved_component in self._resolved_components:
+                    valid_types = (
+                        (resolved_component.value_type,) if resolved_component.value_type else None
+                    )
+                    value = resolved_component.value.eval(
+                        self.config, valid_types=valid_types, **kwargs
+                    )
 
-                path = [path.eval(self.config, **kwargs) for path in resolved_component.field_path]
-                dpath.set(updated_config, path, value)
+                    path = [
+                        path.eval(self.config, **kwargs) for path in resolved_component.field_path
+                    ]
+                    dpath.set(updated_config, path, value)
 
-            yield updated_config
+                yield updated_config
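What changed behaviorally in this hunk: the resolver previously made a single unsliced `read_records({})` pass, so any partitioning configured on the retriever was ignored. It now iterates `stream_slices()` and hands each slice both to `read_records` and to the interpolation kwargs, so component mappings can reference `stream_slice` alongside `components_values`. A toy illustration of the iteration contract the resolver now consumes; the class below is a hypothetical stand-in, not CDK code:

    # Hypothetical retriever stub: one set of component values per record,
    # per partition slice, matching the nested loop in the hunk above.
    class FakePartitionedRetriever:
        def stream_slices(self):
            yield {"parent_id": "a"}
            yield {"parent_id": "b"}

        def read_records(self, records_schema, stream_slice):
            # e.g. one dynamic stream definition discovered per parent partition
            yield {"name": f"items_{stream_slice['parent_id']}"}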
airbyte_cdk/sources/declarative/retrievers/__init__.py
@@ -2,8 +2,11 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 
-from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
-from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever, SimpleRetrieverTestReadDecorator
 from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
+from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
+from airbyte_cdk.sources.declarative.retrievers.simple_retriever import (
+    SimpleRetriever,
+    SimpleRetrieverTestReadDecorator,
+)
 
 __all__ = ["Retriever", "SimpleRetriever", "SimpleRetrieverTestReadDecorator", "AsyncRetriever"]
airbyte_cdk/sources/declarative/retrievers/async_retriever.py
@@ -1,8 +1,8 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 
 
-from dataclasses import InitVar, dataclass, field
-from typing import Any, Callable, Iterable, Mapping, Optional
+from dataclasses import InitVar, dataclass
+from typing import Any, Iterable, Mapping, Optional
 
 from typing_extensions import deprecated
 
@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
     AsyncPartition,
 )
 from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
-from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter
-from airbyte_cdk.sources.declarative.retrievers import Retriever
-from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
+from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
+    AsyncJobPartitionRouter,
+)
+from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.source import ExperimentalClassWarning
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 class AsyncRetriever(Retriever):
     config: Config
     parameters: InitVar[Mapping[str, Any]]
-    job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
     record_selector: RecordSelector
-    stream_slicer: StreamSlicer = field(
-        default_factory=lambda: SinglePartitionRouter(parameters={})
-    )
+    stream_slicer: AsyncJobPartitionRouter
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self._job_orchestrator_factory = self.job_orchestrator_factory
-        self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
         self._parameters = parameters
 
     @property
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
         """
         pass
 
-    @property
-    def _job_orchestrator(self) -> AsyncJobOrchestrator:
-        if not self.__job_orchestrator:
-            raise AirbyteTracedException(
-                message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
-                internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
-                failure_type=FailureType.system_error,
-            )
-
-        return self.__job_orchestrator
-
     def _get_stream_state(self) -> StreamState:
         """
         Gets the current state of the stream.
@@ -99,15 +84,7 @@
         return stream_slice["partition"]  # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
 
     def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
-        slices = self.stream_slicer.stream_slices()
-        self.__job_orchestrator = self._job_orchestrator_factory(slices)
-
-        for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
-            yield StreamSlice(
-                partition=dict(completed_partition.stream_slice.partition)
-                | {"partition": completed_partition},
-                cursor_slice=completed_partition.stream_slice.cursor_slice,
-            )
+        return self.stream_slicer.stream_slices()
 
     def read_records(
         self,
@@ -116,7 +93,7 @@
     ) -> Iterable[StreamData]:
         stream_state: StreamState = self._get_stream_state()
         partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
-        records: Iterable[Mapping[str, Any]] = self._job_orchestrator.fetch_records(partition)
+        records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
 
         yield from self.record_selector.filter_and_transform(
             all_data=records,
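Net effect of the hunks above: `AsyncRetriever` no longer owns the `AsyncJobOrchestrator` lifecycle and instead delegates both slicing and record fetching to the new `AsyncJobPartitionRouter` (file 18 in the list, whose body this diff does not show). Judging purely from the logic removed here, the router plausibly looks something like the following reconstruction; treat it as a sketch, not the shipped code:

    from dataclasses import dataclass
    from typing import Any, Callable, Iterable, Mapping

    from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
        AsyncJobOrchestrator,
        AsyncPartition,
    )
    from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
    from airbyte_cdk.sources.types import StreamSlice

    # Speculative reconstruction, inferred from the code removed from AsyncRetriever.
    @dataclass
    class AsyncJobPartitionRouter:
        job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
        stream_slicer: StreamSlicer

        def stream_slices(self) -> Iterable[StreamSlice]:
            # Build the orchestrator from the underlying slices, then surface
            # one stream slice per completed async partition.
            self._job_orchestrator = self.job_orchestrator_factory(
                self.stream_slicer.stream_slices()
            )
            for completed in self._job_orchestrator.create_and_get_completed_partitions():
                yield StreamSlice(
                    partition=dict(completed.stream_slice.partition) | {"partition": completed},
                    cursor_slice=completed.stream_slice.cursor_slice,
                )

        def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
            # Called by AsyncRetriever.read_records via self.stream_slicer.fetch_records.
            return self._job_orchestrator.fetch_records(partition)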
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
@@ -6,18 +6,7 @@ import json
 from dataclasses import InitVar, dataclass, field
 from functools import partial
 from itertools import islice
-from typing import (
-    Any,
-    Callable,
-    Iterable,
-    List,
-    Mapping,
-    MutableMapping,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import Any, Callable, Iterable, List, Mapping, Optional, Set, Tuple, Union
 
 import requests
 
@@ -90,9 +79,6 @@ class SimpleRetriever(Retriever):
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._paginator = self.paginator or NoPagination(parameters=parameters)
-        self._last_response: Optional[requests.Response] = None
-        self._last_page_size: int = 0
-        self._last_record: Optional[Record] = None
         self._parameters = parameters
         self._name = (
             InterpolatedString(self._name, parameters=parameters)
@@ -100,10 +86,6 @@ class SimpleRetriever(Retriever):
             else self._name
         )
 
-        # This mapping is used during a resumable full refresh syncs to indicate whether a partition has started syncing
-        # records. Partitions serve as the key and map to True if they already began processing records
-        self._partition_started: MutableMapping[Any, bool] = dict()
-
     @property  # type: ignore
     def name(self) -> str:
         """
@@ -178,7 +160,7 @@
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.request_option_provider.get_request_headers,
+            self.stream_slicer.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
@@ -251,17 +233,13 @@
             raise ValueError("Request body json cannot be a string")
         return body_json
 
-    def _paginator_path(
-        self,
-    ) -> Optional[str]:
+    def _paginator_path(self, next_page_token: Optional[Mapping[str, Any]] = None) -> Optional[str]:
         """
         If the paginator points to a path, follow it, else return nothing so the requester is used.
-        :param stream_state:
-        :param stream_slice:
         :param next_page_token:
         :return:
         """
-        return self._paginator.path()
+        return self._paginator.path(next_page_token=next_page_token)
 
     def _parse_response(
         self,
@@ -272,22 +250,15 @@
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Iterable[Record]:
         if not response:
-            self._last_response = None
            yield from []
        else:
-            self._last_response = response
-            record_generator = self.record_selector.select_records(
+            yield from self.record_selector.select_records(
                 response=response,
                 stream_state=stream_state,
                 records_schema=records_schema,
                 stream_slice=stream_slice,
                 next_page_token=next_page_token,
             )
-            self._last_page_size = 0
-            for record in record_generator:
-                self._last_page_size += 1
-                self._last_record = record
-                yield record
 
     @property  # type: ignore
     def primary_key(self) -> Optional[Union[str, List[str], List[List[str]]]]:
@@ -299,7 +270,13 @@
         if not isinstance(value, property):
             self._primary_key = value
 
-    def _next_page_token(self, response: requests.Response) -> Optional[Mapping[str, Any]]:
+    def _next_page_token(
+        self,
+        response: requests.Response,
+        last_page_size: int,
+        last_record: Optional[Record],
+        last_page_token_value: Optional[Any],
+    ) -> Optional[Mapping[str, Any]]:
         """
         Specifies a pagination strategy.
 
@@ -307,7 +284,12 @@
 
         :return: The token for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
-        return self._paginator.next_page_token(response, self._last_page_size, self._last_record)
+        return self._paginator.next_page_token(
+            response=response,
+            last_page_size=last_page_size,
+            last_record=last_record,
+            last_page_token_value=last_page_token_value,
+        )
 
     def _fetch_next_page(
         self,
@@ -316,7 +298,7 @@
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(),
+            path=self._paginator_path(next_page_token=next_page_token),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
@@ -345,20 +327,37 @@
     # This logic is similar to _read_pages in the HttpStream class. When making changes here, consider making changes there as well.
     def _read_pages(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
-    ) -> Iterable[StreamData]:
+    ) -> Iterable[Record]:
         pagination_complete = False
-        next_page_token = None
+        initial_token = self._paginator.get_initial_token()
+        next_page_token: Optional[Mapping[str, Any]] = (
+            {"next_page_token": initial_token} if initial_token else None
+        )
         while not pagination_complete:
             response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
-            yield from records_generator_fn(response)
+
+            last_page_size = 0
+            last_record: Optional[Record] = None
+            for record in records_generator_fn(response):
+                last_page_size += 1
+                last_record = record
+                yield record
 
             if not response:
                 pagination_complete = True
             else:
-                next_page_token = self._next_page_token(response)
+                last_page_token_value = (
+                    next_page_token.get("next_page_token") if next_page_token else None
+                )
+                next_page_token = self._next_page_token(
+                    response=response,
+                    last_page_size=last_page_size,
+                    last_record=last_record,
+                    last_page_token_value=last_page_token_value,
+                )
                 if not next_page_token:
                     pagination_complete = True
 
@@ -367,19 +366,38 @@
 
     def _read_single_page(
         self,
-        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[StreamData]],
+        records_generator_fn: Callable[[Optional[requests.Response]], Iterable[Record]],
         stream_state: Mapping[str, Any],
         stream_slice: StreamSlice,
     ) -> Iterable[StreamData]:
-        response = self._fetch_next_page(stream_state, stream_slice)
-        yield from records_generator_fn(response)
+        initial_token = stream_state.get("next_page_token")
+        if initial_token is None:
+            initial_token = self._paginator.get_initial_token()
+        next_page_token: Optional[Mapping[str, Any]] = (
+            {"next_page_token": initial_token} if initial_token else None
+        )
+
+        response = self._fetch_next_page(stream_state, stream_slice, next_page_token)
+
+        last_page_size = 0
+        last_record: Optional[Record] = None
+        for record in records_generator_fn(response):
+            last_page_size += 1
+            last_record = record
+            yield record
 
         if not response:
-            next_page_token: Mapping[str, Any] = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
+            next_page_token = {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
         else:
-            next_page_token = self._next_page_token(response) or {
-                FULL_REFRESH_SYNC_COMPLETE_KEY: True
-            }
+            last_page_token_value = (
+                next_page_token.get("next_page_token") if next_page_token else None
+            )
+            next_page_token = self._next_page_token(
+                response=response,
+                last_page_size=last_page_size,
+                last_record=last_record,
+                last_page_token_value=last_page_token_value,
+            ) or {FULL_REFRESH_SYNC_COMPLETE_KEY: True}
 
         if self.cursor:
             self.cursor.close_slice(
@@ -414,25 +432,14 @@
         if self.cursor and isinstance(self.cursor, ResumableFullRefreshCursor):
             stream_state = self.state
 
-            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to fetch more records
-            # The platform deletes stream state for full refresh streams before starting a new job, so we don't need to worry about
-            # this value existing for the initial attempt
+            # Before syncing the RFR stream, we check if the job's prior attempt was successful and don't need to
+            # fetch more records. The platform deletes stream state for full refresh streams before starting a
+            # new job, so we don't need to worry about this value existing for the initial attempt
             if stream_state.get(FULL_REFRESH_SYNC_COMPLETE_KEY):
                 return
-            cursor_value = stream_state.get("next_page_token")
-
-            # The first attempt to read a page for the current partition should reset the paginator to the current
-            # cursor state which is initially assigned to the incoming state from the platform
-            partition_key = self._to_partition_key(_slice.partition)
-            if partition_key not in self._partition_started:
-                self._partition_started[partition_key] = True
-                self._paginator.reset(reset_value=cursor_value)
 
             yield from self._read_single_page(record_generator, stream_state, _slice)
         else:
-            # Fixing paginator types has a long tail of dependencies
-            self._paginator.reset()
-
             for stream_data in self._read_pages(record_generator, self.state, _slice):
                 current_record = self._extract_record(stream_data, _slice)
                 if self.cursor and current_record:
@@ -518,7 +525,7 @@
         stream_state: Mapping[str, Any],
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice],
-    ) -> Iterable[StreamData]:
+    ) -> Iterable[Record]:
         yield from self._parse_response(
             response,
             stream_slice=stream_slice,
@@ -562,7 +569,7 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(),
+            path=self._paginator_path(next_page_token=next_page_token),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
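Taken together, the simple_retriever.py hunks complete a move from stateful to stateless pagination: the `_last_response`/`_last_page_size`/`_last_record` instance fields and the `_partition_started` bookkeeping are gone, page counts live in loop locals, the previous token is handed back to the paginator as `last_page_token_value`, and `get_initial_token()` replaces the removed `reset()`/`reset(reset_value=...)` seeding. A strategy written against the new calling convention might look like this hypothetical sketch (not code from this package):

    from typing import Any, Mapping, Optional

    import requests

    class StatelessOffsetPaginator:
        """Hypothetical offset paginator matching the calls SimpleRetriever now makes."""

        def __init__(self, page_size: int) -> None:
            self._page_size = page_size

        def get_initial_token(self) -> Optional[Any]:
            # No token on the first request. Note that _read_pages only wraps a
            # truthy value as {"next_page_token": initial_token}.
            return None

        def next_page_token(
            self,
            response: requests.Response,
            last_page_size: int,
            last_record: Optional[Mapping[str, Any]],
            last_page_token_value: Optional[Any],
        ) -> Optional[Mapping[str, Any]]:
            # A short page ends pagination; otherwise advance the offset the
            # caller threaded back in, instead of reading mutable instance state.
            if last_page_size < self._page_size:
                return None
            return {"next_page_token": (last_page_token_value or 0) + last_page_size}

This is also what lets `_read_single_page` resume a resumable full refresh sync by seeding the token straight from `stream_state`, with no paginator reset required between attempts.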
airbyte_cdk/sources/declarative/schema/__init__.py
@@ -3,9 +3,21 @@
 #
 
 from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
+from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
+    DynamicSchemaLoader,
+    SchemaTypeIdentifier,
+    TypesMap,
+)
 from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
 from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
 from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
-from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
 
-__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader", "DynamicSchemaLoader", "TypesMap", "SchemaTypeIdentifier"]
+__all__ = [
+    "JsonFileSchemaLoader",
+    "DefaultSchemaLoader",
+    "SchemaLoader",
+    "InlineSchemaLoader",
+    "DynamicSchemaLoader",
+    "TypesMap",
+    "SchemaTypeIdentifier",
+]
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py
@@ -4,7 +4,7 @@
 
 
 from copy import deepcopy
-from dataclasses import InitVar, dataclass
+from dataclasses import InitVar, dataclass, field
 from typing import Any, List, Mapping, MutableMapping, Optional, Union
 
 import dpath
@@ -13,8 +13,9 @@ from typing_extensions import deprecated
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
+from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.source import ExperimentalClassWarning
-from airbyte_cdk.sources.types import Config
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 
 AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
     "string": {"type": ["null", "string"]},
@@ -103,6 +104,7 @@ class DynamicSchemaLoader(SchemaLoader):
     config: Config
     parameters: InitVar[Mapping[str, Any]]
     schema_type_identifier: SchemaTypeIdentifier
+    schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])
 
     def get_json_schema(self) -> Mapping[str, Any]:
         """
@@ -128,12 +130,27 @@ class DynamicSchemaLoader(SchemaLoader):
             )
             properties[key] = value
 
+        transformed_properties = self._transform(properties, {})
+
         return {
             "$schema": "http://json-schema.org/draft-07/schema#",
             "type": "object",
-            "properties": properties,
+            "properties": transformed_properties,
         }
 
+    def _transform(
+        self,
+        properties: Mapping[str, Any],
+        stream_state: StreamState,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> Mapping[str, Any]:
+        for transformation in self.schema_transformations:
+            transformation.transform(
+                properties,  # type: ignore # properties has type Mapping[str, Any], but Dict[str, Any] expected
+                config=self.config,
+            )
+        return properties
+
     def _get_key(
         self,
         raw_schema: MutableMapping[str, Any],
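The new `schema_transformations` field lets `DynamicSchemaLoader` post-process the inferred property map with the same `RecordTransformation` machinery used for records; as the hunk shows, `_transform` only requires an in-place `transform(properties, config=...)`. A minimal wiring sketch with a hypothetical transformation (a real one would subclass `RecordTransformation`):

    from typing import Any, Dict, Mapping, Optional

    class LowercasePropertyNames:
        """Hypothetical schema transformation: normalize discovered property names."""

        def transform(
            self,
            record: Dict[str, Any],
            config: Optional[Mapping[str, Any]] = None,
            **kwargs: Any,
        ) -> None:
            # Mutate the properties mapping in place, as _transform above expects.
            for key in list(record):
                record[key.lower()] = record.pop(key)

    # Usage (other required fields omitted):
    # DynamicSchemaLoader(..., schema_transformations=[LowercasePropertyNames()])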
airbyte_cdk/sources/file_based/availability_strategy/__init__.py
@@ -1,4 +1,11 @@
-from .abstract_file_based_availability_strategy import AbstractFileBasedAvailabilityStrategy, AbstractFileBasedAvailabilityStrategyWrapper
+from .abstract_file_based_availability_strategy import (
+    AbstractFileBasedAvailabilityStrategy,
+    AbstractFileBasedAvailabilityStrategyWrapper,
+)
 from .default_file_based_availability_strategy import DefaultFileBasedAvailabilityStrategy
 
-__all__ = ["AbstractFileBasedAvailabilityStrategy", "AbstractFileBasedAvailabilityStrategyWrapper", "DefaultFileBasedAvailabilityStrategy"]
+__all__ = [
+    "AbstractFileBasedAvailabilityStrategy",
+    "AbstractFileBasedAvailabilityStrategyWrapper",
+    "DefaultFileBasedAvailabilityStrategy",
+]
airbyte_cdk/sources/file_based/discovery_policy/__init__.py
@@ -1,4 +1,8 @@
-from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import AbstractDiscoveryPolicy
-from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import DefaultDiscoveryPolicy
+from airbyte_cdk.sources.file_based.discovery_policy.abstract_discovery_policy import (
+    AbstractDiscoveryPolicy,
+)
+from airbyte_cdk.sources.file_based.discovery_policy.default_discovery_policy import (
+    DefaultDiscoveryPolicy,
+)
 
 __all__ = ["AbstractDiscoveryPolicy", "DefaultDiscoveryPolicy"]
airbyte_cdk/sources/file_based/file_types/__init__.py
@@ -1,8 +1,8 @@
 from typing import Any, Mapping, Type
 
 from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
-from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
 from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
+from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
 from airbyte_cdk.sources.file_based.config.jsonl_format import JsonlFormat
 from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
 from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
@@ -10,11 +10,11 @@ from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
 from .avro_parser import AvroParser
 from .csv_parser import CsvParser
 from .excel_parser import ExcelParser
+from .file_transfer import FileTransfer
 from .file_type_parser import FileTypeParser
 from .jsonl_parser import JsonlParser
 from .parquet_parser import ParquetParser
 from .unstructured_parser import UnstructuredParser
-from .file_transfer import FileTransfer
 
 default_parsers: Mapping[Type[Any], FileTypeParser] = {
     AvroFormat: AvroParser(),
@@ -25,4 +25,13 @@ default_parsers: Mapping[Type[Any], FileTypeParser] = {
     UnstructuredFormat: UnstructuredParser(),
 }
 
-__all__ = ["AvroParser", "CsvParser", "ExcelParser", "JsonlParser", "ParquetParser", "UnstructuredParser", "FileTransfer", "default_parsers"]
+__all__ = [
+    "AvroParser",
+    "CsvParser",
+    "ExcelParser",
+    "JsonlParser",
+    "ParquetParser",
+    "UnstructuredParser",
+    "FileTransfer",
+    "default_parsers",
+]
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py
@@ -1,4 +1,6 @@
-from airbyte_cdk.sources.file_based.schema_validation_policies.abstract_schema_validation_policy import AbstractSchemaValidationPolicy
+from airbyte_cdk.sources.file_based.schema_validation_policies.abstract_schema_validation_policy import (
+    AbstractSchemaValidationPolicy,
+)
 from airbyte_cdk.sources.file_based.schema_validation_policies.default_schema_validation_policies import (
     DEFAULT_SCHEMA_VALIDATION_POLICIES,
     EmitRecordPolicy,
airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py
@@ -2,4 +2,8 @@ from .abstract_concurrent_file_based_cursor import AbstractConcurrentFileBasedCursor
 from .file_based_concurrent_cursor import FileBasedConcurrentCursor
 from .file_based_final_state_cursor import FileBasedFinalStateCursor
 
-__all__ = ["AbstractConcurrentFileBasedCursor", "FileBasedConcurrentCursor", "FileBasedFinalStateCursor"]
+__all__ = [
+    "AbstractConcurrentFileBasedCursor",
+    "FileBasedConcurrentCursor",
+    "FileBasedFinalStateCursor",
+]
airbyte_cdk/sources/message/__init__.py
@@ -10,4 +10,10 @@ from .repository import (
     NoopMessageRepository,
 )
 
-__all__ = ["InMemoryMessageRepository", "LogAppenderMessageRepositoryDecorator", "LogMessage", "MessageRepository", "NoopMessageRepository"]
+__all__ = [
+    "InMemoryMessageRepository",
+    "LogAppenderMessageRepositoryDecorator",
+    "LogMessage",
+    "MessageRepository",
+    "NoopMessageRepository",
+]
airbyte_cdk/sources/streams/__init__.py
@@ -3,6 +3,6 @@
 #
 
 # Initialize Streams Package
-from .core import NO_CURSOR_STATE_KEY, IncrementalMixin, CheckpointMixin, Stream
+from .core import NO_CURSOR_STATE_KEY, CheckpointMixin, IncrementalMixin, Stream
 
 __all__ = ["NO_CURSOR_STATE_KEY", "IncrementalMixin", "CheckpointMixin", "Stream"]
airbyte_cdk/sources/streams/checkpoint/__init__.py
@@ -8,12 +8,11 @@ from .checkpoint_reader import (
     FullRefreshCheckpointReader,
     IncrementalCheckpointReader,
     LegacyCursorBasedCheckpointReader,
-    ResumableFullRefreshCheckpointReader
+    ResumableFullRefreshCheckpointReader,
 )
 from .cursor import Cursor
 from .resumable_full_refresh_cursor import ResumableFullRefreshCursor
 
-
 __all__ = [
     "CheckpointMode",
     "CheckpointReader",
@@ -23,5 +22,5 @@ __all__ = [
     "IncrementalCheckpointReader",
     "LegacyCursorBasedCheckpointReader",
     "ResumableFullRefreshCheckpointReader",
-    "ResumableFullRefreshCursor"
+    "ResumableFullRefreshCursor",
 ]