airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.34.1.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
  2. airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
  3. airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
  4. airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
  5. airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
  6. airbyte_cdk/connector_builder/test_reader/types.py +75 -0
  7. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  8. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  9. airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
  10. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  11. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
  12. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +203 -100
  13. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  14. airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
  15. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +7 -2
  16. airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
  17. airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
  18. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
  19. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
  20. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
  21. airbyte_cdk/sources/declarative/interpolation/jinja.py +13 -0
  22. airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
  23. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
  24. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
  25. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  26. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  27. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
  28. airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
  29. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
  30. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  31. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  32. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
  33. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
  34. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
  35. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  36. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
  37. airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
  38. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  39. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
  40. airbyte_cdk/sources/file_based/file_based_source.py +70 -37
  41. airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
  42. airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
  43. airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
  44. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
  45. airbyte_cdk/sources/specs/transfer_modes.py +26 -0
  46. airbyte_cdk/sources/streams/call_rate.py +185 -47
  47. airbyte_cdk/sources/streams/http/http.py +1 -2
  48. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
  49. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
  50. airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
  51. airbyte_cdk/test/mock_http/mocker.py +9 -1
  52. airbyte_cdk/test/mock_http/response.py +6 -3
  53. airbyte_cdk/utils/datetime_helpers.py +48 -66
  54. airbyte_cdk/utils/mapping_helpers.py +126 -26
  55. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/METADATA +1 -1
  56. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/RECORD +60 -51
  57. airbyte_cdk/connector_builder/message_grouper.py +0 -448
  58. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE.txt +0 -0
  59. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/LICENSE_SHORT +0 -0
  60. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/WHEEL +0 -0
  61. {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.34.1.dev1.dist-info}/entry_points.txt +0 -0
@@ -60,10 +60,8 @@ from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
60
60
  from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
61
61
  from airbyte_cdk.sources.declarative.decoders import (
62
62
  Decoder,
63
- GzipJsonDecoder,
64
63
  IterableDecoder,
65
64
  JsonDecoder,
66
- JsonlDecoder,
67
65
  PaginationDecoderDecorator,
68
66
  XmlDecoder,
69
67
  ZipfileDecoder,
@@ -103,8 +101,8 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
103
101
  LegacyToPerPartitionStateMigration,
104
102
  )
105
103
  from airbyte_cdk.sources.declarative.models import (
106
- Clamping,
107
104
  CustomStateMigration,
105
+ GzipDecoder,
108
106
  )
109
107
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
110
108
  AddedFieldDefinition as AddedFieldDefinitionModel,
@@ -142,9 +140,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
142
140
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
143
141
  CompositeErrorHandler as CompositeErrorHandlerModel,
144
142
  )
145
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
146
- CompositeRawDecoder as CompositeRawDecoderModel,
147
- )
148
143
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
149
144
  ConcurrencyLevel as ConcurrencyLevelModel,
150
145
  )
@@ -155,7 +150,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
155
150
  ConstantBackoffStrategy as ConstantBackoffStrategyModel,
156
151
  )
157
152
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
158
- CsvParser as CsvParserModel,
153
+ CsvDecoder as CsvDecoderModel,
159
154
  )
160
155
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
161
156
  CursorPagination as CursorPaginationModel,
@@ -226,14 +221,17 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
226
221
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
227
222
  ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
228
223
  )
224
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
225
+ FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
226
+ )
229
227
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
230
228
  FlattenFields as FlattenFieldsModel,
231
229
  )
232
230
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
233
- GzipJsonDecoder as GzipJsonDecoderModel,
231
+ GzipDecoder as GzipDecoderModel,
234
232
  )
235
233
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
236
- GzipParser as GzipParserModel,
234
+ HTTPAPIBudget as HTTPAPIBudgetModel,
237
235
  )
238
236
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
239
237
  HttpComponentsResolver as HttpComponentsResolverModel,
@@ -241,6 +239,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
241
239
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
242
240
  HttpRequester as HttpRequesterModel,
243
241
  )
242
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
243
+ HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
244
+ )
244
245
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
245
246
  HttpResponseFilter as HttpResponseFilterModel,
246
247
  )
@@ -259,12 +260,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
259
260
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
260
261
  JsonlDecoder as JsonlDecoderModel,
261
262
  )
262
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
263
- JsonLineParser as JsonLineParserModel,
264
- )
265
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
266
- JsonParser as JsonParserModel,
267
- )
268
263
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
269
264
  JwtAuthenticator as JwtAuthenticatorModel,
270
265
  )
@@ -295,6 +290,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
295
290
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
296
291
  MinMaxDatetime as MinMaxDatetimeModel,
297
292
  )
293
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
294
+ MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
295
+ )
298
296
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
299
297
  NoAuth as NoAuthModel,
300
298
  )
@@ -313,6 +311,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
313
311
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
314
312
  ParentStreamConfig as ParentStreamConfigModel,
315
313
  )
314
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
315
+ Rate as RateModel,
316
+ )
316
317
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
317
318
  RecordFilter as RecordFilterModel,
318
319
  )
@@ -356,6 +357,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
356
357
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
357
358
  TypesMap as TypesMapModel,
358
359
  )
360
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
361
+ UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
362
+ )
359
363
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
360
364
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
361
365
  WaitTimeFromHeader as WaitTimeFromHeaderModel,
@@ -469,6 +473,15 @@ from airbyte_cdk.sources.message import (
469
473
  MessageRepository,
470
474
  NoopMessageRepository,
471
475
  )
476
+ from airbyte_cdk.sources.streams.call_rate import (
477
+ APIBudget,
478
+ FixedWindowCallRatePolicy,
479
+ HttpAPIBudget,
480
+ HttpRequestRegexMatcher,
481
+ MovingWindowCallRatePolicy,
482
+ Rate,
483
+ UnlimitedCallRatePolicy,
484
+ )
472
485
  from airbyte_cdk.sources.streams.concurrent.clamping import (
473
486
  ClampingEndProvider,
474
487
  ClampingStrategy,
@@ -520,6 +533,7 @@ class ModelToComponentFactory:
520
533
  self._evaluate_log_level(emit_connector_builder_messages)
521
534
  )
522
535
  self._connector_state_manager = connector_state_manager or ConnectorStateManager()
536
+ self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
523
537
 
524
538
  def _init_mappings(self) -> None:
525
539
  self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
@@ -531,9 +545,9 @@ class ModelToComponentFactory:
531
545
  CheckStreamModel: self.create_check_stream,
532
546
  CheckDynamicStreamModel: self.create_check_dynamic_stream,
533
547
  CompositeErrorHandlerModel: self.create_composite_error_handler,
534
- CompositeRawDecoderModel: self.create_composite_raw_decoder,
535
548
  ConcurrencyLevelModel: self.create_concurrency_level,
536
549
  ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
550
+ CsvDecoderModel: self.create_csv_decoder,
537
551
  CursorPaginationModel: self.create_cursor_pagination,
538
552
  CustomAuthenticatorModel: self.create_custom_component,
539
553
  CustomBackoffStrategyModel: self.create_custom_component,
@@ -563,10 +577,7 @@ class ModelToComponentFactory:
563
577
  InlineSchemaLoaderModel: self.create_inline_schema_loader,
564
578
  JsonDecoderModel: self.create_json_decoder,
565
579
  JsonlDecoderModel: self.create_jsonl_decoder,
566
- JsonLineParserModel: self.create_json_line_parser,
567
- JsonParserModel: self.create_json_parser,
568
- GzipJsonDecoderModel: self.create_gzipjson_decoder,
569
- GzipParserModel: self.create_gzip_parser,
580
+ GzipDecoderModel: self.create_gzip_decoder,
570
581
  KeysToLowerModel: self.create_keys_to_lower_transformation,
571
582
  KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
572
583
  KeysReplaceModel: self.create_keys_replace_transformation,
@@ -607,6 +618,12 @@ class ModelToComponentFactory:
607
618
  StreamConfigModel: self.create_stream_config,
608
619
  ComponentMappingDefinitionModel: self.create_components_mapping_definition,
609
620
  ZipfileDecoderModel: self.create_zipfile_decoder,
621
+ HTTPAPIBudgetModel: self.create_http_api_budget,
622
+ FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
623
+ MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
624
+ UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
625
+ RateModel: self.create_rate,
626
+ HttpRequestRegexMatcherModel: self.create_http_request_matcher,
610
627
  }
611
628
 
612
629
  # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -733,8 +750,8 @@ class ModelToComponentFactory:
733
750
  }
734
751
  return names_to_types[value_type]
735
752
 
736
- @staticmethod
737
753
  def create_api_key_authenticator(
754
+ self,
738
755
  model: ApiKeyAuthenticatorModel,
739
756
  config: Config,
740
757
  token_provider: Optional[TokenProvider] = None,
@@ -756,10 +773,8 @@ class ModelToComponentFactory:
756
773
  )
757
774
 
758
775
  request_option = (
759
- RequestOption(
760
- inject_into=RequestOptionType(model.inject_into.inject_into.value),
761
- field_name=model.inject_into.field_name,
762
- parameters=model.parameters or {},
776
+ self._create_component_from_model(
777
+ model.inject_into, config, parameters=model.parameters or {}
763
778
  )
764
779
  if model.inject_into
765
780
  else RequestOption(
@@ -768,6 +783,7 @@ class ModelToComponentFactory:
768
783
  parameters=model.parameters or {},
769
784
  )
770
785
  )
786
+
771
787
  return ApiKeyAuthenticator(
772
788
  token_provider=(
773
789
  token_provider
@@ -849,7 +865,7 @@ class ModelToComponentFactory:
849
865
  token_provider=token_provider,
850
866
  )
851
867
  else:
852
- return ModelToComponentFactory.create_api_key_authenticator(
868
+ return self.create_api_key_authenticator(
853
869
  ApiKeyAuthenticatorModel(
854
870
  type="ApiKeyAuthenticator",
855
871
  api_token="",
@@ -935,6 +951,17 @@ class ModelToComponentFactory:
935
951
  parameters={},
936
952
  )
937
953
 
954
+ @staticmethod
955
+ def apply_stream_state_migrations(
956
+ stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
957
+ ) -> MutableMapping[str, Any]:
958
+ if stream_state_migrations:
959
+ for state_migration in stream_state_migrations:
960
+ if state_migration.should_migrate(stream_state):
961
+ # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
962
+ stream_state = dict(state_migration.migrate(stream_state))
963
+ return stream_state
964
+
938
965
  def create_concurrent_cursor_from_datetime_based_cursor(
939
966
  self,
940
967
  model_type: Type[BaseModel],
@@ -944,6 +971,7 @@ class ModelToComponentFactory:
944
971
  config: Config,
945
972
  message_repository: Optional[MessageRepository] = None,
946
973
  runtime_lookback_window: Optional[datetime.timedelta] = None,
974
+ stream_state_migrations: Optional[List[Any]] = None,
947
975
  **kwargs: Any,
948
976
  ) -> ConcurrentCursor:
949
977
  # Per-partition incremental streams can dynamically create child cursors which will pass their current
@@ -954,6 +982,7 @@ class ModelToComponentFactory:
954
982
  if "stream_state" not in kwargs
955
983
  else kwargs["stream_state"]
956
984
  )
985
+ stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
957
986
 
958
987
  component_type = component_definition.get("type")
959
988
  if component_definition.get("type") != model_type.__name__:
@@ -1189,6 +1218,7 @@ class ModelToComponentFactory:
1189
1218
  config: Config,
1190
1219
  stream_state: MutableMapping[str, Any],
1191
1220
  partition_router: PartitionRouter,
1221
+ stream_state_migrations: Optional[List[Any]] = None,
1192
1222
  **kwargs: Any,
1193
1223
  ) -> ConcurrentPerPartitionCursor:
1194
1224
  component_type = component_definition.get("type")
@@ -1237,8 +1267,10 @@ class ModelToComponentFactory:
1237
1267
  stream_namespace=stream_namespace,
1238
1268
  config=config,
1239
1269
  message_repository=NoopMessageRepository(),
1270
+ stream_state_migrations=stream_state_migrations,
1240
1271
  )
1241
1272
  )
1273
+ stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1242
1274
 
1243
1275
  # Return the concurrent cursor and state converter
1244
1276
  return ConcurrentPerPartitionCursor(
@@ -1489,19 +1521,15 @@ class ModelToComponentFactory:
1489
1521
  )
1490
1522
 
1491
1523
  end_time_option = (
1492
- RequestOption(
1493
- inject_into=RequestOptionType(model.end_time_option.inject_into.value),
1494
- field_name=model.end_time_option.field_name,
1495
- parameters=model.parameters or {},
1524
+ self._create_component_from_model(
1525
+ model.end_time_option, config, parameters=model.parameters or {}
1496
1526
  )
1497
1527
  if model.end_time_option
1498
1528
  else None
1499
1529
  )
1500
1530
  start_time_option = (
1501
- RequestOption(
1502
- inject_into=RequestOptionType(model.start_time_option.inject_into.value),
1503
- field_name=model.start_time_option.field_name,
1504
- parameters=model.parameters or {},
1531
+ self._create_component_from_model(
1532
+ model.start_time_option, config, parameters=model.parameters or {}
1505
1533
  )
1506
1534
  if model.start_time_option
1507
1535
  else None
@@ -1572,19 +1600,15 @@ class ModelToComponentFactory:
1572
1600
  cursor_model = model.incremental_sync
1573
1601
 
1574
1602
  end_time_option = (
1575
- RequestOption(
1576
- inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
1577
- field_name=cursor_model.end_time_option.field_name,
1578
- parameters=cursor_model.parameters or {},
1603
+ self._create_component_from_model(
1604
+ cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1579
1605
  )
1580
1606
  if cursor_model.end_time_option
1581
1607
  else None
1582
1608
  )
1583
1609
  start_time_option = (
1584
- RequestOption(
1585
- inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
1586
- field_name=cursor_model.start_time_option.field_name,
1587
- parameters=cursor_model.parameters or {},
1610
+ self._create_component_from_model(
1611
+ cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1588
1612
  )
1589
1613
  if cursor_model.start_time_option
1590
1614
  else None
@@ -1656,7 +1680,7 @@ class ModelToComponentFactory:
1656
1680
  ) -> Optional[PartitionRouter]:
1657
1681
  if (
1658
1682
  hasattr(model, "partition_router")
1659
- and isinstance(model, SimpleRetrieverModel)
1683
+ and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
1660
1684
  and model.partition_router
1661
1685
  ):
1662
1686
  stream_slicer_model = model.partition_router
@@ -1690,6 +1714,31 @@ class ModelToComponentFactory:
1690
1714
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1691
1715
 
1692
1716
  if model.incremental_sync and stream_slicer:
1717
+ if model.retriever.type == "AsyncRetriever":
1718
+ if model.incremental_sync.type != "DatetimeBasedCursor":
1719
+ # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support of unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
1720
+ raise ValueError(
1721
+ "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1722
+ )
1723
+ if stream_slicer:
1724
+ return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1725
+ state_manager=self._connector_state_manager,
1726
+ model_type=DatetimeBasedCursorModel,
1727
+ component_definition=model.incremental_sync.__dict__,
1728
+ stream_name=model.name or "",
1729
+ stream_namespace=None,
1730
+ config=config or {},
1731
+ stream_state={},
1732
+ partition_router=stream_slicer,
1733
+ )
1734
+ return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1735
+ model_type=DatetimeBasedCursorModel,
1736
+ component_definition=model.incremental_sync.__dict__,
1737
+ stream_name=model.name or "",
1738
+ stream_namespace=None,
1739
+ config=config or {},
1740
+ )
1741
+
1693
1742
  incremental_sync_model = model.incremental_sync
1694
1743
  if (
1695
1744
  hasattr(incremental_sync_model, "global_substream_cursor")
@@ -1730,6 +1779,7 @@ class ModelToComponentFactory:
1730
1779
  stream_name=model.name or "",
1731
1780
  stream_namespace=None,
1732
1781
  config=config or {},
1782
+ stream_state_migrations=model.state_migrations,
1733
1783
  )
1734
1784
  return (
1735
1785
  self._create_component_from_model(model=model.incremental_sync, config=config)
@@ -1886,6 +1936,8 @@ class ModelToComponentFactory:
1886
1936
  )
1887
1937
  )
1888
1938
 
1939
+ api_budget = self._api_budget
1940
+
1889
1941
  request_options_provider = InterpolatedRequestOptionsProvider(
1890
1942
  request_body_data=model.request_body_data,
1891
1943
  request_body_json=model.request_body_json,
@@ -1906,6 +1958,7 @@ class ModelToComponentFactory:
1906
1958
  path=model.path,
1907
1959
  authenticator=authenticator,
1908
1960
  error_handler=error_handler,
1961
+ api_budget=api_budget,
1909
1962
  http_method=HttpMethod[model.http_method.value],
1910
1963
  request_options_provider=request_options_provider,
1911
1964
  config=config,
@@ -2035,25 +2088,26 @@ class ModelToComponentFactory:
2035
2088
  )
2036
2089
 
2037
2090
  @staticmethod
2038
- def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
2091
+ def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2039
2092
  return JsonDecoder(parameters={})
2040
2093
 
2041
2094
  @staticmethod
2042
- def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
2043
- encoding = model.encoding if model.encoding else "utf-8"
2044
- return JsonParser(encoding=encoding)
2095
+ def create_csv_decoder(model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2096
+ return CompositeRawDecoder(
2097
+ parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
2098
+ )
2045
2099
 
2046
2100
  @staticmethod
2047
- def create_jsonl_decoder(
2048
- model: JsonlDecoderModel, config: Config, **kwargs: Any
2049
- ) -> JsonlDecoder:
2050
- return JsonlDecoder(parameters={})
2101
+ def create_jsonl_decoder(model: JsonlDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2102
+ return CompositeRawDecoder(
2103
+ parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
2104
+ )
2051
2105
 
2052
2106
  @staticmethod
2053
- def create_json_line_parser(
2054
- model: JsonLineParserModel, config: Config, **kwargs: Any
2055
- ) -> JsonLineParser:
2056
- return JsonLineParser(encoding=model.encoding)
2107
+ def create_gzip_decoder(model: GzipDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2108
+ return CompositeRawDecoder(
2109
+ parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
2110
+ )
2057
2111
 
2058
2112
  @staticmethod
2059
2113
  def create_iterable_decoder(
@@ -2065,33 +2119,30 @@ class ModelToComponentFactory:
2065
2119
  def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2066
2120
  return XmlDecoder(parameters={})
2067
2121
 
2068
- @staticmethod
2069
- def create_gzipjson_decoder(
2070
- model: GzipJsonDecoderModel, config: Config, **kwargs: Any
2071
- ) -> GzipJsonDecoder:
2072
- return GzipJsonDecoder(parameters={}, encoding=model.encoding)
2073
-
2074
2122
  def create_zipfile_decoder(
2075
2123
  self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2076
2124
  ) -> ZipfileDecoder:
2077
- parser = self._create_component_from_model(model=model.parser, config=config)
2078
- return ZipfileDecoder(parser=parser)
2079
-
2080
- def create_gzip_parser(
2081
- self, model: GzipParserModel, config: Config, **kwargs: Any
2082
- ) -> GzipParser:
2083
- inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
2084
- return GzipParser(inner_parser=inner_parser)
2125
+ return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2085
2126
 
2086
2127
  @staticmethod
2087
- def create_csv_parser(model: CsvParserModel, config: Config, **kwargs: Any) -> CsvParser:
2088
- return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
2128
+ def _get_parser(model: BaseModel, config: Config) -> Parser:
2129
+ if isinstance(model, JsonDecoderModel):
2130
+ # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases
2131
+ return JsonParser()
2132
+ elif isinstance(model, JsonlDecoderModel):
2133
+ return JsonLineParser()
2134
+ elif isinstance(model, CsvDecoderModel):
2135
+ return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
2136
+ elif isinstance(model, GzipDecoderModel):
2137
+ return GzipParser(
2138
+ inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
2139
+ )
2140
+ elif isinstance(
2141
+ model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
2142
+ ):
2143
+ raise ValueError(f"Decoder type {model} does not have parser associated to it")
2089
2144
 
2090
- def create_composite_raw_decoder(
2091
- self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
2092
- ) -> CompositeRawDecoder:
2093
- parser = self._create_component_from_model(model=model.parser, config=config)
2094
- return CompositeRawDecoder(parser=parser)
2145
+ raise ValueError(f"Unknown decoder type {model}")
2095
2146
 
2096
2147
  @staticmethod
2097
2148
  def create_json_file_schema_loader(
@@ -2125,16 +2176,11 @@ class ModelToComponentFactory:
2125
2176
  additional_jwt_payload=model.additional_jwt_payload,
2126
2177
  )
2127
2178
 
2128
- @staticmethod
2129
2179
  def create_list_partition_router(
2130
- model: ListPartitionRouterModel, config: Config, **kwargs: Any
2180
+ self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2131
2181
  ) -> ListPartitionRouter:
2132
2182
  request_option = (
2133
- RequestOption(
2134
- inject_into=RequestOptionType(model.request_option.inject_into.value),
2135
- field_name=model.request_option.field_name,
2136
- parameters=model.parameters or {},
2137
- )
2183
+ self._create_component_from_model(model.request_option, config)
2138
2184
  if model.request_option
2139
2185
  else None
2140
2186
  )
@@ -2330,7 +2376,25 @@ class ModelToComponentFactory:
2330
2376
  model: RequestOptionModel, config: Config, **kwargs: Any
2331
2377
  ) -> RequestOption:
2332
2378
  inject_into = RequestOptionType(model.inject_into.value)
2333
- return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
2379
+ field_path: Optional[List[Union[InterpolatedString, str]]] = (
2380
+ [
2381
+ InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
2382
+ for segment in model.field_path
2383
+ ]
2384
+ if model.field_path
2385
+ else None
2386
+ )
2387
+ field_name = (
2388
+ InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
2389
+ if model.field_name
2390
+ else None
2391
+ )
2392
+ return RequestOption(
2393
+ field_name=field_name,
2394
+ field_path=field_path,
2395
+ inject_into=inject_into,
2396
+ parameters=kwargs.get("parameters", {}),
2397
+ )
2334
2398
 
2335
2399
  def create_record_selector(
2336
2400
  self,
@@ -2351,6 +2415,8 @@ class ModelToComponentFactory:
2351
2415
  if model.record_filter
2352
2416
  else None
2353
2417
  )
2418
+
2419
+ transform_before_filtering = False
2354
2420
  if client_side_incremental_sync:
2355
2421
  record_filter = ClientSideIncrementalRecordFilterDecorator(
2356
2422
  config=config,
@@ -2360,6 +2426,8 @@ class ModelToComponentFactory:
2360
2426
  else None,
2361
2427
  **client_side_incremental_sync,
2362
2428
  )
2429
+ transform_before_filtering = True
2430
+
2363
2431
  schema_normalization = (
2364
2432
  TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
2365
2433
  if isinstance(model.schema_normalization, SchemaNormalizationModel)
@@ -2374,6 +2442,7 @@ class ModelToComponentFactory:
2374
2442
  transformations=transformations or [],
2375
2443
  schema_normalization=schema_normalization,
2376
2444
  parameters=model.parameters or {},
2445
+ transform_before_filtering=transform_before_filtering,
2377
2446
  )
2378
2447
 
2379
2448
  @staticmethod
@@ -2894,3 +2963,84 @@ class ModelToComponentFactory:
2894
2963
  return isinstance(parser.inner_parser, JsonParser)
2895
2964
  else:
2896
2965
  return False
2966
+
2967
+ def create_http_api_budget(
2968
+ self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
2969
+ ) -> HttpAPIBudget:
2970
+ policies = [
2971
+ self._create_component_from_model(model=policy, config=config)
2972
+ for policy in model.policies
2973
+ ]
2974
+
2975
+ return HttpAPIBudget(
2976
+ policies=policies,
2977
+ ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
2978
+ ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
2979
+ status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
2980
+ )
2981
+
2982
+ def create_fixed_window_call_rate_policy(
2983
+ self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
2984
+ ) -> FixedWindowCallRatePolicy:
2985
+ matchers = [
2986
+ self._create_component_from_model(model=matcher, config=config)
2987
+ for matcher in model.matchers
2988
+ ]
2989
+
2990
+ # Set the initial reset timestamp to 10 days from now.
2991
+ # This value will be updated by the first request.
2992
+ return FixedWindowCallRatePolicy(
2993
+ next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
2994
+ period=parse_duration(model.period),
2995
+ call_limit=model.call_limit,
2996
+ matchers=matchers,
2997
+ )
2998
+
2999
+ def create_moving_window_call_rate_policy(
3000
+ self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3001
+ ) -> MovingWindowCallRatePolicy:
3002
+ rates = [
3003
+ self._create_component_from_model(model=rate, config=config) for rate in model.rates
3004
+ ]
3005
+ matchers = [
3006
+ self._create_component_from_model(model=matcher, config=config)
3007
+ for matcher in model.matchers
3008
+ ]
3009
+ return MovingWindowCallRatePolicy(
3010
+ rates=rates,
3011
+ matchers=matchers,
3012
+ )
3013
+
3014
+ def create_unlimited_call_rate_policy(
3015
+ self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
3016
+ ) -> UnlimitedCallRatePolicy:
3017
+ matchers = [
3018
+ self._create_component_from_model(model=matcher, config=config)
3019
+ for matcher in model.matchers
3020
+ ]
3021
+
3022
+ return UnlimitedCallRatePolicy(
3023
+ matchers=matchers,
3024
+ )
3025
+
3026
+ def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
3027
+ return Rate(
3028
+ limit=model.limit,
3029
+ interval=parse_duration(model.interval),
3030
+ )
3031
+
3032
+ def create_http_request_matcher(
3033
+ self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
3034
+ ) -> HttpRequestRegexMatcher:
3035
+ return HttpRequestRegexMatcher(
3036
+ method=model.method,
3037
+ url_base=model.url_base,
3038
+ url_path_pattern=model.url_path_pattern,
3039
+ params=model.params,
3040
+ headers=model.headers,
3041
+ )
3042
+
3043
+ def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
3044
+ self._api_budget = self.create_component(
3045
+ model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
3046
+ )
@@ -4,9 +4,9 @@ from dataclasses import InitVar, dataclass, field
4
4
  from typing import Any, Callable, Iterable, Mapping, Optional
5
5
 
6
6
  from airbyte_cdk.models import FailureType
7
+ from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
7
8
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
8
9
  AsyncJobOrchestrator,
9
- AsyncPartition,
10
10
  )
11
11
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
12
12
  SinglePartitionRouter,
@@ -42,12 +42,12 @@ class AsyncJobPartitionRouter(StreamSlicer):
42
42
 
43
43
  for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
44
44
  yield StreamSlice(
45
- partition=dict(completed_partition.stream_slice.partition)
46
- | {"partition": completed_partition},
45
+ partition=dict(completed_partition.stream_slice.partition),
47
46
  cursor_slice=completed_partition.stream_slice.cursor_slice,
47
+ extra_fields={"jobs": list(completed_partition.jobs)},
48
48
  )
49
49
 
50
- def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
50
+ def fetch_records(self, async_jobs: Iterable[AsyncJob]) -> Iterable[Mapping[str, Any]]:
51
51
  """
52
52
  This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
53
53
  be responsible for. However, this was added in because the JobOrchestrator is required to
@@ -62,4 +62,4 @@ class AsyncJobPartitionRouter(StreamSlicer):
62
62
  failure_type=FailureType.system_error,
63
63
  )
64
64
 
65
- return self._job_orchestrator.fetch_records(partition=partition)
65
+ return self._job_orchestrator.fetch_records(async_jobs=async_jobs)
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  from dataclasses import InitVar, dataclass
6
- from typing import Any, Iterable, List, Mapping, Optional, Union
6
+ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
9
9
  from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
@@ -100,7 +100,9 @@ class ListPartitionRouter(PartitionRouter):
100
100
  ):
101
101
  slice_value = stream_slice.get(self._cursor_field.eval(self.config))
102
102
  if slice_value:
103
- return {self.request_option.field_name.eval(self.config): slice_value} # type: ignore # field_name is always casted to InterpolatedString
103
+ options: MutableMapping[str, Any] = {}
104
+ self.request_option.inject_into_request(options, slice_value, self.config)
105
+ return options
104
106
  else:
105
107
  return {}
106
108
  else: