airbyte-cdk 6.34.0.dev1__py3-none-any.whl → 6.34.1.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
  2. airbyte_cdk/connector_builder/message_grouper.py +448 -0
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
  8. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
  9. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  10. airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
  11. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
  12. airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
  13. airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +25 -56
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
  18. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
  19. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  20. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
  22. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
  23. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
  24. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  25. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  26. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  27. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
  28. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
  29. airbyte_cdk/sources/file_based/file_based_source.py +37 -70
  30. airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
  31. airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
  32. airbyte_cdk/sources/streams/call_rate.py +47 -185
  33. airbyte_cdk/sources/streams/http/http.py +2 -1
  34. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
  35. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
  36. airbyte_cdk/utils/datetime_helpers.py +66 -48
  37. airbyte_cdk/utils/mapping_helpers.py +26 -126
  38. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
  39. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +43 -52
  40. airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
  41. airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
  42. airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
  43. airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
  44. airbyte_cdk/connector_builder/test_reader/types.py +0 -75
  45. airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
  46. airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
  47. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
  48. airbyte_cdk/sources/specs/transfer_modes.py +0 -26
  49. airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
  50. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
  51. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
  52. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
  53. {airbyte_cdk-6.34.0.dev1.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
@@ -60,8 +60,10 @@ from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
60
60
  from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
61
61
  from airbyte_cdk.sources.declarative.decoders import (
62
62
  Decoder,
63
+ GzipJsonDecoder,
63
64
  IterableDecoder,
64
65
  JsonDecoder,
66
+ JsonlDecoder,
65
67
  PaginationDecoderDecorator,
66
68
  XmlDecoder,
67
69
  ZipfileDecoder,
@@ -101,8 +103,8 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
101
103
  LegacyToPerPartitionStateMigration,
102
104
  )
103
105
  from airbyte_cdk.sources.declarative.models import (
106
+ Clamping,
104
107
  CustomStateMigration,
105
- GzipDecoder,
106
108
  )
107
109
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
108
110
  AddedFieldDefinition as AddedFieldDefinitionModel,
@@ -140,6 +142,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
140
142
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
141
143
  CompositeErrorHandler as CompositeErrorHandlerModel,
142
144
  )
145
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
146
+ CompositeRawDecoder as CompositeRawDecoderModel,
147
+ )
143
148
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
144
149
  ConcurrencyLevel as ConcurrencyLevelModel,
145
150
  )
@@ -150,7 +155,7 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
150
155
  ConstantBackoffStrategy as ConstantBackoffStrategyModel,
151
156
  )
152
157
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
153
- CsvDecoder as CsvDecoderModel,
158
+ CsvParser as CsvParserModel,
154
159
  )
155
160
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
156
161
  CursorPagination as CursorPaginationModel,
@@ -221,17 +226,14 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
221
226
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
222
227
  ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
223
228
  )
224
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
225
- FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
226
- )
227
229
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
228
230
  FlattenFields as FlattenFieldsModel,
229
231
  )
230
232
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
231
- GzipDecoder as GzipDecoderModel,
233
+ GzipJsonDecoder as GzipJsonDecoderModel,
232
234
  )
233
235
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
234
- HTTPAPIBudget as HTTPAPIBudgetModel,
236
+ GzipParser as GzipParserModel,
235
237
  )
236
238
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
237
239
  HttpComponentsResolver as HttpComponentsResolverModel,
@@ -239,9 +241,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
239
241
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
240
242
  HttpRequester as HttpRequesterModel,
241
243
  )
242
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
243
- HttpRequestRegexMatcher as HttpRequestRegexMatcherModel,
244
- )
245
244
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
246
245
  HttpResponseFilter as HttpResponseFilterModel,
247
246
  )
@@ -260,6 +259,12 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
260
259
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
261
260
  JsonlDecoder as JsonlDecoderModel,
262
261
  )
262
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
263
+ JsonLineParser as JsonLineParserModel,
264
+ )
265
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
266
+ JsonParser as JsonParserModel,
267
+ )
263
268
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
264
269
  JwtAuthenticator as JwtAuthenticatorModel,
265
270
  )
@@ -290,9 +295,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
290
295
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
291
296
  MinMaxDatetime as MinMaxDatetimeModel,
292
297
  )
293
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
294
- MovingWindowCallRatePolicy as MovingWindowCallRatePolicyModel,
295
- )
296
298
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
297
299
  NoAuth as NoAuthModel,
298
300
  )
@@ -311,9 +313,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
311
313
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
312
314
  ParentStreamConfig as ParentStreamConfigModel,
313
315
  )
314
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
315
- Rate as RateModel,
316
- )
317
316
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
318
317
  RecordFilter as RecordFilterModel,
319
318
  )
@@ -357,9 +356,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
357
356
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
358
357
  TypesMap as TypesMapModel,
359
358
  )
360
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
361
- UnlimitedCallRatePolicy as UnlimitedCallRatePolicyModel,
362
- )
363
359
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
364
360
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
365
361
  WaitTimeFromHeader as WaitTimeFromHeaderModel,
@@ -473,15 +469,6 @@ from airbyte_cdk.sources.message import (
473
469
  MessageRepository,
474
470
  NoopMessageRepository,
475
471
  )
476
- from airbyte_cdk.sources.streams.call_rate import (
477
- APIBudget,
478
- FixedWindowCallRatePolicy,
479
- HttpAPIBudget,
480
- HttpRequestRegexMatcher,
481
- MovingWindowCallRatePolicy,
482
- Rate,
483
- UnlimitedCallRatePolicy,
484
- )
485
472
  from airbyte_cdk.sources.streams.concurrent.clamping import (
486
473
  ClampingEndProvider,
487
474
  ClampingStrategy,
@@ -533,7 +520,6 @@ class ModelToComponentFactory:
533
520
  self._evaluate_log_level(emit_connector_builder_messages)
534
521
  )
535
522
  self._connector_state_manager = connector_state_manager or ConnectorStateManager()
536
- self._api_budget: Optional[Union[APIBudget, HttpAPIBudget]] = None
537
523
 
538
524
  def _init_mappings(self) -> None:
539
525
  self.PYDANTIC_MODEL_TO_CONSTRUCTOR: Mapping[Type[BaseModel], Callable[..., Any]] = {
@@ -545,9 +531,9 @@ class ModelToComponentFactory:
545
531
  CheckStreamModel: self.create_check_stream,
546
532
  CheckDynamicStreamModel: self.create_check_dynamic_stream,
547
533
  CompositeErrorHandlerModel: self.create_composite_error_handler,
534
+ CompositeRawDecoderModel: self.create_composite_raw_decoder,
548
535
  ConcurrencyLevelModel: self.create_concurrency_level,
549
536
  ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
550
- CsvDecoderModel: self.create_csv_decoder,
551
537
  CursorPaginationModel: self.create_cursor_pagination,
552
538
  CustomAuthenticatorModel: self.create_custom_component,
553
539
  CustomBackoffStrategyModel: self.create_custom_component,
@@ -577,7 +563,10 @@ class ModelToComponentFactory:
577
563
  InlineSchemaLoaderModel: self.create_inline_schema_loader,
578
564
  JsonDecoderModel: self.create_json_decoder,
579
565
  JsonlDecoderModel: self.create_jsonl_decoder,
580
- GzipDecoderModel: self.create_gzip_decoder,
566
+ JsonLineParserModel: self.create_json_line_parser,
567
+ JsonParserModel: self.create_json_parser,
568
+ GzipJsonDecoderModel: self.create_gzipjson_decoder,
569
+ GzipParserModel: self.create_gzip_parser,
581
570
  KeysToLowerModel: self.create_keys_to_lower_transformation,
582
571
  KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
583
572
  KeysReplaceModel: self.create_keys_replace_transformation,
@@ -618,12 +607,6 @@ class ModelToComponentFactory:
618
607
  StreamConfigModel: self.create_stream_config,
619
608
  ComponentMappingDefinitionModel: self.create_components_mapping_definition,
620
609
  ZipfileDecoderModel: self.create_zipfile_decoder,
621
- HTTPAPIBudgetModel: self.create_http_api_budget,
622
- FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
623
- MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
624
- UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
625
- RateModel: self.create_rate,
626
- HttpRequestRegexMatcherModel: self.create_http_request_matcher,
627
610
  }
628
611
 
629
612
  # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -750,8 +733,8 @@ class ModelToComponentFactory:
750
733
  }
751
734
  return names_to_types[value_type]
752
735
 
736
+ @staticmethod
753
737
  def create_api_key_authenticator(
754
- self,
755
738
  model: ApiKeyAuthenticatorModel,
756
739
  config: Config,
757
740
  token_provider: Optional[TokenProvider] = None,
@@ -773,8 +756,10 @@ class ModelToComponentFactory:
773
756
  )
774
757
 
775
758
  request_option = (
776
- self._create_component_from_model(
777
- model.inject_into, config, parameters=model.parameters or {}
759
+ RequestOption(
760
+ inject_into=RequestOptionType(model.inject_into.inject_into.value),
761
+ field_name=model.inject_into.field_name,
762
+ parameters=model.parameters or {},
778
763
  )
779
764
  if model.inject_into
780
765
  else RequestOption(
@@ -783,7 +768,6 @@ class ModelToComponentFactory:
783
768
  parameters=model.parameters or {},
784
769
  )
785
770
  )
786
-
787
771
  return ApiKeyAuthenticator(
788
772
  token_provider=(
789
773
  token_provider
@@ -865,7 +849,7 @@ class ModelToComponentFactory:
865
849
  token_provider=token_provider,
866
850
  )
867
851
  else:
868
- return self.create_api_key_authenticator(
852
+ return ModelToComponentFactory.create_api_key_authenticator(
869
853
  ApiKeyAuthenticatorModel(
870
854
  type="ApiKeyAuthenticator",
871
855
  api_token="",
@@ -951,17 +935,6 @@ class ModelToComponentFactory:
951
935
  parameters={},
952
936
  )
953
937
 
954
- @staticmethod
955
- def apply_stream_state_migrations(
956
- stream_state_migrations: List[Any] | None, stream_state: MutableMapping[str, Any]
957
- ) -> MutableMapping[str, Any]:
958
- if stream_state_migrations:
959
- for state_migration in stream_state_migrations:
960
- if state_migration.should_migrate(stream_state):
961
- # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
962
- stream_state = dict(state_migration.migrate(stream_state))
963
- return stream_state
964
-
965
938
  def create_concurrent_cursor_from_datetime_based_cursor(
966
939
  self,
967
940
  model_type: Type[BaseModel],
@@ -971,7 +944,6 @@ class ModelToComponentFactory:
971
944
  config: Config,
972
945
  message_repository: Optional[MessageRepository] = None,
973
946
  runtime_lookback_window: Optional[datetime.timedelta] = None,
974
- stream_state_migrations: Optional[List[Any]] = None,
975
947
  **kwargs: Any,
976
948
  ) -> ConcurrentCursor:
977
949
  # Per-partition incremental streams can dynamically create child cursors which will pass their current
@@ -982,7 +954,6 @@ class ModelToComponentFactory:
982
954
  if "stream_state" not in kwargs
983
955
  else kwargs["stream_state"]
984
956
  )
985
- stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
986
957
 
987
958
  component_type = component_definition.get("type")
988
959
  if component_definition.get("type") != model_type.__name__:
@@ -1218,7 +1189,6 @@ class ModelToComponentFactory:
1218
1189
  config: Config,
1219
1190
  stream_state: MutableMapping[str, Any],
1220
1191
  partition_router: PartitionRouter,
1221
- stream_state_migrations: Optional[List[Any]] = None,
1222
1192
  **kwargs: Any,
1223
1193
  ) -> ConcurrentPerPartitionCursor:
1224
1194
  component_type = component_definition.get("type")
@@ -1267,10 +1237,8 @@ class ModelToComponentFactory:
1267
1237
  stream_namespace=stream_namespace,
1268
1238
  config=config,
1269
1239
  message_repository=NoopMessageRepository(),
1270
- stream_state_migrations=stream_state_migrations,
1271
1240
  )
1272
1241
  )
1273
- stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
1274
1242
 
1275
1243
  # Return the concurrent cursor and state converter
1276
1244
  return ConcurrentPerPartitionCursor(
@@ -1521,15 +1489,19 @@ class ModelToComponentFactory:
1521
1489
  )
1522
1490
 
1523
1491
  end_time_option = (
1524
- self._create_component_from_model(
1525
- model.end_time_option, config, parameters=model.parameters or {}
1492
+ RequestOption(
1493
+ inject_into=RequestOptionType(model.end_time_option.inject_into.value),
1494
+ field_name=model.end_time_option.field_name,
1495
+ parameters=model.parameters or {},
1526
1496
  )
1527
1497
  if model.end_time_option
1528
1498
  else None
1529
1499
  )
1530
1500
  start_time_option = (
1531
- self._create_component_from_model(
1532
- model.start_time_option, config, parameters=model.parameters or {}
1501
+ RequestOption(
1502
+ inject_into=RequestOptionType(model.start_time_option.inject_into.value),
1503
+ field_name=model.start_time_option.field_name,
1504
+ parameters=model.parameters or {},
1533
1505
  )
1534
1506
  if model.start_time_option
1535
1507
  else None
@@ -1600,15 +1572,19 @@ class ModelToComponentFactory:
1600
1572
  cursor_model = model.incremental_sync
1601
1573
 
1602
1574
  end_time_option = (
1603
- self._create_component_from_model(
1604
- cursor_model.end_time_option, config, parameters=cursor_model.parameters or {}
1575
+ RequestOption(
1576
+ inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
1577
+ field_name=cursor_model.end_time_option.field_name,
1578
+ parameters=cursor_model.parameters or {},
1605
1579
  )
1606
1580
  if cursor_model.end_time_option
1607
1581
  else None
1608
1582
  )
1609
1583
  start_time_option = (
1610
- self._create_component_from_model(
1611
- cursor_model.start_time_option, config, parameters=cursor_model.parameters or {}
1584
+ RequestOption(
1585
+ inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
1586
+ field_name=cursor_model.start_time_option.field_name,
1587
+ parameters=cursor_model.parameters or {},
1612
1588
  )
1613
1589
  if cursor_model.start_time_option
1614
1590
  else None
@@ -1680,7 +1656,7 @@ class ModelToComponentFactory:
1680
1656
  ) -> Optional[PartitionRouter]:
1681
1657
  if (
1682
1658
  hasattr(model, "partition_router")
1683
- and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
1659
+ and isinstance(model, SimpleRetrieverModel)
1684
1660
  and model.partition_router
1685
1661
  ):
1686
1662
  stream_slicer_model = model.partition_router
@@ -1714,31 +1690,6 @@ class ModelToComponentFactory:
1714
1690
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1715
1691
 
1716
1692
  if model.incremental_sync and stream_slicer:
1717
- if model.retriever.type == "AsyncRetriever":
1718
- if model.incremental_sync.type != "DatetimeBasedCursor":
1719
- # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
1720
- raise ValueError(
1721
- "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1722
- )
1723
- if stream_slicer:
1724
- return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1725
- state_manager=self._connector_state_manager,
1726
- model_type=DatetimeBasedCursorModel,
1727
- component_definition=model.incremental_sync.__dict__,
1728
- stream_name=model.name or "",
1729
- stream_namespace=None,
1730
- config=config or {},
1731
- stream_state={},
1732
- partition_router=stream_slicer,
1733
- )
1734
- return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1735
- model_type=DatetimeBasedCursorModel,
1736
- component_definition=model.incremental_sync.__dict__,
1737
- stream_name=model.name or "",
1738
- stream_namespace=None,
1739
- config=config or {},
1740
- )
1741
-
1742
1693
  incremental_sync_model = model.incremental_sync
1743
1694
  if (
1744
1695
  hasattr(incremental_sync_model, "global_substream_cursor")
@@ -1779,7 +1730,6 @@ class ModelToComponentFactory:
1779
1730
  stream_name=model.name or "",
1780
1731
  stream_namespace=None,
1781
1732
  config=config or {},
1782
- stream_state_migrations=model.state_migrations,
1783
1733
  )
1784
1734
  return (
1785
1735
  self._create_component_from_model(model=model.incremental_sync, config=config)
@@ -1936,8 +1886,6 @@ class ModelToComponentFactory:
1936
1886
  )
1937
1887
  )
1938
1888
 
1939
- api_budget = self._api_budget
1940
-
1941
1889
  request_options_provider = InterpolatedRequestOptionsProvider(
1942
1890
  request_body_data=model.request_body_data,
1943
1891
  request_body_json=model.request_body_json,
@@ -1958,7 +1906,6 @@ class ModelToComponentFactory:
1958
1906
  path=model.path,
1959
1907
  authenticator=authenticator,
1960
1908
  error_handler=error_handler,
1961
- api_budget=api_budget,
1962
1909
  http_method=HttpMethod[model.http_method.value],
1963
1910
  request_options_provider=request_options_provider,
1964
1911
  config=config,
@@ -2088,26 +2035,25 @@ class ModelToComponentFactory:
2088
2035
  )
2089
2036
 
2090
2037
  @staticmethod
2091
- def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2038
+ def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
2092
2039
  return JsonDecoder(parameters={})
2093
2040
 
2094
2041
  @staticmethod
2095
- def create_csv_decoder(model: CsvDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2096
- return CompositeRawDecoder(
2097
- parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
2098
- )
2042
+ def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
2043
+ encoding = model.encoding if model.encoding else "utf-8"
2044
+ return JsonParser(encoding=encoding)
2099
2045
 
2100
2046
  @staticmethod
2101
- def create_jsonl_decoder(model: JsonlDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2102
- return CompositeRawDecoder(
2103
- parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
2104
- )
2047
+ def create_jsonl_decoder(
2048
+ model: JsonlDecoderModel, config: Config, **kwargs: Any
2049
+ ) -> JsonlDecoder:
2050
+ return JsonlDecoder(parameters={})
2105
2051
 
2106
2052
  @staticmethod
2107
- def create_gzip_decoder(model: GzipDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2108
- return CompositeRawDecoder(
2109
- parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
2110
- )
2053
+ def create_json_line_parser(
2054
+ model: JsonLineParserModel, config: Config, **kwargs: Any
2055
+ ) -> JsonLineParser:
2056
+ return JsonLineParser(encoding=model.encoding)
2111
2057
 
2112
2058
  @staticmethod
2113
2059
  def create_iterable_decoder(
@@ -2119,30 +2065,33 @@ class ModelToComponentFactory:
2119
2065
  def create_xml_decoder(model: XmlDecoderModel, config: Config, **kwargs: Any) -> XmlDecoder:
2120
2066
  return XmlDecoder(parameters={})
2121
2067
 
2068
+ @staticmethod
2069
+ def create_gzipjson_decoder(
2070
+ model: GzipJsonDecoderModel, config: Config, **kwargs: Any
2071
+ ) -> GzipJsonDecoder:
2072
+ return GzipJsonDecoder(parameters={}, encoding=model.encoding)
2073
+
2122
2074
  def create_zipfile_decoder(
2123
2075
  self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
2124
2076
  ) -> ZipfileDecoder:
2125
- return ZipfileDecoder(parser=ModelToComponentFactory._get_parser(model.decoder, config))
2077
+ parser = self._create_component_from_model(model=model.parser, config=config)
2078
+ return ZipfileDecoder(parser=parser)
2079
+
2080
+ def create_gzip_parser(
2081
+ self, model: GzipParserModel, config: Config, **kwargs: Any
2082
+ ) -> GzipParser:
2083
+ inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
2084
+ return GzipParser(inner_parser=inner_parser)
2126
2085
 
2127
2086
  @staticmethod
2128
- def _get_parser(model: BaseModel, config: Config) -> Parser:
2129
- if isinstance(model, JsonDecoderModel):
2130
- # Note that the logic is a bit different from the JsonDecoder as there is some legacy that is maintained to return {} on error cases
2131
- return JsonParser()
2132
- elif isinstance(model, JsonlDecoderModel):
2133
- return JsonLineParser()
2134
- elif isinstance(model, CsvDecoderModel):
2135
- return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
2136
- elif isinstance(model, GzipDecoderModel):
2137
- return GzipParser(
2138
- inner_parser=ModelToComponentFactory._get_parser(model.decoder, config)
2139
- )
2140
- elif isinstance(
2141
- model, (CustomDecoderModel, IterableDecoderModel, XmlDecoderModel, ZipfileDecoderModel)
2142
- ):
2143
- raise ValueError(f"Decoder type {model} does not have parser associated to it")
2087
+ def create_csv_parser(model: CsvParserModel, config: Config, **kwargs: Any) -> CsvParser:
2088
+ return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
2144
2089
 
2145
- raise ValueError(f"Unknown decoder type {model}")
2090
+ def create_composite_raw_decoder(
2091
+ self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
2092
+ ) -> CompositeRawDecoder:
2093
+ parser = self._create_component_from_model(model=model.parser, config=config)
2094
+ return CompositeRawDecoder(parser=parser)
2146
2095
 
2147
2096
  @staticmethod
2148
2097
  def create_json_file_schema_loader(
@@ -2176,11 +2125,16 @@ class ModelToComponentFactory:
2176
2125
  additional_jwt_payload=model.additional_jwt_payload,
2177
2126
  )
2178
2127
 
2128
+ @staticmethod
2179
2129
  def create_list_partition_router(
2180
- self, model: ListPartitionRouterModel, config: Config, **kwargs: Any
2130
+ model: ListPartitionRouterModel, config: Config, **kwargs: Any
2181
2131
  ) -> ListPartitionRouter:
2182
2132
  request_option = (
2183
- self._create_component_from_model(model.request_option, config)
2133
+ RequestOption(
2134
+ inject_into=RequestOptionType(model.request_option.inject_into.value),
2135
+ field_name=model.request_option.field_name,
2136
+ parameters=model.parameters or {},
2137
+ )
2184
2138
  if model.request_option
2185
2139
  else None
2186
2140
  )
@@ -2376,25 +2330,7 @@ class ModelToComponentFactory:
2376
2330
  model: RequestOptionModel, config: Config, **kwargs: Any
2377
2331
  ) -> RequestOption:
2378
2332
  inject_into = RequestOptionType(model.inject_into.value)
2379
- field_path: Optional[List[Union[InterpolatedString, str]]] = (
2380
- [
2381
- InterpolatedString.create(segment, parameters=kwargs.get("parameters", {}))
2382
- for segment in model.field_path
2383
- ]
2384
- if model.field_path
2385
- else None
2386
- )
2387
- field_name = (
2388
- InterpolatedString.create(model.field_name, parameters=kwargs.get("parameters", {}))
2389
- if model.field_name
2390
- else None
2391
- )
2392
- return RequestOption(
2393
- field_name=field_name,
2394
- field_path=field_path,
2395
- inject_into=inject_into,
2396
- parameters=kwargs.get("parameters", {}),
2397
- )
2333
+ return RequestOption(field_name=model.field_name, inject_into=inject_into, parameters={})
2398
2334
 
2399
2335
  def create_record_selector(
2400
2336
  self,
@@ -2415,8 +2351,6 @@ class ModelToComponentFactory:
2415
2351
  if model.record_filter
2416
2352
  else None
2417
2353
  )
2418
-
2419
- transform_before_filtering = False
2420
2354
  if client_side_incremental_sync:
2421
2355
  record_filter = ClientSideIncrementalRecordFilterDecorator(
2422
2356
  config=config,
@@ -2426,8 +2360,6 @@ class ModelToComponentFactory:
2426
2360
  else None,
2427
2361
  **client_side_incremental_sync,
2428
2362
  )
2429
- transform_before_filtering = True
2430
-
2431
2363
  schema_normalization = (
2432
2364
  TypeTransformer(SCHEMA_TRANSFORMER_TYPE_MAPPING[model.schema_normalization])
2433
2365
  if isinstance(model.schema_normalization, SchemaNormalizationModel)
@@ -2442,7 +2374,6 @@ class ModelToComponentFactory:
2442
2374
  transformations=transformations or [],
2443
2375
  schema_normalization=schema_normalization,
2444
2376
  parameters=model.parameters or {},
2445
- transform_before_filtering=transform_before_filtering,
2446
2377
  )
2447
2378
 
2448
2379
  @staticmethod
@@ -2963,84 +2894,3 @@ class ModelToComponentFactory:
2963
2894
  return isinstance(parser.inner_parser, JsonParser)
2964
2895
  else:
2965
2896
  return False
2966
-
2967
- def create_http_api_budget(
2968
- self, model: HTTPAPIBudgetModel, config: Config, **kwargs: Any
2969
- ) -> HttpAPIBudget:
2970
- policies = [
2971
- self._create_component_from_model(model=policy, config=config)
2972
- for policy in model.policies
2973
- ]
2974
-
2975
- return HttpAPIBudget(
2976
- policies=policies,
2977
- ratelimit_reset_header=model.ratelimit_reset_header or "ratelimit-reset",
2978
- ratelimit_remaining_header=model.ratelimit_remaining_header or "ratelimit-remaining",
2979
- status_codes_for_ratelimit_hit=model.status_codes_for_ratelimit_hit or [429],
2980
- )
2981
-
2982
- def create_fixed_window_call_rate_policy(
2983
- self, model: FixedWindowCallRatePolicyModel, config: Config, **kwargs: Any
2984
- ) -> FixedWindowCallRatePolicy:
2985
- matchers = [
2986
- self._create_component_from_model(model=matcher, config=config)
2987
- for matcher in model.matchers
2988
- ]
2989
-
2990
- # Set the initial reset timestamp to 10 days from now.
2991
- # This value will be updated by the first request.
2992
- return FixedWindowCallRatePolicy(
2993
- next_reset_ts=datetime.datetime.now() + datetime.timedelta(days=10),
2994
- period=parse_duration(model.period),
2995
- call_limit=model.call_limit,
2996
- matchers=matchers,
2997
- )
2998
-
2999
- def create_moving_window_call_rate_policy(
3000
- self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3001
- ) -> MovingWindowCallRatePolicy:
3002
- rates = [
3003
- self._create_component_from_model(model=rate, config=config) for rate in model.rates
3004
- ]
3005
- matchers = [
3006
- self._create_component_from_model(model=matcher, config=config)
3007
- for matcher in model.matchers
3008
- ]
3009
- return MovingWindowCallRatePolicy(
3010
- rates=rates,
3011
- matchers=matchers,
3012
- )
3013
-
3014
- def create_unlimited_call_rate_policy(
3015
- self, model: UnlimitedCallRatePolicyModel, config: Config, **kwargs: Any
3016
- ) -> UnlimitedCallRatePolicy:
3017
- matchers = [
3018
- self._create_component_from_model(model=matcher, config=config)
3019
- for matcher in model.matchers
3020
- ]
3021
-
3022
- return UnlimitedCallRatePolicy(
3023
- matchers=matchers,
3024
- )
3025
-
3026
- def create_rate(self, model: RateModel, config: Config, **kwargs: Any) -> Rate:
3027
- return Rate(
3028
- limit=model.limit,
3029
- interval=parse_duration(model.interval),
3030
- )
3031
-
3032
- def create_http_request_matcher(
3033
- self, model: HttpRequestRegexMatcherModel, config: Config, **kwargs: Any
3034
- ) -> HttpRequestRegexMatcher:
3035
- return HttpRequestRegexMatcher(
3036
- method=model.method,
3037
- url_base=model.url_base,
3038
- url_path_pattern=model.url_path_pattern,
3039
- params=model.params,
3040
- headers=model.headers,
3041
- )
3042
-
3043
- def set_api_budget(self, component_definition: ComponentDefinition, config: Config) -> None:
3044
- self._api_budget = self.create_component(
3045
- model_type=HTTPAPIBudgetModel, component_definition=component_definition, config=config
3046
- )
@@ -4,9 +4,9 @@ from dataclasses import InitVar, dataclass, field
4
4
  from typing import Any, Callable, Iterable, Mapping, Optional
5
5
 
6
6
  from airbyte_cdk.models import FailureType
7
- from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
8
7
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
9
8
  AsyncJobOrchestrator,
9
+ AsyncPartition,
10
10
  )
11
11
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
12
12
  SinglePartitionRouter,
@@ -42,12 +42,12 @@ class AsyncJobPartitionRouter(StreamSlicer):
42
42
 
43
43
  for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
44
44
  yield StreamSlice(
45
- partition=dict(completed_partition.stream_slice.partition),
45
+ partition=dict(completed_partition.stream_slice.partition)
46
+ | {"partition": completed_partition},
46
47
  cursor_slice=completed_partition.stream_slice.cursor_slice,
47
- extra_fields={"jobs": list(completed_partition.jobs)},
48
48
  )
49
49
 
50
- def fetch_records(self, async_jobs: Iterable[AsyncJob]) -> Iterable[Mapping[str, Any]]:
50
+ def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
51
51
  """
52
52
  This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
53
53
  be responsible for. However, this was added in because the JobOrchestrator is required to
@@ -62,4 +62,4 @@ class AsyncJobPartitionRouter(StreamSlicer):
62
62
  failure_type=FailureType.system_error,
63
63
  )
64
64
 
65
- return self._job_orchestrator.fetch_records(async_jobs=async_jobs)
65
+ return self._job_orchestrator.fetch_records(partition=partition)
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  from dataclasses import InitVar, dataclass
6
- from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
6
+ from typing import Any, Iterable, List, Mapping, Optional, Union
7
7
 
8
8
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
9
9
  from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
@@ -100,9 +100,7 @@ class ListPartitionRouter(PartitionRouter):
100
100
  ):
101
101
  slice_value = stream_slice.get(self._cursor_field.eval(self.config))
102
102
  if slice_value:
103
- options: MutableMapping[str, Any] = {}
104
- self.request_option.inject_into_request(options, slice_value, self.config)
105
- return options
103
+ return {self.request_option.field_name.eval(self.config): slice_value} # type: ignore # field_name is always casted to InterpolatedString
106
104
  else:
107
105
  return {}
108
106
  else: