airbyte-cdk 6.20.2.dev0__py3-none-any.whl → 6.21.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (36)
  1. airbyte_cdk/sources/declarative/auth/oauth.py +34 -0
  2. airbyte_cdk/sources/declarative/checks/__init__.py +18 -2
  3. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +51 -0
  4. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -80
  5. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +123 -21
  6. airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
  7. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +43 -0
  8. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
  9. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  10. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  11. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  12. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
  13. airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -1
  14. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +112 -27
  15. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +127 -106
  16. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  17. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
  18. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  19. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +13 -3
  20. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +11 -0
  21. airbyte_cdk/sources/file_based/exceptions.py +34 -0
  22. airbyte_cdk/sources/file_based/file_based_source.py +28 -5
  23. airbyte_cdk/sources/file_based/file_based_stream_reader.py +18 -4
  24. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +25 -2
  25. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +30 -2
  26. airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
  27. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +33 -4
  28. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +42 -4
  29. airbyte_cdk/sources/types.py +3 -0
  30. airbyte_cdk/sources/utils/transform.py +29 -3
  31. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/METADATA +1 -1
  32. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/RECORD +35 -33
  33. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -331
  34. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/LICENSE.txt +0 -0
  35. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/entry_points.txt +0 -0
@@ -54,7 +54,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
     SessionTokenProvider,
     TokenProvider,
 )
-from airbyte_cdk.sources.declarative.checks import CheckStream
+from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
 from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
@@ -66,12 +66,15 @@ from airbyte_cdk.sources.declarative.decoders import (
     JsonlDecoder,
     PaginationDecoderDecorator,
     XmlDecoder,
+    ZipfileDecoder,
 )
 from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
     CompositeRawDecoder,
     CsvParser,
     GzipParser,
     JsonLineParser,
+    JsonParser,
+    Parser,
 )
 from airbyte_cdk.sources.declarative.extractors import (
     DpathExtractor,
@@ -84,8 +87,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
-    ConcurrentCursorFactory,
-    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -123,6 +124,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     BearerAuthenticator as BearerAuthenticatorModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    CheckDynamicStream as CheckDynamicStreamModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CheckStream as CheckStreamModel,
 )
@@ -249,6 +253,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JsonLineParser as JsonLineParserModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    JsonParser as JsonParserModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JwtAuthenticator as JwtAuthenticatorModel,
 )
@@ -350,6 +357,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     XmlDecoder as XmlDecoderModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    ZipfileDecoder as ZipfileDecoderModel,
+)
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
     ListPartitionRouter,
@@ -440,7 +450,6 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
-    NoopMessageRepository,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -491,6 +500,7 @@ class ModelToComponentFactory:
             BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
             BearerAuthenticatorModel: self.create_bearer_authenticator,
             CheckStreamModel: self.create_check_stream,
+            CheckDynamicStreamModel: self.create_check_dynamic_stream,
             CompositeErrorHandlerModel: self.create_composite_error_handler,
             CompositeRawDecoderModel: self.create_composite_raw_decoder,
             ConcurrencyLevelModel: self.create_concurrency_level,
@@ -525,6 +535,7 @@ class ModelToComponentFactory:
             JsonDecoderModel: self.create_json_decoder,
             JsonlDecoderModel: self.create_jsonl_decoder,
             JsonLineParserModel: self.create_json_line_parser,
+            JsonParserModel: self.create_json_parser,
             GzipJsonDecoderModel: self.create_gzipjson_decoder,
             GzipParserModel: self.create_gzip_parser,
             KeysToLowerModel: self.create_keys_to_lower_transformation,
@@ -564,6 +575,7 @@ class ModelToComponentFactory:
             ConfigComponentsResolverModel: self.create_config_components_resolver,
             StreamConfigModel: self.create_stream_config,
             ComponentMappingDefinitionModel: self.create_components_mapping_definition,
+            ZipfileDecoderModel: self.create_zipfile_decoder,
         }

         # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -843,6 +855,12 @@ class ModelToComponentFactory:
     def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
         return CheckStream(stream_names=model.stream_names, parameters={})

+    @staticmethod
+    def create_check_dynamic_stream(
+        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
+    ) -> CheckDynamicStream:
+        return CheckDynamicStream(stream_count=model.stream_count, parameters={})
+
     def create_composite_error_handler(
         self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
     ) -> CompositeErrorHandler:
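Note: `CheckDynamicStream` complements `CheckStream` for sources whose streams are generated at runtime (see `check_dynamic_stream.py` in the file list). A minimal usage sketch, assuming the manifest fragment and empty `config` below are illustrative:

```python
from airbyte_cdk.sources.declarative.checks import CheckDynamicStream
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
    CheckDynamicStream as CheckDynamicStreamModel,
)

# Illustrative manifest fragment: probe only the first generated stream.
definition = {"type": "CheckDynamicStream", "stream_count": 1}
model = CheckDynamicStreamModel.parse_obj(definition)

factory = ModelToComponentFactory()
check: CheckDynamicStream = factory.create_check_dynamic_stream(model, config={})
# Per the ConnectionChecker interface, `check.check_connection(source, logger, config)`
# would then attempt to read from the first `stream_count` discovered streams.
```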
@@ -874,8 +892,6 @@ class ModelToComponentFactory:
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
-        message_repository: Optional[MessageRepository] = None,
-        runtime_lookback_window: Optional[int] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -933,11 +949,6 @@ class ModelToComponentFactory:
         if evaluated_lookback_window:
             lookback_window = parse_duration(evaluated_lookback_window)

-        if runtime_lookback_window and lookback_window:
-            lookback_window = max(lookback_window, runtime_lookback_window)
-        elif runtime_lookback_window:
-            lookback_window = runtime_lookback_window
-
         connector_state_converter: DateTimeStreamStateConverter
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
@@ -1016,7 +1027,7 @@ class ModelToComponentFactory:
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=message_repository or self._message_repository,
+            message_repository=self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1028,63 +1039,6 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )

-    def create_concurrent_cursor_from_perpartition_cursor(
-        self,
-        state_manager: ConnectorStateManager,
-        model_type: Type[BaseModel],
-        component_definition: ComponentDefinition,
-        stream_name: str,
-        stream_namespace: Optional[str],
-        config: Config,
-        stream_state: MutableMapping[str, Any],
-        partition_router: PartitionRouter,
-        **kwargs: Any,
-    ) -> ConcurrentPerPartitionCursor:
-        component_type = component_definition.get("type")
-        if component_definition.get("type") != model_type.__name__:
-            raise ValueError(
-                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
-            )
-
-        datetime_based_cursor_model = model_type.parse_obj(component_definition)
-
-        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
-            raise ValueError(
-                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
-            )
-
-        interpolated_cursor_field = InterpolatedString.create(
-            datetime_based_cursor_model.cursor_field,
-            parameters=datetime_based_cursor_model.parameters or {},
-        )
-        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
-
-        # Create the cursor factory
-        cursor_factory = ConcurrentCursorFactory(
-            partial(
-                self.create_concurrent_cursor_from_datetime_based_cursor,
-                state_manager=state_manager,
-                model_type=model_type,
-                component_definition=component_definition,
-                stream_name=stream_name,
-                stream_namespace=stream_namespace,
-                config=config,
-                message_repository=NoopMessageRepository(),
-            )
-        )
-
-        # Return the concurrent cursor and state converter
-        return ConcurrentPerPartitionCursor(
-            cursor_factory=cursor_factory,
-            partition_router=partition_router,
-            stream_name=stream_name,
-            stream_namespace=stream_namespace,
-            stream_state=stream_state,
-            message_repository=self._message_repository,  # type: ignore
-            connector_state_manager=state_manager,
-            cursor_field=cursor_field,
-        )
-
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1099,17 +1053,17 @@ class ModelToComponentFactory:
         self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> CursorPaginationStrategy:
         if isinstance(decoder, PaginationDecoderDecorator):
-            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
+            inner_decoder = decoder.decoder
+        else:
+            inner_decoder = decoder
+            decoder = PaginationDecoderDecorator(decoder=decoder)
+
+        if self._is_supported_decoder_for_pagination(inner_decoder):
             decoder_to_use = decoder
         else:
-            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
-            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            raise ValueError(
+                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
+            )

         return CursorPaginationStrategy(
             cursor_value=model.cursor_value,
@@ -1367,15 +1321,18 @@ class ModelToComponentFactory:
             raise ValueError(
                 "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
             )
-        cursor = (
-            combined_slicers
-            if isinstance(
-                combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
-            )
-            else self._create_component_from_model(model=model.incremental_sync, config=config)
-        )
-
-        client_side_incremental_sync = {"cursor": cursor}
+        client_side_incremental_sync = {
+            "date_time_based_cursor": self._create_component_from_model(
+                model=model.incremental_sync, config=config
+            ),
+            "substream_cursor": (
+                combined_slicers
+                if isinstance(
+                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                )
+                else None
+            ),
+        }

         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
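Note: the single `"cursor"` key is replaced by two entries that are forwarded to the client-side incremental record filter. A sketch of the presumed downstream consumption, based on `ClientSideIncrementalRecordFilterDecorator` in `record_filter.py` (also touched in this diff); the keyword splat and exact constructor signature are assumptions:

```python
from airbyte_cdk.sources.declarative.extractors.record_filter import (
    ClientSideIncrementalRecordFilterDecorator,
)

# The factory presumably forwards the dict as keyword arguments, so the filter
# receives the datetime-based cursor plus an optional substream cursor and can
# drop records older than the stream state before they are emitted.
record_filter = ClientSideIncrementalRecordFilterDecorator(
    config=config,
    parameters={},
    **client_side_incremental_sync,  # date_time_based_cursor, substream_cursor
)
```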
@@ -1579,11 +1536,10 @@ class ModelToComponentFactory:
         cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
     ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
         if decoder:
-            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
-            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            if self._is_supported_decoder_for_pagination(decoder):
+                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            else:
+                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
         else:
             decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
         page_size_option = (
@@ -1745,7 +1701,11 @@ class ModelToComponentFactory:

     @staticmethod
     def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
-        return TypesMap(target_type=model.target_type, current_type=model.current_type)
+        return TypesMap(
+            target_type=model.target_type,
+            current_type=model.current_type,
+            condition=model.condition if model.condition is not None else "True",
+        )

     def create_schema_type_identifier(
         self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
@@ -1812,6 +1772,11 @@ class ModelToComponentFactory:
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
         return JsonDecoder(parameters={})

+    @staticmethod
+    def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
+        encoding = model.encoding if model.encoding else "utf-8"
+        return JsonParser(encoding=encoding)
+
    @staticmethod
     def create_jsonl_decoder(
         model: JsonlDecoderModel, config: Config, **kwargs: Any
@@ -1840,6 +1805,12 @@ class ModelToComponentFactory:
     ) -> GzipJsonDecoder:
         return GzipJsonDecoder(parameters={}, encoding=model.encoding)

+    def create_zipfile_decoder(
+        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
+    ) -> ZipfileDecoder:
+        parser = self._create_component_from_model(model=model.parser, config=config)
+        return ZipfileDecoder(parser=parser)
+
     def create_gzip_parser(
         self, model: GzipParserModel, config: Config, **kwargs: Any
     ) -> GzipParser:
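Note: a quick sketch of the new decoder pair in isolation, assuming `ZipfileDecoder(parser=...)` and `JsonParser(encoding=...)` as constructed by the factory methods above; the endpoint URL and response are illustrative:

```python
import requests

from airbyte_cdk.sources.declarative.decoders import ZipfileDecoder
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import JsonParser

decoder = ZipfileDecoder(parser=JsonParser(encoding="utf-8"))

# Hypothetical response whose body is a zip archive of JSON files; each
# archived file is handed to the inner JsonParser, and decode() is expected
# to yield the parsed records one mapping at a time.
response = requests.get("https://example.com/export.zip")
for record in decoder.decode(response):
    print(record)
```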
@@ -1949,21 +1920,33 @@ class ModelToComponentFactory:
             expires_in_name=InterpolatedString.create(
                 model.expires_in_name or "expires_in", parameters=model.parameters or {}
             ).eval(config),
+            client_id_name=InterpolatedString.create(
+                model.client_id_name or "client_id", parameters=model.parameters or {}
+            ).eval(config),
             client_id=InterpolatedString.create(
                 model.client_id, parameters=model.parameters or {}
             ).eval(config),
+            client_secret_name=InterpolatedString.create(
+                model.client_secret_name or "client_secret", parameters=model.parameters or {}
+            ).eval(config),
             client_secret=InterpolatedString.create(
                 model.client_secret, parameters=model.parameters or {}
             ).eval(config),
             access_token_config_path=model.refresh_token_updater.access_token_config_path,
             refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
             token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
+            grant_type_name=InterpolatedString.create(
+                model.grant_type_name or "grant_type", parameters=model.parameters or {}
+            ).eval(config),
             grant_type=InterpolatedString.create(
                 model.grant_type or "refresh_token", parameters=model.parameters or {}
             ).eval(config),
             refresh_request_body=InterpolatedMapping(
                 model.refresh_request_body or {}, parameters=model.parameters or {}
             ).eval(config),
+            refresh_request_headers=InterpolatedMapping(
+                model.refresh_request_headers or {}, parameters=model.parameters or {}
+            ).eval(config),
             scopes=model.scopes,
             token_expiry_date_format=model.token_expiry_date_format,
             message_repository=self._message_repository,
@@ -1975,11 +1958,16 @@ class ModelToComponentFactory:
         return DeclarativeOauth2Authenticator(  # type: ignore
             access_token_name=model.access_token_name or "access_token",
             access_token_value=model.access_token_value,
+            client_id_name=model.client_id_name or "client_id",
             client_id=model.client_id,
+            client_secret_name=model.client_secret_name or "client_secret",
             client_secret=model.client_secret,
             expires_in_name=model.expires_in_name or "expires_in",
+            grant_type_name=model.grant_type_name or "grant_type",
             grant_type=model.grant_type or "refresh_token",
             refresh_request_body=model.refresh_request_body,
+            refresh_request_headers=model.refresh_request_headers,
+            refresh_token_name=model.refresh_token_name or "refresh_token",
             refresh_token=model.refresh_token,
             scopes=model.scopes,
             token_expiry_date=model.token_expiry_date,
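Note: the new `*_name` fields and `refresh_request_headers` let a manifest adapt to providers with non-standard OAuth parameter names. A hedged sketch of the refresh payload the authenticator would presumably build when the names are overridden (all values invented):

```python
# Manifest overrides (illustrative):
#   client_id_name: "app_id"
#   client_secret_name: "app_secret"
#   grant_type_name: "grant"
#   refresh_token_name: "token"
refresh_payload = {
    "app_id": "<client_id>",
    "app_secret": "<client_secret>",
    "grant": "refresh_token",
    "token": "<refresh_token>",
}
# The token refresh call would also carry any `refresh_request_headers`,
# e.g. {"X-Api-Version": "2"}, merged into the request.
```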
@@ -1991,22 +1979,22 @@ class ModelToComponentFactory:
             message_repository=self._message_repository,
         )

-    @staticmethod
     def create_offset_increment(
-        model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
+        self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> OffsetIncrement:
         if isinstance(decoder, PaginationDecoderDecorator):
-            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
+            inner_decoder = decoder.decoder
+        else:
+            inner_decoder = decoder
+            decoder = PaginationDecoderDecorator(decoder=decoder)
+
+        if self._is_supported_decoder_for_pagination(inner_decoder):
             decoder_to_use = decoder
         else:
-            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
-            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            raise ValueError(
+                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
+            )
+
         return OffsetIncrement(
             page_size=model.page_size,
             config=config,
@@ -2191,7 +2179,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
+        ):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -2351,7 +2339,7 @@ class ModelToComponentFactory:
             extractor=download_extractor,
             name=name,
             record_filter=None,
-            transformations=[],
+            transformations=transformations,
             schema_normalization=TypeTransformer(TransformConfig.NoTransform),
             config=config,
             parameters={},
@@ -2388,6 +2376,16 @@ class ModelToComponentFactory:
             if model.delete_requester
             else None
         )
+        url_requester = (
+            self._create_component_from_model(
+                model=model.url_requester,
+                decoder=decoder,
+                config=config,
+                name=f"job extract_url - {name}",
+            )
+            if model.url_requester
+            else None
+        )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
@@ -2398,6 +2396,7 @@ class ModelToComponentFactory:
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
+            url_requester=url_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
@@ -2595,3 +2594,25 @@ class ModelToComponentFactory:
             components_mapping=components_mapping,
             parameters=model.parameters or {},
         )
+
+    _UNSUPPORTED_DECODER_ERROR = (
+        "Specified decoder of {decoder_type} is not supported for pagination. "
+        "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. "
+        "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
+    )
+
+    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
+        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
+            return True
+        elif isinstance(decoder, CompositeRawDecoder):
+            return self._is_supported_parser_for_pagination(decoder.parser)
+        else:
+            return False
+
+    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
+        if isinstance(parser, JsonParser):
+            return True
+        elif isinstance(parser, GzipParser):
+            return isinstance(parser.inner_parser, JsonParser)
+        else:
+            return False
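Note: some illustrative checks of the rule these helpers encode; constructor arguments follow the factory methods earlier in this file, but treat the exact signatures as assumptions:

```python
factory = ModelToComponentFactory()

# Plain JSON or XML decoders are accepted for pagination.
assert factory._is_supported_decoder_for_pagination(JsonDecoder(parameters={}))

# A CompositeRawDecoder is accepted only when its parser chain bottoms out in
# a JsonParser, optionally wrapped in a GzipParser.
assert factory._is_supported_decoder_for_pagination(
    CompositeRawDecoder(parser=GzipParser(inner_parser=JsonParser(encoding="utf-8")))
)

# Anything else returns False, and the pagination call sites above then raise
# ValueError with _UNSUPPORTED_DECODER_ERROR.
assert not factory._is_supported_decoder_for_pagination(
    CompositeRawDecoder(parser=CsvParser())
)
```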
@@ -0,0 +1,56 @@
+# AsyncHttpJobRepository sequence diagram
+
+- Components marked as optional are not required and can be ignored.
+- If `url_requester` is not provided, `urls_extractor` will extract URLs from the `polling_job_response`.
+- Interpolation contexts such as `create_job_response` or `polling_job_response` can be obtained from the `stream_slice`.
+
+```mermaid
+---
+title: AsyncHttpJobRepository Sequence Diagram
+---
+sequenceDiagram
+    participant AsyncHttpJobRepository as AsyncOrchestrator
+    participant CreationRequester as creation_requester
+    participant PollingRequester as polling_requester
+    participant UrlRequester as url_requester (Optional)
+    participant DownloadRetriever as download_retriever
+    participant AbortRequester as abort_requester (Optional)
+    participant DeleteRequester as delete_requester (Optional)
+    participant Reporting Server as Async Reporting Server
+
+    AsyncHttpJobRepository ->> CreationRequester: Initiate job creation
+    CreationRequester ->> Reporting Server: Create job request
+    Reporting Server -->> CreationRequester: Job ID response
+    CreationRequester -->> AsyncHttpJobRepository: Job ID
+
+    loop Poll for job status
+        AsyncHttpJobRepository ->> PollingRequester: Check job status
+        PollingRequester ->> Reporting Server: Status request (interpolation_context: `create_job_response`)
+        Reporting Server -->> PollingRequester: Status response
+        PollingRequester -->> AsyncHttpJobRepository: Job status
+    end
+
+    alt Status: Ready
+        AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
+        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_job_response`)
+        Reporting Server -->> UrlRequester: Download URLs
+        UrlRequester -->> AsyncHttpJobRepository: Download URLs
+
+        AsyncHttpJobRepository ->> DownloadRetriever: Download reports
+        DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `url`)
+        Reporting Server -->> DownloadRetriever: Report data
+        DownloadRetriever -->> AsyncHttpJobRepository: Report data
+    else Status: Failed
+        AsyncHttpJobRepository ->> AbortRequester: Send abort request
+        AbortRequester ->> Reporting Server: Abort job
+        Reporting Server -->> AbortRequester: Abort confirmation
+        AbortRequester -->> AsyncHttpJobRepository: Confirmation
+    end
+
+    AsyncHttpJobRepository ->> DeleteRequester: Send delete job request
+    DeleteRequester ->> Reporting Server: Delete job
+    Reporting Server -->> DeleteRequester: Deletion confirmation
+    DeleteRequester -->> AsyncHttpJobRepository: Confirmation
+
+
+```
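Note: a minimal wiring sketch matching the diagram. Field names follow the `AsyncHttpJobRepository` dataclass shown in the next file; `status_mapping` is an assumption based on the class's other usages, and construction of the individual requesters, retriever, and extractors is elided:

```python
repository = AsyncHttpJobRepository(
    creation_requester=creation_requester,
    polling_requester=polling_requester,
    download_retriever=download_retriever,
    abort_requester=None,  # optional
    delete_requester=None,  # optional
    status_extractor=status_extractor,
    status_mapping=status_mapping,
    urls_extractor=urls_extractor,
    url_requester=url_requester,  # optional: omit to extract URLs from the polling response
)
```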
@@ -31,6 +31,10 @@ LOGGER = logging.getLogger("airbyte")

 @dataclass
 class AsyncHttpJobRepository(AsyncJobRepository):
+    """
+    See the README file for more details about the flow.
+    """
+
     creation_requester: Requester
     polling_requester: Requester
     download_retriever: SimpleRetriever
@@ -44,6 +48,9 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
+    url_requester: Optional[Requester] = (
+        None  # used when the polling_requester provides some <id> and an extra request is needed to obtain the list of URLs to download from
+    )

     def __post_init__(self) -> None:
         self._create_job_response_by_id: Dict[str, Response] = {}
@@ -186,10 +193,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):

        """

-        for url in self.urls_extractor.extract_records(
-            self._polling_job_response_by_id[job.api_job_id()]
-        ):
-            stream_slice: StreamSlice = StreamSlice(partition={"url": url}, cursor_slice={})
+        for url in self._get_download_url(job):
+            job_slice = job.job_parameters()
+            stream_slice = StreamSlice(
+                partition=job_slice.partition,
+                cursor_slice=job_slice.cursor_slice,
+                extra_fields={**job_slice.extra_fields, "url": url},
+            )
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
                     yield message.data
@@ -226,3 +236,22 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             cursor_slice={},
         )
         return stream_slice
+
+    def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
+        if not self.url_requester:
+            url_response = self._polling_job_response_by_id[job.api_job_id()]
+        else:
+            stream_slice: StreamSlice = StreamSlice(
+                partition={
+                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
+                },
+                cursor_slice={},
+            )
+            url_response = self.url_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect url_requester to always be present, otherwise raise an exception as we cannot proceed with the report
+            if not url_response:
+                raise AirbyteTracedException(
+                    internal_message="Always expect a response or an exception from url_requester",
+                    failure_type=FailureType.system_error,
+                )
+
+        yield from self.urls_extractor.extract_records(url_response)  # type: ignore # we expect urls_extractor to always return a list of strings
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.request_option_provider.get_request_headers,
+            self.stream_slicer.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
@@ -10,6 +10,7 @@ from typing import Any, List, Mapping, MutableMapping, Optional, Union
 import dpath
 from typing_extensions import deprecated

+from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
@@ -53,6 +54,7 @@ class TypesMap:

     target_type: Union[List[str], str]
     current_type: Union[List[str], str]
+    condition: Optional[str]


 @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@@ -177,7 +179,7 @@ class DynamicSchemaLoader(SchemaLoader):
             if field_type_path
             else "string"
         )
-        mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
+        mapped_field_type = self._replace_type_if_not_valid(raw_field_type, raw_schema)
         if (
             isinstance(mapped_field_type, list)
             and len(mapped_field_type) == 2
@@ -194,14 +196,22 @@ class DynamicSchemaLoader(SchemaLoader):
         )

     def _replace_type_if_not_valid(
-        self, field_type: Union[List[str], str]
+        self,
+        field_type: Union[List[str], str],
+        raw_schema: MutableMapping[str, Any],
     ) -> Union[List[str], str]:
         """
         Replaces a field type if it matches a type mapping in `types_map`.
         """
         if self.schema_type_identifier.types_mapping:
             for types_map in self.schema_type_identifier.types_mapping:
-                if field_type == types_map.current_type:
+                # `condition` is an optional parameter; default to true when it is not provided
+                condition = InterpolatedBoolean(
+                    condition=types_map.condition if types_map.condition is not None else "True",
+                    parameters={},
+                ).eval(config=self.config, raw_schema=raw_schema)
+
+                if field_type == types_map.current_type and condition:
                     return types_map.target_type
         return field_type

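Note: for illustration, a hypothetical conditional mapping. Per the code above, `condition` is evaluated as an interpolated boolean with `raw_schema` available in the context; the `scale` key used here is invented:

```python
types_map = TypesMap(
    current_type="number",
    target_type="integer",
    # Hypothetical: only map number -> integer when the provider reports the
    # field with zero decimal places.
    condition="{{ raw_schema['scale'] == 0 }}",
)
# With condition=None, the factory substitutes "True", preserving the previous
# unconditional behavior.
```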
@@ -31,6 +31,17 @@ class DeliverRawFiles(BaseModel):

     delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)

+    preserve_directory_structure: bool = Field(
+        title="Preserve Sub-Directories in File Paths",
+        description=(
+            "If enabled, sends subdirectory folder structure "
+            "along with source file names to the destination. "
+            "Otherwise, files will be synced by their names only. "
+            "This option is ignored when file-based replication is not enabled."
+        ),
+        default=True,
+    )
+

 class AbstractFileBasedSpec(BaseModel):
     """