airbyte-cdk 6.20.0__py3-none-any.whl → 6.20.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. airbyte_cdk/sources/declarative/auth/oauth.py +0 -34
  2. airbyte_cdk/sources/declarative/checks/__init__.py +2 -18
  3. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +80 -16
  4. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +21 -93
  5. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +0 -43
  6. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  7. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  8. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +331 -0
  9. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  10. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
  11. airbyte_cdk/sources/declarative/manifest_declarative_source.py +1 -2
  12. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +26 -96
  13. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +105 -111
  14. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -33
  15. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  16. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -11
  17. airbyte_cdk/sources/file_based/exceptions.py +0 -34
  18. airbyte_cdk/sources/file_based/file_based_source.py +5 -28
  19. airbyte_cdk/sources/file_based/file_based_stream_reader.py +4 -18
  20. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +2 -25
  21. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -30
  22. airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
  23. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +4 -33
  24. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -42
  25. airbyte_cdk/sources/types.py +0 -3
  26. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/METADATA +1 -1
  27. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/RECORD +30 -31
  28. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +0 -51
  29. airbyte_cdk/sources/declarative/requesters/README.md +0 -56
  30. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/LICENSE.txt +0 -0
  31. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/WHEEL +0 -0
  32. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -54,7 +54,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
     SessionTokenProvider,
     TokenProvider,
 )
-from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
+from airbyte_cdk.sources.declarative.checks import CheckStream
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
 from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
@@ -72,8 +72,6 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
     CsvParser,
     GzipParser,
     JsonLineParser,
-    JsonParser,
-    Parser,
 )
 from airbyte_cdk.sources.declarative.extractors import (
     DpathExtractor,
@@ -86,6 +84,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -123,9 +123,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     BearerAuthenticator as BearerAuthenticatorModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    CheckDynamicStream as CheckDynamicStreamModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CheckStream as CheckStreamModel,
 )
@@ -252,9 +249,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JsonLineParser as JsonLineParserModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    JsonParser as JsonParserModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JwtAuthenticator as JwtAuthenticatorModel,
 )
@@ -446,6 +440,7 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
+    NoopMessageRepository,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -496,7 +491,6 @@ class ModelToComponentFactory:
             BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
             BearerAuthenticatorModel: self.create_bearer_authenticator,
             CheckStreamModel: self.create_check_stream,
-            CheckDynamicStreamModel: self.create_check_dynamic_stream,
             CompositeErrorHandlerModel: self.create_composite_error_handler,
             CompositeRawDecoderModel: self.create_composite_raw_decoder,
             ConcurrencyLevelModel: self.create_concurrency_level,
@@ -531,7 +525,6 @@ class ModelToComponentFactory:
             JsonDecoderModel: self.create_json_decoder,
             JsonlDecoderModel: self.create_jsonl_decoder,
             JsonLineParserModel: self.create_json_line_parser,
-            JsonParserModel: self.create_json_parser,
             GzipJsonDecoderModel: self.create_gzipjson_decoder,
             GzipParserModel: self.create_gzip_parser,
             KeysToLowerModel: self.create_keys_to_lower_transformation,
@@ -850,12 +843,6 @@ class ModelToComponentFactory:
     def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
         return CheckStream(stream_names=model.stream_names, parameters={})
 
-    @staticmethod
-    def create_check_dynamic_stream(
-        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
-    ) -> CheckDynamicStream:
-        return CheckDynamicStream(stream_count=model.stream_count, parameters={})
-
     def create_composite_error_handler(
         self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
     ) -> CompositeErrorHandler:
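Retiring a component type touches three places in this factory, as the CheckDynamicStream hunks show: the import, the entry in the model-to-constructor mapping, and the builder itself. A minimal sketch of that registry pattern, using stand-in classes rather than the real CDK types:

    from typing import Any, Callable, Mapping, Type

    # Stand-ins for the real pydantic model and runtime component.
    class CheckStreamModel:
        stream_names = ["users"]

    class CheckStream:
        def __init__(self, stream_names: list):
            self.stream_names = stream_names

    def create_check_stream(model: CheckStreamModel) -> CheckStream:
        return CheckStream(stream_names=model.stream_names)

    # The factory dispatches on the model's class; deleting one entry (plus
    # its import and builder) removes the component type from the manifest
    # language.
    CONSTRUCTORS: Mapping[Type[Any], Callable[[Any], Any]] = {
        CheckStreamModel: create_check_stream,
    }

    model = CheckStreamModel()
    component = CONSTRUCTORS[type(model)](model)
    assert component.stream_names == ["users"]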
@@ -887,6 +874,8 @@ class ModelToComponentFactory:
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
+        message_repository: Optional[MessageRepository] = None,
+        runtime_lookback_window: Optional[int] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -944,6 +933,11 @@ class ModelToComponentFactory:
         if evaluated_lookback_window:
             lookback_window = parse_duration(evaluated_lookback_window)
 
+        if runtime_lookback_window and lookback_window:
+            lookback_window = max(lookback_window, runtime_lookback_window)
+        elif runtime_lookback_window:
+            lookback_window = runtime_lookback_window
+
         connector_state_converter: DateTimeStreamStateConverter
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
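The new runtime_lookback_window argument lets a caller widen the lookback window beyond what the manifest configures, never narrow it. A minimal standalone sketch of the merge rule, assuming both values are comparable durations:

    from datetime import timedelta
    from typing import Optional

    def merge_lookback_windows(
        configured: Optional[timedelta], runtime: Optional[timedelta]
    ) -> Optional[timedelta]:
        # Hypothetical standalone version of the branch added above: take the
        # larger of the two windows, falling back to whichever one is set.
        if runtime and configured:
            return max(configured, runtime)
        return runtime or configured

    assert merge_lookback_windows(timedelta(days=1), timedelta(days=7)) == timedelta(days=7)
    assert merge_lookback_windows(None, timedelta(days=7)) == timedelta(days=7)
    assert merge_lookback_windows(timedelta(days=1), None) == timedelta(days=1)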
@@ -1022,7 +1016,7 @@ class ModelToComponentFactory:
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=self._message_repository,
+            message_repository=message_repository or self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1034,6 +1028,63 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )
 
+    def create_concurrent_cursor_from_perpartition_cursor(
+        self,
+        state_manager: ConnectorStateManager,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        stream_state: MutableMapping[str, Any],
+        partition_router: PartitionRouter,
+        **kwargs: Any,
+    ) -> ConcurrentPerPartitionCursor:
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        datetime_based_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            datetime_based_cursor_model.cursor_field,
+            parameters=datetime_based_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        # Create the cursor factory
+        cursor_factory = ConcurrentCursorFactory(
+            partial(
+                self.create_concurrent_cursor_from_datetime_based_cursor,
+                state_manager=state_manager,
+                model_type=model_type,
+                component_definition=component_definition,
+                stream_name=stream_name,
+                stream_namespace=stream_namespace,
+                config=config,
+                message_repository=NoopMessageRepository(),
+            )
+        )
+
+        # Return the concurrent cursor and state converter
+        return ConcurrentPerPartitionCursor(
+            cursor_factory=cursor_factory,
+            partition_router=partition_router,
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=self._message_repository,  # type: ignore
+            connector_state_manager=state_manager,
+            cursor_field=cursor_field,
+        )
+
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
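The wiring above leans on functools.partial: the stream-level arguments are frozen once, and the factory then builds one inner cursor per partition, each given a NoopMessageRepository so that only the outer per-partition cursor emits state. A sketch of the pattern with hypothetical stand-ins:

    from functools import partial

    # Hypothetical stand-in for create_concurrent_cursor_from_datetime_based_cursor:
    # the factory freezes the stream-level arguments once, then builds one
    # cursor per partition from that partition's own state.
    def build_cursor(stream_name: str, emit_state: bool, stream_state: dict) -> dict:
        return {"stream": stream_name, "emit_state": emit_state, "state": stream_state}

    cursor_factory = partial(build_cursor, stream_name="events", emit_state=False)

    cursor_a = cursor_factory(stream_state={"updated_at": "2024-01-01"})
    cursor_b = cursor_factory(stream_state={"updated_at": "2024-06-01"})
    assert cursor_a["state"] != cursor_b["state"]                 # per-partition state
    assert cursor_a["stream"] == cursor_b["stream"] == "events"   # shared wiring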
@@ -1048,17 +1099,17 @@ class ModelToComponentFactory:
         self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> CursorPaginationStrategy:
         if isinstance(decoder, PaginationDecoderDecorator):
-            inner_decoder = decoder.decoder
-        else:
-            inner_decoder = decoder
-            decoder = PaginationDecoderDecorator(decoder=decoder)
-
-        if self._is_supported_decoder_for_pagination(inner_decoder):
+            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
             decoder_to_use = decoder
         else:
-            raise ValueError(
-                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
-            )
+            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
+            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
 
         return CursorPaginationStrategy(
             cursor_value=model.cursor_value,
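Both branches now inline the same JSON/XML check instead of delegating to the removed _is_supported_decoder_for_pagination helper. A condensed sketch of the resulting control flow, with empty stand-in classes:

    # Stand-ins for the real CDK decoder types.
    class JsonDecoder: ...
    class XmlDecoder: ...

    class PaginationDecoderDecorator:
        def __init__(self, decoder):
            self.decoder = decoder

    def select_pagination_decoder(decoder):
        if isinstance(decoder, PaginationDecoderDecorator):
            # Already wrapped: validate the inner decoder, reuse the wrapper.
            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
                raise ValueError(f"Provided decoder of {type(decoder.decoder)=} is not supported.")
            return decoder
        # Bare decoder: validate it, then wrap it for pagination.
        if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
            raise ValueError(f"Provided decoder of {type(decoder)=} is not supported.")
        return PaginationDecoderDecorator(decoder=decoder)

    wrapped = select_pagination_decoder(JsonDecoder())
    assert isinstance(wrapped, PaginationDecoderDecorator)
    assert select_pagination_decoder(wrapped) is wrapped  # idempotent for wrapped input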
@@ -1316,18 +1367,15 @@ class ModelToComponentFactory:
                 raise ValueError(
                     "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
                 )
-            client_side_incremental_sync = {
-                "date_time_based_cursor": self._create_component_from_model(
-                    model=model.incremental_sync, config=config
-                ),
-                "substream_cursor": (
-                    combined_slicers
-                    if isinstance(
-                        combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
-                    )
-                    else None
-                ),
-            }
+            cursor = (
+                combined_slicers
+                if isinstance(
+                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                )
+                else self._create_component_from_model(model=model.incremental_sync, config=config)
+            )
+
+            client_side_incremental_sync = {"cursor": cursor}
 
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
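Client-side filtering previously received two entries (date_time_based_cursor plus an optional substream_cursor); it now receives a single cursor. A stand-in sketch of the selection, where DatetimeBasedCursor() stands in for the component the factory would otherwise build:

    class GlobalSubstreamCursor: ...
    class PerPartitionWithGlobalCursor: ...
    class DatetimeBasedCursor: ...

    def pick_client_side_cursor(combined_slicers):
        # Reuse a global or per-partition-with-global cursor if one was
        # combined; otherwise build a fresh datetime-based cursor.
        cursor = (
            combined_slicers
            if isinstance(combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor))
            else DatetimeBasedCursor()  # hypothetical: stands in for _create_component_from_model
        )
        return {"cursor": cursor}

    assert isinstance(pick_client_side_cursor(GlobalSubstreamCursor())["cursor"], GlobalSubstreamCursor)
    assert isinstance(pick_client_side_cursor(object())["cursor"], DatetimeBasedCursor)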
@@ -1531,10 +1579,11 @@ class ModelToComponentFactory:
         cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
     ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
         if decoder:
-            if self._is_supported_decoder_for_pagination(decoder):
-                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
-            else:
-                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
+            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
+            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
         else:
             decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
         page_size_option = (
@@ -1763,11 +1812,6 @@ class ModelToComponentFactory:
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
         return JsonDecoder(parameters={})
 
-    @staticmethod
-    def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
-        encoding = model.encoding if model.encoding else "utf-8"
-        return JsonParser(encoding=encoding)
-
     @staticmethod
     def create_jsonl_decoder(
         model: JsonlDecoderModel, config: Config, **kwargs: Any
@@ -1905,33 +1949,21 @@ class ModelToComponentFactory:
             expires_in_name=InterpolatedString.create(
                 model.expires_in_name or "expires_in", parameters=model.parameters or {}
             ).eval(config),
-            client_id_name=InterpolatedString.create(
-                model.client_id_name or "client_id", parameters=model.parameters or {}
-            ).eval(config),
             client_id=InterpolatedString.create(
                 model.client_id, parameters=model.parameters or {}
             ).eval(config),
-            client_secret_name=InterpolatedString.create(
-                model.client_secret_name or "client_secret", parameters=model.parameters or {}
-            ).eval(config),
             client_secret=InterpolatedString.create(
                 model.client_secret, parameters=model.parameters or {}
             ).eval(config),
             access_token_config_path=model.refresh_token_updater.access_token_config_path,
             refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
             token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
-            grant_type_name=InterpolatedString.create(
-                model.grant_type_name or "grant_type", parameters=model.parameters or {}
-            ).eval(config),
             grant_type=InterpolatedString.create(
                 model.grant_type or "refresh_token", parameters=model.parameters or {}
             ).eval(config),
             refresh_request_body=InterpolatedMapping(
                 model.refresh_request_body or {}, parameters=model.parameters or {}
             ).eval(config),
-            refresh_request_headers=InterpolatedMapping(
-                model.refresh_request_headers or {}, parameters=model.parameters or {}
-            ).eval(config),
             scopes=model.scopes,
             token_expiry_date_format=model.token_expiry_date_format,
             message_repository=self._message_repository,
@@ -1943,16 +1975,11 @@ class ModelToComponentFactory:
         return DeclarativeOauth2Authenticator(  # type: ignore
             access_token_name=model.access_token_name or "access_token",
             access_token_value=model.access_token_value,
-            client_id_name=model.client_id_name or "client_id",
             client_id=model.client_id,
-            client_secret_name=model.client_secret_name or "client_secret",
             client_secret=model.client_secret,
             expires_in_name=model.expires_in_name or "expires_in",
-            grant_type_name=model.grant_type_name or "grant_type",
             grant_type=model.grant_type or "refresh_token",
             refresh_request_body=model.refresh_request_body,
-            refresh_request_headers=model.refresh_request_headers,
-            refresh_token_name=model.refresh_token_name or "refresh_token",
             refresh_token=model.refresh_token,
             scopes=model.scopes,
             token_expiry_date=model.token_expiry_date,
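Taken together, the two OAuth hunks drop the knobs that let a manifest rename the token-refresh request fields (client_id_name, client_secret_name, grant_type_name, refresh_token_name) as well as refresh_request_headers. A sketch of what the removed *_name knobs controlled, with hypothetical credential values:

    # With the knobs removed, the refresh request always uses the default keys:
    default_payload = {
        "grant_type": "refresh_token",
        "client_id": "my-client-id",          # hypothetical values
        "client_secret": "my-client-secret",
        "refresh_token": "my-refresh-token",
    }

    # 6.20.0 allowed renaming keys per API quirk, e.g. grant_type_name="type"
    # and client_id_name="app_id" would have produced:
    renamed_payload = {
        "type": "refresh_token",
        "app_id": "my-client-id",
        "client_secret": "my-client-secret",
        "refresh_token": "my-refresh-token",
    }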
@@ -1964,22 +1991,22 @@ class ModelToComponentFactory:
             message_repository=self._message_repository,
         )
 
+    @staticmethod
     def create_offset_increment(
-        self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
+        model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> OffsetIncrement:
         if isinstance(decoder, PaginationDecoderDecorator):
-            inner_decoder = decoder.decoder
-        else:
-            inner_decoder = decoder
-            decoder = PaginationDecoderDecorator(decoder=decoder)
-
-        if self._is_supported_decoder_for_pagination(inner_decoder):
+            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
             decoder_to_use = decoder
         else:
-            raise ValueError(
-                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
-            )
-
+            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
+            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
         return OffsetIncrement(
             page_size=model.page_size,
             config=config,
@@ -2164,7 +2191,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ):
+        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
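The first clause reads as a double negative, so it is worth spelling out: it holds for everything except an exact DatetimeBasedCursor, and the new second clause additionally excludes PerPartitionWithGlobalCursor. A truth-table sketch with stand-in classes:

    # Stand-in classes; CustomDatetimeCursor is a hypothetical subclass.
    class DatetimeBasedCursor: ...
    class CustomDatetimeCursor(DatetimeBasedCursor): ...
    class PerPartitionWithGlobalCursor: ...

    def guard(stream_slicer) -> bool:
        return (
            not isinstance(stream_slicer, DatetimeBasedCursor)
            or type(stream_slicer) is not DatetimeBasedCursor
        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor)

    assert guard(CustomDatetimeCursor())              # subclasses still match
    assert guard(object())                            # unrelated slicers still match
    assert not guard(DatetimeBasedCursor())           # exact type: excluded, as before
    assert not guard(PerPartitionWithGlobalCursor())  # newly excluded in this build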
@@ -2324,7 +2351,7 @@ class ModelToComponentFactory:
             extractor=download_extractor,
             name=name,
             record_filter=None,
-            transformations=transformations,
+            transformations=[],
             schema_normalization=TypeTransformer(TransformConfig.NoTransform),
             config=config,
             parameters={},
@@ -2361,16 +2388,6 @@ class ModelToComponentFactory:
             if model.delete_requester
             else None
         )
-        url_requester = (
-            self._create_component_from_model(
-                model=model.url_requester,
-                decoder=decoder,
-                config=config,
-                name=f"job extract_url - {name}",
-            )
-            if model.url_requester
-            else None
-        )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
@@ -2381,7 +2398,6 @@ class ModelToComponentFactory:
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
-            url_requester=url_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
@@ -2579,25 +2595,3 @@ class ModelToComponentFactory:
             components_mapping=components_mapping,
             parameters=model.parameters or {},
         )
-
-    _UNSUPPORTED_DECODER_ERROR = (
-        "Specified decoder of {decoder_type} is not supported for pagination."
-        "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead."
-        "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
-    )
-
-    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
-        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
-            return True
-        elif isinstance(decoder, CompositeRawDecoder):
-            return self._is_supported_parser_for_pagination(decoder.parser)
-        else:
-            return False
-
-    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
-        if isinstance(parser, JsonParser):
-            return True
-        elif isinstance(parser, GzipParser):
-            return isinstance(parser.inner_parser, JsonParser)
-        else:
-            return False
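For contrast with the inlined JSON/XML-only checks above, the helpers deleted here encoded 6.20.0's wider acceptance rule: a CompositeRawDecoder also passed when its parser chain bottomed out in a JsonParser, possibly under one layer of gzip. A self-contained restatement with stand-in classes:

    class JsonDecoder: ...
    class XmlDecoder: ...
    class JsonParser: ...

    class GzipParser:
        def __init__(self, inner_parser):
            self.inner_parser = inner_parser

    class CompositeRawDecoder:
        def __init__(self, parser):
            self.parser = parser

    def is_supported_for_pagination(decoder) -> bool:
        # JSON/XML pass directly; a composite decoder passes only when its
        # parser is JsonParser, or GzipParser wrapping JsonParser.
        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
            return True
        if isinstance(decoder, CompositeRawDecoder):
            parser = decoder.parser
            if isinstance(parser, GzipParser):
                return isinstance(parser.inner_parser, JsonParser)
            return isinstance(parser, JsonParser)
        return False

    assert is_supported_for_pagination(CompositeRawDecoder(GzipParser(JsonParser())))
    assert not is_supported_for_pagination(CompositeRawDecoder(GzipParser(GzipParser(JsonParser()))))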
airbyte_cdk/sources/declarative/requesters/http_job_repository.py

@@ -31,10 +31,6 @@ LOGGER = logging.getLogger("airbyte")
 
 @dataclass
 class AsyncHttpJobRepository(AsyncJobRepository):
-    """
-    See Readme file for more details about flow.
-    """
-
     creation_requester: Requester
     polling_requester: Requester
     download_retriever: SimpleRetriever
@@ -48,9 +44,6 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
-    url_requester: Optional[Requester] = (
-        None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
-    )
 
     def __post_init__(self) -> None:
         self._create_job_response_by_id: Dict[str, Response] = {}
@@ -193,13 +186,10 @@ class AsyncHttpJobRepository(AsyncJobRepository):
 
         """
 
-        for url in self._get_download_url(job):
-            job_slice = job.job_parameters()
-            stream_slice = StreamSlice(
-                partition=job_slice.partition,
-                cursor_slice=job_slice.cursor_slice,
-                extra_fields={**job_slice.extra_fields, "url": url},
-            )
+        for url in self.urls_extractor.extract_records(
+            self._polling_job_response_by_id[job.api_job_id()]
+        ):
+            stream_slice: StreamSlice = StreamSlice(partition={"url": url}, cursor_slice={})
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
                     yield message.data
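The revert also changes the shape of the slice handed to the download retriever. Sketched with plain dicts (values hypothetical): 6.20.0 kept the job's partition and carried the URL in extra_fields, while this build goes back to a minimal slice whose partition is the URL itself.

    url = "https://api.example.com/download/123"  # hypothetical download URL

    # 6.20.0: preserve the job's partition/cursor and smuggle the URL alongside.
    slice_6_20_0 = {
        "partition": {"report_id": "123"},
        "cursor_slice": {"start": "2024-01-01"},
        "extra_fields": {"url": url},
    }

    # 6.20.2.dev0: the URL is the partition; any job context is dropped.
    slice_dev = {
        "partition": {"url": url},
        "cursor_slice": {},
    }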
@@ -236,22 +226,3 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             cursor_slice={},
         )
         return stream_slice
-
-    def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
-        if not self.url_requester:
-            url_response = self._polling_job_response_by_id[job.api_job_id()]
-        else:
-            stream_slice: StreamSlice = StreamSlice(
-                partition={
-                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
-                },
-                cursor_slice={},
-            )
-            url_response = self.url_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect url_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
-        if not url_response:
-            raise AirbyteTracedException(
-                internal_message="Always expect a response or an exception from url_requester",
-                failure_type=FailureType.system_error,
-            )
-
-        yield from self.urls_extractor.extract_records(url_response)  # type: ignore # we expect urls_extractor to always return list of strings
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py

@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.stream_slicer.get_request_headers,
+            self.request_option_provider.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py

@@ -31,17 +31,6 @@ class DeliverRawFiles(BaseModel):
 
     delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)
 
-    preserve_directory_structure: bool = Field(
-        title="Preserve Sub-Directories in File Paths",
-        description=(
-            "If enabled, sends subdirectory folder structure "
-            "along with source file names to the destination. "
-            "Otherwise, files will be synced by their names only. "
-            "This option is ignored when file-based replication is not enabled."
-        ),
-        default=True,
-    )
-
 
 class AbstractFileBasedSpec(BaseModel):
     """
airbyte_cdk/sources/file_based/exceptions.py

@@ -111,40 +111,6 @@ class ErrorListingFiles(BaseFileBasedSourceError):
     pass
 
 
-class DuplicatedFilesError(BaseFileBasedSourceError):
-    def __init__(self, duplicated_files_names: List[dict[str, List[str]]], **kwargs: Any):
-        self._duplicated_files_names = duplicated_files_names
-        self._stream_name: str = kwargs["stream"]
-        super().__init__(self._format_duplicate_files_error_message(), **kwargs)
-
-    def _format_duplicate_files_error_message(self) -> str:
-        duplicated_files_messages = []
-        for duplicated_file in self._duplicated_files_names:
-            for duplicated_file_name, file_paths in duplicated_file.items():
-                file_duplicated_message = (
-                    f"{len(file_paths)} duplicates found for file name {duplicated_file_name}:\n\n"
-                    + "".join(f"\n - {file_paths}")
-                )
-                duplicated_files_messages.append(file_duplicated_message)
-
-        error_message = (
-            f"ERROR: Duplicate filenames found for stream {self._stream_name}. "
-            "Duplicate file names are not allowed if the Preserve Sub-Directories in File Paths option is disabled. "
-            "Please remove or rename the duplicate files before attempting to re-run the sync.\n\n"
-            + "\n".join(duplicated_files_messages)
-        )
-
-        return error_message
-
-    def __repr__(self) -> str:
-        """Return a string representation of the exception."""
-        class_name = self.__class__.__name__
-        properties_str = ", ".join(
-            f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
-        )
-        return f"{class_name}({properties_str})"
-
-
 class CustomFileBasedException(AirbyteTracedException):
     """
     A specialized exception for file-based connectors.
airbyte_cdk/sources/file_based/file_based_source.py

@@ -242,7 +242,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                         stream=self._make_default_stream(
                             stream_config=stream_config,
                             cursor=cursor,
-                            parsed_config=parsed_config,
+                            use_file_transfer=self._use_file_transfer(parsed_config),
                         ),
                         source=self,
                         logger=self.logger,
@@ -273,7 +273,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                         stream=self._make_default_stream(
                             stream_config=stream_config,
                             cursor=cursor,
-                            parsed_config=parsed_config,
+                            use_file_transfer=self._use_file_transfer(parsed_config),
                         ),
                         source=self,
                         logger=self.logger,
@@ -285,7 +285,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                 stream = self._make_default_stream(
                     stream_config=stream_config,
                     cursor=cursor,
-                    parsed_config=parsed_config,
+                    use_file_transfer=self._use_file_transfer(parsed_config),
                 )
 
                 streams.append(stream)
@@ -298,7 +298,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
         self,
         stream_config: FileBasedStreamConfig,
         cursor: Optional[AbstractFileBasedCursor],
-        parsed_config: AbstractFileBasedSpec,
+        use_file_transfer: bool = False,
     ) -> AbstractFileBasedStream:
         return DefaultFileBasedStream(
             config=stream_config,
@@ -310,8 +310,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             validation_policy=self._validate_and_get_validation_policy(stream_config),
             errors_collector=self.errors_collector,
             cursor=cursor,
-            use_file_transfer=self._use_file_transfer(parsed_config),
-            preserve_directory_structure=self._preserve_directory_structure(parsed_config),
+            use_file_transfer=use_file_transfer,
         )
 
     def _get_stream_from_catalog(
@@ -386,25 +385,3 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             and parsed_config.delivery_method.delivery_type == "use_file_transfer"
         )
         return use_file_transfer
-
-    @staticmethod
-    def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
-        """
-        Determines whether to preserve directory structure during file transfer.
-
-        When enabled, files maintain their subdirectory paths in the destination.
-        When disabled, files are flattened to the root of the destination.
-
-        Args:
-            parsed_config: The parsed configuration containing delivery method settings
-
-        Returns:
-            True if directory structure should be preserved (default), False otherwise
-        """
-        if (
-            FileBasedSource._use_file_transfer(parsed_config)
-            and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
-            and parsed_config.delivery_method.preserve_directory_structure is not None
-        ):
-            return parsed_config.delivery_method.preserve_directory_structure
-        return True
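The removed helper (and its twin in the stream reader below) implemented a conservative fallback: flatten paths only when file transfer is active and the config explicitly opts out; otherwise preserve directories. A standalone restatement of that rule:

    from typing import Optional

    def preserve_directory_structure(use_file_transfer: bool, configured: Optional[bool]) -> bool:
        # Only an explicit False while file transfer is active flattens paths.
        if use_file_transfer and configured is not None:
            return configured
        return True

    assert preserve_directory_structure(True, False) is False  # explicit opt-out
    assert preserve_directory_structure(True, None) is True    # incomplete config: keep default
    assert preserve_directory_structure(False, False) is True  # ignored without file transfer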
airbyte_cdk/sources/file_based/file_based_stream_reader.py

@@ -135,17 +135,6 @@ class AbstractFileBasedStreamReader(ABC):
             return use_file_transfer
         return False
 
-    def preserve_directory_structure(self) -> bool:
-        # fall back to preserve subdirectories if config is not present or incomplete
-        if (
-            self.use_file_transfer()
-            and self.config
-            and hasattr(self.config.delivery_method, "preserve_directory_structure")
-            and self.config.delivery_method.preserve_directory_structure is not None
-        ):
-            return self.config.delivery_method.preserve_directory_structure
-        return True
-
     @abstractmethod
     def get_file(
         self, file: RemoteFile, local_directory: str, logger: logging.Logger
@@ -170,13 +159,10 @@ class AbstractFileBasedStreamReader(ABC):
         """
         ...
 
-    def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
-        preserve_directory_structure = self.preserve_directory_structure()
-        if preserve_directory_structure:
-            # Remove left slashes from source path format to make relative path for writing locally
-            file_relative_path = file.uri.lstrip("/")
-        else:
-            file_relative_path = path.basename(file.uri)
+    @staticmethod
+    def _get_file_transfer_paths(file: RemoteFile, local_directory: str) -> List[str]:
+        # Remove left slashes from source path format to make relative path for writing locally
+        file_relative_path = file.uri.lstrip("/")
         local_file_path = path.join(local_directory, file_relative_path)
 
         # Ensure the local directory exists
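The two path strategies traced concretely; without the flattening branch, the dev build always mirrors the source layout under the local directory (example values hypothetical, posixpath used for deterministic separators):

    import posixpath as path

    uri = "/reports/2024/june.csv"   # hypothetical remote file URI
    local_directory = "/tmp/airbyte"

    # Dev build (and 6.20.0 with preservation on): keep the relative layout.
    preserved = path.join(local_directory, uri.lstrip("/"))
    assert preserved == "/tmp/airbyte/reports/2024/june.csv"

    # 6.20.0 with preservation off: flatten to the base name.
    flattened = path.join(local_directory, path.basename(uri))
    assert flattened == "/tmp/airbyte/june.csv"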