airbyte-cdk 6.20.1__py3-none-any.whl → 6.20.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +0 -34
- airbyte_cdk/sources/declarative/checks/__init__.py +2 -18
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +80 -16
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +21 -97
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +0 -43
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +331 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +26 -97
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +106 -116
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -33
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +3 -13
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -11
- airbyte_cdk/sources/file_based/exceptions.py +0 -34
- airbyte_cdk/sources/file_based/file_based_source.py +5 -28
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +4 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +2 -25
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -30
- airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +4 -33
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -42
- airbyte_cdk/sources/types.py +0 -3
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/RECORD +31 -32
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +0 -51
- airbyte_cdk/sources/declarative/requesters/README.md +0 -56
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -54,7 +54,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
     SessionTokenProvider,
     TokenProvider,
 )
-from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
+from airbyte_cdk.sources.declarative.checks import CheckStream
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
 from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
@@ -72,8 +72,6 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
     CsvParser,
     GzipParser,
     JsonLineParser,
-    JsonParser,
-    Parser,
 )
 from airbyte_cdk.sources.declarative.extractors import (
     DpathExtractor,
@@ -86,6 +84,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -123,9 +123,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     BearerAuthenticator as BearerAuthenticatorModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    CheckDynamicStream as CheckDynamicStreamModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CheckStream as CheckStreamModel,
 )
@@ -252,9 +249,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JsonLineParser as JsonLineParserModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    JsonParser as JsonParserModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JwtAuthenticator as JwtAuthenticatorModel,
 )
@@ -446,6 +440,7 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
+    NoopMessageRepository,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -496,7 +491,6 @@ class ModelToComponentFactory:
             BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
             BearerAuthenticatorModel: self.create_bearer_authenticator,
             CheckStreamModel: self.create_check_stream,
-            CheckDynamicStreamModel: self.create_check_dynamic_stream,
             CompositeErrorHandlerModel: self.create_composite_error_handler,
             CompositeRawDecoderModel: self.create_composite_raw_decoder,
             ConcurrencyLevelModel: self.create_concurrency_level,
@@ -531,7 +525,6 @@ class ModelToComponentFactory:
             JsonDecoderModel: self.create_json_decoder,
             JsonlDecoderModel: self.create_jsonl_decoder,
             JsonLineParserModel: self.create_json_line_parser,
-            JsonParserModel: self.create_json_parser,
             GzipJsonDecoderModel: self.create_gzipjson_decoder,
             GzipParserModel: self.create_gzip_parser,
             KeysToLowerModel: self.create_keys_to_lower_transformation,
@@ -850,12 +843,6 @@ class ModelToComponentFactory:
     def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
         return CheckStream(stream_names=model.stream_names, parameters={})
 
-    @staticmethod
-    def create_check_dynamic_stream(
-        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
-    ) -> CheckDynamicStream:
-        return CheckDynamicStream(stream_count=model.stream_count, parameters={})
-
     def create_composite_error_handler(
         self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
     ) -> CompositeErrorHandler:
@@ -887,6 +874,8 @@ class ModelToComponentFactory:
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
+        message_repository: Optional[MessageRepository] = None,
+        runtime_lookback_window: Optional[int] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -944,6 +933,11 @@ class ModelToComponentFactory:
         if evaluated_lookback_window:
             lookback_window = parse_duration(evaluated_lookback_window)
 
+        if runtime_lookback_window and lookback_window:
+            lookback_window = max(lookback_window, runtime_lookback_window)
+        elif runtime_lookback_window:
+            lookback_window = runtime_lookback_window
+
         connector_state_converter: DateTimeStreamStateConverter
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
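The hunk above merges a caller-supplied runtime lookback window with the one parsed from the manifest, keeping whichever is larger. A minimal standalone sketch of that precedence logic (plain Python with hypothetical names, not the CDK's API):

```python
from datetime import timedelta
from typing import Optional


def resolve_lookback_window(
    configured: Optional[timedelta], runtime: Optional[timedelta]
) -> Optional[timedelta]:
    # Mirrors the hunk above: take the larger window when both are set,
    # fall back to whichever one exists, otherwise keep None.
    if runtime and configured:
        return max(configured, runtime)
    if runtime:
        return runtime
    return configured


assert resolve_lookback_window(timedelta(days=1), timedelta(days=7)) == timedelta(days=7)
assert resolve_lookback_window(None, timedelta(days=7)) == timedelta(days=7)
assert resolve_lookback_window(timedelta(days=1), None) == timedelta(days=1)
```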
@@ -1022,7 +1016,7 @@ class ModelToComponentFactory:
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=self._message_repository,
+            message_repository=message_repository or self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1034,6 +1028,63 @@ class ModelToComponentFactory:
             cursor_granularity=cursor_granularity,
         )
 
+    def create_concurrent_cursor_from_perpartition_cursor(
+        self,
+        state_manager: ConnectorStateManager,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        stream_state: MutableMapping[str, Any],
+        partition_router: PartitionRouter,
+        **kwargs: Any,
+    ) -> ConcurrentPerPartitionCursor:
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        datetime_based_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            datetime_based_cursor_model.cursor_field,
+            parameters=datetime_based_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        # Create the cursor factory
+        cursor_factory = ConcurrentCursorFactory(
+            partial(
+                self.create_concurrent_cursor_from_datetime_based_cursor,
+                state_manager=state_manager,
+                model_type=model_type,
+                component_definition=component_definition,
+                stream_name=stream_name,
+                stream_namespace=stream_namespace,
+                config=config,
+                message_repository=NoopMessageRepository(),
+            )
+        )
+
+        # Return the concurrent cursor and state converter
+        return ConcurrentPerPartitionCursor(
+            cursor_factory=cursor_factory,
+            partition_router=partition_router,
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=self._message_repository,  # type: ignore
+            connector_state_manager=state_manager,
+            cursor_field=cursor_field,
+        )
+
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
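The new factory method freezes all per-stream arguments into a `functools.partial` and hands that callable to `ConcurrentCursorFactory`, so each partition can later receive its own cursor with only its state varying; child cursors get a `NoopMessageRepository`, presumably so that only the wrapping per-partition cursor emits state. A stripped-down sketch of the partial-based factory pattern (hypothetical names, not the CDK classes):

```python
from functools import partial


class CursorFactorySketch:
    """Builds one cursor per partition from a shared builder callable."""

    def __init__(self, create_fn):
        self._create_fn = create_fn

    def create(self, stream_state):
        # Only the per-partition state varies at call time; everything
        # else was frozen when the partial was constructed.
        return self._create_fn(stream_state=stream_state)


def build_cursor(*, stream_name, config, stream_state):
    return {"stream": stream_name, "config": config, "state": stream_state}


factory = CursorFactorySketch(partial(build_cursor, stream_name="events", config={}))
print(factory.create(stream_state={"updated_at": "2024-01-01"}))
```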
@@ -1048,17 +1099,17 @@ class ModelToComponentFactory:
         self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> CursorPaginationStrategy:
         if isinstance(decoder, PaginationDecoderDecorator):
-            inner_decoder = decoder.decoder
-        else:
-            inner_decoder = decoder
-            decoder = PaginationDecoderDecorator(decoder=decoder)
-
-        if self._is_supported_decoder_for_pagination(inner_decoder):
+            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
             decoder_to_use = decoder
         else:
-            raise ValueError(
-                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
-            )
+            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
+            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
 
         return CursorPaginationStrategy(
             cursor_value=model.cursor_value,
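This build replaces the `_is_supported_decoder_for_pagination` helper (removed near the end of this file) with inline `isinstance` allow-lists: only `JsonDecoder` and `XmlDecoder` pass, whether the decoder arrives already wrapped in a `PaginationDecoderDecorator` or gets wrapped here. A self-contained sketch of that validation shape (stand-in classes, not the CDK types):

```python
class JsonDecoder: ...
class XmlDecoder: ...
class CsvDecoder: ...


def validate_pagination_decoder(decoder) -> None:
    # Inline allow-list, as in the hunk above.
    if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
        raise ValueError(
            f"Provided decoder of {type(decoder)=} is not supported. "
            "Please set JsonDecoder or XmlDecoder instead."
        )


validate_pagination_decoder(JsonDecoder())  # accepted
try:
    validate_pagination_decoder(CsvDecoder())
except ValueError as exc:
    print(exc)  # rejected with the error message from the hunk
```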
@@ -1316,18 +1367,15 @@ class ModelToComponentFactory:
                 raise ValueError(
                     "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
                 )
-            client_side_incremental_sync = {
-                "date_time_based_cursor": self._create_component_from_model(
-                    model=model.incremental_sync, config=config
-                ),
-                "substream_cursor": (
-                    combined_slicers
-                    if isinstance(
-                        combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
-                    )
-                    else None
-                ),
-            }
+            cursor = (
+                combined_slicers
+                if isinstance(
+                    combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                )
+                else self._create_component_from_model(model=model.incremental_sync, config=config)
+            )
+
+            client_side_incremental_sync = {"cursor": cursor}
 
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
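The client-side incremental wiring collapses from a two-key dict (`date_time_based_cursor` plus an optional `substream_cursor`) into a single `cursor` key: reuse the combined slicer when it is already substream-aware, otherwise build a fresh cursor from the `incremental_sync` definition. A minimal sketch of that selection (hypothetical stand-ins for the cursor classes):

```python
class PerPartitionWithGlobalCursor: ...
class GlobalSubstreamCursor: ...


def pick_client_side_cursor(combined_slicers, build_from_manifest):
    # Same selection as the hunk: prefer an already-combined
    # substream-aware cursor; otherwise build one from the manifest.
    if isinstance(combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)):
        return {"cursor": combined_slicers}
    return {"cursor": build_from_manifest()}


print(pick_client_side_cursor(GlobalSubstreamCursor(), lambda: "datetime-cursor"))
print(pick_client_side_cursor(None, lambda: "datetime-cursor"))
```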
@@ -1531,10 +1579,11 @@ class ModelToComponentFactory:
         cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
     ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
         if decoder:
-            if self._is_supported_decoder_for_pagination(decoder):
-                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
-            else:
-                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
+            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
+            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
         else:
             decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
         page_size_option = (
@@ -1696,11 +1745,7 @@ class ModelToComponentFactory:
 
     @staticmethod
     def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
-        return TypesMap(
-            target_type=model.target_type,
-            current_type=model.current_type,
-            condition=model.condition if model.condition is not None else "True",
-        )
+        return TypesMap(target_type=model.target_type, current_type=model.current_type)
 
     def create_schema_type_identifier(
         self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
@@ -1767,11 +1812,6 @@ class ModelToComponentFactory:
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
         return JsonDecoder(parameters={})
 
-    @staticmethod
-    def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
-        encoding = model.encoding if model.encoding else "utf-8"
-        return JsonParser(encoding=encoding)
-
     @staticmethod
     def create_jsonl_decoder(
         model: JsonlDecoderModel, config: Config, **kwargs: Any
@@ -1909,33 +1949,21 @@ class ModelToComponentFactory:
                 expires_in_name=InterpolatedString.create(
                     model.expires_in_name or "expires_in", parameters=model.parameters or {}
                 ).eval(config),
-                client_id_name=InterpolatedString.create(
-                    model.client_id_name or "client_id", parameters=model.parameters or {}
-                ).eval(config),
                 client_id=InterpolatedString.create(
                     model.client_id, parameters=model.parameters or {}
                 ).eval(config),
-                client_secret_name=InterpolatedString.create(
-                    model.client_secret_name or "client_secret", parameters=model.parameters or {}
-                ).eval(config),
                 client_secret=InterpolatedString.create(
                     model.client_secret, parameters=model.parameters or {}
                 ).eval(config),
                 access_token_config_path=model.refresh_token_updater.access_token_config_path,
                 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
                 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
-                grant_type_name=InterpolatedString.create(
-                    model.grant_type_name or "grant_type", parameters=model.parameters or {}
-                ).eval(config),
                 grant_type=InterpolatedString.create(
                     model.grant_type or "refresh_token", parameters=model.parameters or {}
                 ).eval(config),
                 refresh_request_body=InterpolatedMapping(
                     model.refresh_request_body or {}, parameters=model.parameters or {}
                 ).eval(config),
-                refresh_request_headers=InterpolatedMapping(
-                    model.refresh_request_headers or {}, parameters=model.parameters or {}
-                ).eval(config),
                 scopes=model.scopes,
                 token_expiry_date_format=model.token_expiry_date_format,
                 message_repository=self._message_repository,
@@ -1947,16 +1975,11 @@ class ModelToComponentFactory:
         return DeclarativeOauth2Authenticator(  # type: ignore
             access_token_name=model.access_token_name or "access_token",
             access_token_value=model.access_token_value,
-            client_id_name=model.client_id_name or "client_id",
             client_id=model.client_id,
-            client_secret_name=model.client_secret_name or "client_secret",
             client_secret=model.client_secret,
             expires_in_name=model.expires_in_name or "expires_in",
-            grant_type_name=model.grant_type_name or "grant_type",
             grant_type=model.grant_type or "refresh_token",
             refresh_request_body=model.refresh_request_body,
-            refresh_request_headers=model.refresh_request_headers,
-            refresh_token_name=model.refresh_token_name or "refresh_token",
             refresh_token=model.refresh_token,
             scopes=model.scopes,
             token_expiry_date=model.token_expiry_date,
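Both OAuth construction paths drop the configurable request-field names (`client_id_name`, `client_secret_name`, `grant_type_name`, `refresh_token_name`) and the `refresh_request_headers` passthrough, reverting to fixed key names in the token-refresh request. A sketch of the resulting fixed-shape payload (illustrative only, not the authenticator's actual request code):

```python
def build_refresh_payload(client_id: str, client_secret: str, refresh_token: str) -> dict:
    # With the *_name overrides removed, the outgoing field names are
    # fixed rather than interpolated from the manifest.
    return {
        "grant_type": "refresh_token",
        "client_id": client_id,
        "client_secret": client_secret,
        "refresh_token": refresh_token,
    }


print(build_refresh_payload("my-id", "my-secret", "my-token"))
```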
@@ -1968,22 +1991,22 @@ class ModelToComponentFactory:
             message_repository=self._message_repository,
         )
 
+    @staticmethod
     def create_offset_increment(
-        self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
+        model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> OffsetIncrement:
         if isinstance(decoder, PaginationDecoderDecorator):
-            inner_decoder = decoder.decoder
-        else:
-            inner_decoder = decoder
-            decoder = PaginationDecoderDecorator(decoder=decoder)
-
-        if self._is_supported_decoder_for_pagination(inner_decoder):
+            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
             decoder_to_use = decoder
         else:
-            raise ValueError(
-                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
-            )
-
+            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
+                raise ValueError(
+                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
+                )
+            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
         return OffsetIncrement(
             page_size=model.page_size,
             config=config,
@@ -2168,7 +2191,7 @@ class ModelToComponentFactory:
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        ):
+        ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -2328,7 +2351,7 @@ class ModelToComponentFactory:
             extractor=download_extractor,
             name=name,
             record_filter=None,
-            transformations=transformations,
+            transformations=[],
             schema_normalization=TypeTransformer(TransformConfig.NoTransform),
             config=config,
             parameters={},
@@ -2365,16 +2388,6 @@ class ModelToComponentFactory:
             if model.delete_requester
             else None
         )
-        url_requester = (
-            self._create_component_from_model(
-                model=model.url_requester,
-                decoder=decoder,
-                config=config,
-                name=f"job extract_url - {name}",
-            )
-            if model.url_requester
-            else None
-        )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
@@ -2385,7 +2398,6 @@ class ModelToComponentFactory:
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
-            url_requester=url_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
@@ -2583,25 +2595,3 @@ class ModelToComponentFactory:
             components_mapping=components_mapping,
             parameters=model.parameters or {},
         )
-
-    _UNSUPPORTED_DECODER_ERROR = (
-        "Specified decoder of {decoder_type} is not supported for pagination."
-        "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead."
-        "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
-    )
-
-    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
-        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
-            return True
-        elif isinstance(decoder, CompositeRawDecoder):
-            return self._is_supported_parser_for_pagination(decoder.parser)
-        else:
-            return False
-
-    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
-        if isinstance(parser, JsonParser):
-            return True
-        elif isinstance(parser, GzipParser):
-            return isinstance(parser.inner_parser, JsonParser)
-        else:
-            return False
airbyte_cdk/sources/declarative/requesters/http_job_repository.py

@@ -31,10 +31,6 @@ LOGGER = logging.getLogger("airbyte")
 
 @dataclass
 class AsyncHttpJobRepository(AsyncJobRepository):
-    """
-    See Readme file for more details about flow.
-    """
-
     creation_requester: Requester
     polling_requester: Requester
     download_retriever: SimpleRetriever
@@ -48,9 +44,6 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
-    url_requester: Optional[Requester] = (
-        None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
-    )
 
     def __post_init__(self) -> None:
         self._create_job_response_by_id: Dict[str, Response] = {}
@@ -193,13 +186,10 @@ class AsyncHttpJobRepository(AsyncJobRepository):
 
         """
 
-        for url in self._get_download_url(job):
-            job_slice = job.job_parameters()
-            stream_slice = StreamSlice(
-                partition=job_slice.partition,
-                cursor_slice=job_slice.cursor_slice,
-                extra_fields={**job_slice.extra_fields, "url": url},
-            )
+        for url in self.urls_extractor.extract_records(
+            self._polling_job_response_by_id[job.api_job_id()]
+        ):
+            stream_slice: StreamSlice = StreamSlice(partition={"url": url}, cursor_slice={})
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
                     yield message.data
@@ -236,22 +226,3 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             cursor_slice={},
         )
         return stream_slice
-
-    def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
-        if not self.url_requester:
-            url_response = self._polling_job_response_by_id[job.api_job_id()]
-        else:
-            stream_slice: StreamSlice = StreamSlice(
-                partition={
-                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
-                },
-                cursor_slice={},
-            )
-            url_response = self.url_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect url_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
-            if not url_response:
-                raise AirbyteTracedException(
-                    internal_message="Always expect a response or an exception from url_requester",
-                    failure_type=FailureType.system_error,
-                )
-
-        yield from self.urls_extractor.extract_records(url_response)  # type: ignore # we expect urls_extractor to always return list of strings
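With `url_requester` gone, download URLs are extracted directly from the stored polling response, and each URL becomes its own partition slice. A compact sketch of the simplified flow (generic callables standing in for the CDK's extractor and retriever):

```python
from typing import Any, Callable, Dict, Iterable, List


def iter_download_slices(
    polling_response: Dict[str, Any],
    extract_urls: Callable[[Dict[str, Any]], List[str]],
) -> Iterable[Dict[str, Any]]:
    # One slice per extracted URL, mirroring
    # StreamSlice(partition={"url": url}, cursor_slice={}).
    for url in extract_urls(polling_response):
        yield {"partition": {"url": url}, "cursor_slice": {}}


slices = iter_download_slices(
    {"urls": ["https://example.com/a.csv", "https://example.com/b.csv"]},
    lambda response: response["urls"],
)
print(list(slices))
```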
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py

@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.stream_slicer.get_request_headers,
+            self.request_option_provider.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py

@@ -10,7 +10,6 @@ from typing import Any, List, Mapping, MutableMapping, Optional, Union
 import dpath
 from typing_extensions import deprecated
 
-from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
@@ -54,7 +53,6 @@ class TypesMap:
 
     target_type: Union[List[str], str]
     current_type: Union[List[str], str]
-    condition: Optional[str]
 
 
 @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@@ -179,7 +177,7 @@ class DynamicSchemaLoader(SchemaLoader):
             if field_type_path
             else "string"
         )
-        mapped_field_type = self._replace_type_if_not_valid(raw_field_type, raw_schema)
+        mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
         if (
             isinstance(mapped_field_type, list)
             and len(mapped_field_type) == 2
@@ -196,22 +194,14 @@
         )
 
     def _replace_type_if_not_valid(
-        self,
-        field_type: Union[List[str], str],
-        raw_schema: MutableMapping[str, Any],
+        self, field_type: Union[List[str], str]
     ) -> Union[List[str], str]:
         """
         Replaces a field type if it matches a type mapping in `types_map`.
         """
         if self.schema_type_identifier.types_mapping:
             for types_map in self.schema_type_identifier.types_mapping:
-
-                condition = InterpolatedBoolean(
-                    condition=types_map.condition if types_map.condition is not None else "True",
-                    parameters={},
-                ).eval(config=self.config, raw_schema=raw_schema)
-
-                if field_type == types_map.current_type and condition:
+                if field_type == types_map.current_type:
                     return types_map.target_type
         return field_type
 
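After this change, `TypesMap` matching is a plain equality check on `current_type`; the interpolated `condition` evaluated against the raw schema is gone. A runnable sketch of the reduced lookup (dicts standing in for the `TypesMap` model):

```python
from typing import List, Union

FieldType = Union[List[str], str]


def replace_type_if_not_valid(field_type: FieldType, types_mapping: List[dict]) -> FieldType:
    # Pure equality match on current_type; no condition evaluation.
    for types_map in types_mapping:
        if field_type == types_map["current_type"]:
            return types_map["target_type"]
    return field_type


mapping = [{"current_type": "integer", "target_type": "number"}]
print(replace_type_if_not_valid("integer", mapping))  # -> "number"
print(replace_type_if_not_valid("boolean", mapping))  # unchanged
```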
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py

@@ -31,17 +31,6 @@ class DeliverRawFiles(BaseModel):
 
     delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)
 
-    preserve_directory_structure: bool = Field(
-        title="Preserve Sub-Directories in File Paths",
-        description=(
-            "If enabled, sends subdirectory folder structure "
-            "along with source file names to the destination. "
-            "Otherwise, files will be synced by their names only. "
-            "This option is ignored when file-based replication is not enabled."
-        ),
-        default=True,
-    )
-
 
 class AbstractFileBasedSpec(BaseModel):
     """
airbyte_cdk/sources/file_based/exceptions.py

@@ -111,40 +111,6 @@ class ErrorListingFiles(BaseFileBasedSourceError):
     pass
 
 
-class DuplicatedFilesError(BaseFileBasedSourceError):
-    def __init__(self, duplicated_files_names: List[dict[str, List[str]]], **kwargs: Any):
-        self._duplicated_files_names = duplicated_files_names
-        self._stream_name: str = kwargs["stream"]
-        super().__init__(self._format_duplicate_files_error_message(), **kwargs)
-
-    def _format_duplicate_files_error_message(self) -> str:
-        duplicated_files_messages = []
-        for duplicated_file in self._duplicated_files_names:
-            for duplicated_file_name, file_paths in duplicated_file.items():
-                file_duplicated_message = (
-                    f"{len(file_paths)} duplicates found for file name {duplicated_file_name}:\n\n"
-                    + "".join(f"\n - {file_paths}")
-                )
-                duplicated_files_messages.append(file_duplicated_message)
-
-        error_message = (
-            f"ERROR: Duplicate filenames found for stream {self._stream_name}. "
-            "Duplicate file names are not allowed if the Preserve Sub-Directories in File Paths option is disabled. "
-            "Please remove or rename the duplicate files before attempting to re-run the sync.\n\n"
-            + "\n".join(duplicated_files_messages)
-        )
-
-        return error_message
-
-    def __repr__(self) -> str:
-        """Return a string representation of the exception."""
-        class_name = self.__class__.__name__
-        properties_str = ", ".join(
-            f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_")
-        )
-        return f"{class_name}({properties_str})"
-
-
 class CustomFileBasedException(AirbyteTracedException):
     """
     A specialized exception for file-based connectors.
airbyte_cdk/sources/file_based/file_based_source.py

@@ -242,7 +242,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                     stream=self._make_default_stream(
                         stream_config=stream_config,
                         cursor=cursor,
-                        parsed_config=parsed_config,
+                        use_file_transfer=self._use_file_transfer(parsed_config),
                     ),
                     source=self,
                     logger=self.logger,
@@ -273,7 +273,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                     stream=self._make_default_stream(
                         stream_config=stream_config,
                         cursor=cursor,
-                        parsed_config=parsed_config,
+                        use_file_transfer=self._use_file_transfer(parsed_config),
                     ),
                     source=self,
                     logger=self.logger,
@@ -285,7 +285,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
                 stream = self._make_default_stream(
                     stream_config=stream_config,
                     cursor=cursor,
-                    parsed_config=parsed_config,
+                    use_file_transfer=self._use_file_transfer(parsed_config),
                 )
 
                 streams.append(stream)
@@ -298,7 +298,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
         self,
         stream_config: FileBasedStreamConfig,
         cursor: Optional[AbstractFileBasedCursor],
-        parsed_config: AbstractFileBasedSpec,
+        use_file_transfer: bool = False,
     ) -> AbstractFileBasedStream:
         return DefaultFileBasedStream(
             config=stream_config,
@@ -310,8 +310,7 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             validation_policy=self._validate_and_get_validation_policy(stream_config),
             errors_collector=self.errors_collector,
             cursor=cursor,
-            use_file_transfer=self._use_file_transfer(parsed_config),
-            preserve_directory_structure=self._preserve_directory_structure(parsed_config),
+            use_file_transfer=use_file_transfer,
         )
 
     def _get_stream_from_catalog(
@@ -386,25 +385,3 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
             and parsed_config.delivery_method.delivery_type == "use_file_transfer"
         )
         return use_file_transfer
-
-    @staticmethod
-    def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
-        """
-        Determines whether to preserve directory structure during file transfer.
-
-        When enabled, files maintain their subdirectory paths in the destination.
-        When disabled, files are flattened to the root of the destination.
-
-        Args:
-            parsed_config: The parsed configuration containing delivery method settings
-
-        Returns:
-            True if directory structure should be preserved (default), False otherwise
-        """
-        if (
-            FileBasedSource._use_file_transfer(parsed_config)
-            and hasattr(parsed_config.delivery_method, "preserve_directory_structure")
-            and parsed_config.delivery_method.preserve_directory_structure is not None
-        ):
-            return parsed_config.delivery_method.preserve_directory_structure
-        return True
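With `preserve_directory_structure` removed, `_use_file_transfer` is the only delivery-method decision left, and it now flows into `_make_default_stream` as a plain boolean parameter. A sketch of the remaining check (a hypothetical config object, not the CDK spec classes):

```python
from types import SimpleNamespace


def use_file_transfer(parsed_config) -> bool:
    # The only delivery-method flag left after this diff.
    delivery_method = getattr(parsed_config, "delivery_method", None)
    return (
        delivery_method is not None
        and getattr(delivery_method, "delivery_type", None) == "use_file_transfer"
    )


config = SimpleNamespace(delivery_method=SimpleNamespace(delivery_type="use_file_transfer"))
print(use_file_transfer(config))  # True
print(use_file_transfer(SimpleNamespace(delivery_method=None)))  # False
```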