airbyte-cdk 6.20.2.dev0__py3-none-any.whl → 6.21.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +34 -0
- airbyte_cdk/sources/declarative/checks/__init__.py +18 -2
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +51 -0
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -80
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +123 -21
- airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +43 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +112 -27
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +127 -106
- airbyte_cdk/sources/declarative/requesters/README.md +56 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +13 -3
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +11 -0
- airbyte_cdk/sources/file_based/exceptions.py +34 -0
- airbyte_cdk/sources/file_based/file_based_source.py +28 -5
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +18 -4
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +25 -2
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +30 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +33 -4
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +42 -4
- airbyte_cdk/sources/types.py +3 -0
- airbyte_cdk/sources/utils/transform.py +29 -3
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/RECORD +35 -33
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -331
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -54,7 +54,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
     SessionTokenProvider,
     TokenProvider,
 )
-from airbyte_cdk.sources.declarative.checks import CheckStream
+from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
 from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
@@ -66,12 +66,15 @@ from airbyte_cdk.sources.declarative.decoders import (
     JsonlDecoder,
     PaginationDecoderDecorator,
     XmlDecoder,
+    ZipfileDecoder,
 )
 from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
     CompositeRawDecoder,
     CsvParser,
     GzipParser,
     JsonLineParser,
+    JsonParser,
+    Parser,
 )
 from airbyte_cdk.sources.declarative.extractors import (
     DpathExtractor,
@@ -84,8 +87,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
-    ConcurrentCursorFactory,
-    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -123,6 +124,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     BearerAuthenticator as BearerAuthenticatorModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    CheckDynamicStream as CheckDynamicStreamModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     CheckStream as CheckStreamModel,
 )
@@ -249,6 +253,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JsonLineParser as JsonLineParserModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    JsonParser as JsonParserModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     JwtAuthenticator as JwtAuthenticatorModel,
 )
@@ -350,6 +357,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     XmlDecoder as XmlDecoderModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    ZipfileDecoder as ZipfileDecoderModel,
+)
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
     ListPartitionRouter,
@@ -440,7 +450,6 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
-    NoopMessageRepository,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -491,6 +500,7 @@ class ModelToComponentFactory:
             BasicHttpAuthenticatorModel: self.create_basic_http_authenticator,
             BearerAuthenticatorModel: self.create_bearer_authenticator,
             CheckStreamModel: self.create_check_stream,
+            CheckDynamicStreamModel: self.create_check_dynamic_stream,
             CompositeErrorHandlerModel: self.create_composite_error_handler,
             CompositeRawDecoderModel: self.create_composite_raw_decoder,
             ConcurrencyLevelModel: self.create_concurrency_level,
@@ -525,6 +535,7 @@ class ModelToComponentFactory:
             JsonDecoderModel: self.create_json_decoder,
             JsonlDecoderModel: self.create_jsonl_decoder,
             JsonLineParserModel: self.create_json_line_parser,
+            JsonParserModel: self.create_json_parser,
             GzipJsonDecoderModel: self.create_gzipjson_decoder,
             GzipParserModel: self.create_gzip_parser,
             KeysToLowerModel: self.create_keys_to_lower_transformation,
@@ -564,6 +575,7 @@ class ModelToComponentFactory:
             ConfigComponentsResolverModel: self.create_config_components_resolver,
             StreamConfigModel: self.create_stream_config,
             ComponentMappingDefinitionModel: self.create_components_mapping_definition,
+            ZipfileDecoderModel: self.create_zipfile_decoder,
         }
 
         # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -843,6 +855,12 @@ class ModelToComponentFactory:
     def create_check_stream(model: CheckStreamModel, config: Config, **kwargs: Any) -> CheckStream:
         return CheckStream(stream_names=model.stream_names, parameters={})
 
+    @staticmethod
+    def create_check_dynamic_stream(
+        model: CheckDynamicStreamModel, config: Config, **kwargs: Any
+    ) -> CheckDynamicStream:
+        return CheckDynamicStream(stream_count=model.stream_count, parameters={})
+
     def create_composite_error_handler(
         self, model: CompositeErrorHandlerModel, config: Config, **kwargs: Any
     ) -> CompositeErrorHandler:
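
For reference, the new `CheckDynamicStream` connection check registered above is built from a single `stream_count` field. A minimal manifest sketch of how a connector might opt into it — only `type` and `stream_count` are confirmed by this diff, so treat the rest as illustrative:

```yaml
# Hypothetical manifest excerpt: validate connectivity by reading from the
# first generated stream(s) instead of naming streams explicitly (CheckStream).
check:
  type: CheckDynamicStream
  stream_count: 1  # mirrors CheckDynamicStream(stream_count=model.stream_count, parameters={})
```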
@@ -874,8 +892,6 @@
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
-        message_repository: Optional[MessageRepository] = None,
-        runtime_lookback_window: Optional[int] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -933,11 +949,6 @@
         if evaluated_lookback_window:
             lookback_window = parse_duration(evaluated_lookback_window)
 
-        if runtime_lookback_window and lookback_window:
-            lookback_window = max(lookback_window, runtime_lookback_window)
-        elif runtime_lookback_window:
-            lookback_window = runtime_lookback_window
-
         connector_state_converter: DateTimeStreamStateConverter
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
@@ -1016,7 +1027,7 @@
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=message_repository or self._message_repository,
+            message_repository=self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1028,63 +1039,6 @@
             cursor_granularity=cursor_granularity,
         )
 
-    def create_concurrent_cursor_from_perpartition_cursor(
-        self,
-        state_manager: ConnectorStateManager,
-        model_type: Type[BaseModel],
-        component_definition: ComponentDefinition,
-        stream_name: str,
-        stream_namespace: Optional[str],
-        config: Config,
-        stream_state: MutableMapping[str, Any],
-        partition_router: PartitionRouter,
-        **kwargs: Any,
-    ) -> ConcurrentPerPartitionCursor:
-        component_type = component_definition.get("type")
-        if component_definition.get("type") != model_type.__name__:
-            raise ValueError(
-                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
-            )
-
-        datetime_based_cursor_model = model_type.parse_obj(component_definition)
-
-        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
-            raise ValueError(
-                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
-            )
-
-        interpolated_cursor_field = InterpolatedString.create(
-            datetime_based_cursor_model.cursor_field,
-            parameters=datetime_based_cursor_model.parameters or {},
-        )
-        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
-
-        # Create the cursor factory
-        cursor_factory = ConcurrentCursorFactory(
-            partial(
-                self.create_concurrent_cursor_from_datetime_based_cursor,
-                state_manager=state_manager,
-                model_type=model_type,
-                component_definition=component_definition,
-                stream_name=stream_name,
-                stream_namespace=stream_namespace,
-                config=config,
-                message_repository=NoopMessageRepository(),
-            )
-        )
-
-        # Return the concurrent cursor and state converter
-        return ConcurrentPerPartitionCursor(
-            cursor_factory=cursor_factory,
-            partition_router=partition_router,
-            stream_name=stream_name,
-            stream_namespace=stream_namespace,
-            stream_state=stream_state,
-            message_repository=self._message_repository,  # type: ignore
-            connector_state_manager=state_manager,
-            cursor_field=cursor_field,
-        )
-
     @staticmethod
     def create_constant_backoff_strategy(
         model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
@@ -1099,17 +1053,17 @@
         self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> CursorPaginationStrategy:
         if isinstance(decoder, PaginationDecoderDecorator):
-            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
+            inner_decoder = decoder.decoder
+        else:
+            inner_decoder = decoder
+            decoder = PaginationDecoderDecorator(decoder=decoder)
+
+        if self._is_supported_decoder_for_pagination(inner_decoder):
             decoder_to_use = decoder
         else:
-            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
-            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            raise ValueError(
+                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
+            )
 
         return CursorPaginationStrategy(
             cursor_value=model.cursor_value,
@@ -1367,15 +1321,18 @@
                 raise ValueError(
                     "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
                 )
-
-
-
-
-
-
-
-
-
+            client_side_incremental_sync = {
+                "date_time_based_cursor": self._create_component_from_model(
+                    model=model.incremental_sync, config=config
+                ),
+                "substream_cursor": (
+                    combined_slicers
+                    if isinstance(
+                        combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+                    )
+                    else None
+                ),
+            }
 
         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
@@ -1579,11 +1536,10 @@
         cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
     ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
         if decoder:
-            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
-            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            if self._is_supported_decoder_for_pagination(decoder):
+                decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            else:
+                raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
         else:
             decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
         page_size_option = (
@@ -1745,7 +1701,11 @@
 
     @staticmethod
     def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
-        return TypesMap(target_type=model.target_type, current_type=model.current_type)
+        return TypesMap(
+            target_type=model.target_type,
+            current_type=model.current_type,
+            condition=model.condition if model.condition is not None else "True",
+        )
 
     def create_schema_type_identifier(
         self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
@@ -1812,6 +1772,11 @@
     def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
         return JsonDecoder(parameters={})
 
+    @staticmethod
+    def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
+        encoding = model.encoding if model.encoding else "utf-8"
+        return JsonParser(encoding=encoding)
+
     @staticmethod
     def create_jsonl_decoder(
         model: JsonlDecoderModel, config: Config, **kwargs: Any
@@ -1840,6 +1805,12 @@
     ) -> GzipJsonDecoder:
         return GzipJsonDecoder(parameters={}, encoding=model.encoding)
 
+    def create_zipfile_decoder(
+        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
+    ) -> ZipfileDecoder:
+        parser = self._create_component_from_model(model=model.parser, config=config)
+        return ZipfileDecoder(parser=parser)
+
     def create_gzip_parser(
         self, model: GzipParserModel, config: Config, **kwargs: Any
     ) -> GzipParser:
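
Taken together, `create_json_parser` and `create_zipfile_decoder` let a manifest describe a zipped JSON payload: the decoder unwraps the archive and delegates each entry to its nested parser. A hedged sketch of the corresponding configuration, inferred from the factory signatures above (the authoritative schema lives in `declarative_component_schema.yaml`):

```yaml
# Hypothetical decoder excerpt: unzip the HTTP response, then parse the
# archived content as JSON. encoding falls back to "utf-8" in create_json_parser.
decoder:
  type: ZipfileDecoder
  parser:
    type: JsonParser
    encoding: utf-8
```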
@@ -1949,21 +1920,33 @@
             expires_in_name=InterpolatedString.create(
                 model.expires_in_name or "expires_in", parameters=model.parameters or {}
             ).eval(config),
+            client_id_name=InterpolatedString.create(
+                model.client_id_name or "client_id", parameters=model.parameters or {}
+            ).eval(config),
             client_id=InterpolatedString.create(
                 model.client_id, parameters=model.parameters or {}
             ).eval(config),
+            client_secret_name=InterpolatedString.create(
+                model.client_secret_name or "client_secret", parameters=model.parameters or {}
+            ).eval(config),
             client_secret=InterpolatedString.create(
                 model.client_secret, parameters=model.parameters or {}
             ).eval(config),
             access_token_config_path=model.refresh_token_updater.access_token_config_path,
             refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
             token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
+            grant_type_name=InterpolatedString.create(
+                model.grant_type_name or "grant_type", parameters=model.parameters or {}
+            ).eval(config),
             grant_type=InterpolatedString.create(
                 model.grant_type or "refresh_token", parameters=model.parameters or {}
             ).eval(config),
             refresh_request_body=InterpolatedMapping(
                 model.refresh_request_body or {}, parameters=model.parameters or {}
             ).eval(config),
+            refresh_request_headers=InterpolatedMapping(
+                model.refresh_request_headers or {}, parameters=model.parameters or {}
+            ).eval(config),
             scopes=model.scopes,
             token_expiry_date_format=model.token_expiry_date_format,
             message_repository=self._message_repository,
@@ -1975,11 +1958,16 @@
         return DeclarativeOauth2Authenticator(  # type: ignore
             access_token_name=model.access_token_name or "access_token",
             access_token_value=model.access_token_value,
+            client_id_name=model.client_id_name or "client_id",
             client_id=model.client_id,
+            client_secret_name=model.client_secret_name or "client_secret",
             client_secret=model.client_secret,
             expires_in_name=model.expires_in_name or "expires_in",
+            grant_type_name=model.grant_type_name or "grant_type",
             grant_type=model.grant_type or "refresh_token",
             refresh_request_body=model.refresh_request_body,
+            refresh_request_headers=model.refresh_request_headers,
+            refresh_token_name=model.refresh_token_name or "refresh_token",
             refresh_token=model.refresh_token,
             scopes=model.scopes,
             token_expiry_date=model.token_expiry_date,
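
The two OAuth factories above gain configurable payload key names (`client_id_name`, `client_secret_name`, `grant_type_name`, `refresh_token_name`) plus `refresh_request_headers`, so APIs that rename the standard OAuth fields can be described declaratively. A hedged manifest sketch — the endpoint and interpolated values are placeholders, and the defaults noted in comments come from the factory code:

```yaml
# Hypothetical authenticator excerpt for an API that renames OAuth fields.
authenticator:
  type: OAuthAuthenticator
  token_refresh_endpoint: "https://api.example.com/oauth/token"
  client_id_name: "app_id"              # default key: client_id
  client_id: "{{ config['app_id'] }}"
  client_secret_name: "app_secret"      # default key: client_secret
  client_secret: "{{ config['app_secret'] }}"
  refresh_token_name: "refresh"         # default key: refresh_token
  refresh_token: "{{ config['refresh_token'] }}"
  grant_type_name: "grant"              # default key: grant_type
  grant_type: "refresh_token"
  refresh_request_headers:
    Content-Type: "application/x-www-form-urlencoded"
```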
@@ -1991,22 +1979,22 @@
             message_repository=self._message_repository,
         )
 
-    @staticmethod
     def create_offset_increment(
-        model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
+        self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
     ) -> OffsetIncrement:
         if isinstance(decoder, PaginationDecoderDecorator):
-            if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
+            inner_decoder = decoder.decoder
+        else:
+            inner_decoder = decoder
+            decoder = PaginationDecoderDecorator(decoder=decoder)
+
+        if self._is_supported_decoder_for_pagination(inner_decoder):
             decoder_to_use = decoder
         else:
-            if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
-                raise ValueError(
-                    f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
-                )
-            decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
+            raise ValueError(
+                self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
+            )
+
         return OffsetIncrement(
             page_size=model.page_size,
             config=config,
@@ -2191,7 +2179,7 @@
         if (
             not isinstance(stream_slicer, DatetimeBasedCursor)
             or type(stream_slicer) is not DatetimeBasedCursor
-        )
+        ):
             # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
             # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
             # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -2351,7 +2339,7 @@
             extractor=download_extractor,
             name=name,
             record_filter=None,
-            transformations=[],
+            transformations=transformations,
             schema_normalization=TypeTransformer(TransformConfig.NoTransform),
             config=config,
             parameters={},
@@ -2388,6 +2376,16 @@
             if model.delete_requester
             else None
         )
+        url_requester = (
+            self._create_component_from_model(
+                model=model.url_requester,
+                decoder=decoder,
+                config=config,
+                name=f"job extract_url - {name}",
+            )
+            if model.url_requester
+            else None
+        )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
@@ -2398,6 +2396,7 @@
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
+            url_requester=url_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
@@ -2595,3 +2594,25 @@
             components_mapping=components_mapping,
             parameters=model.parameters or {},
         )
+
+    _UNSUPPORTED_DECODER_ERROR = (
+        "Specified decoder of {decoder_type} is not supported for pagination."
+        "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead."
+        "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
+    )
+
+    def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
+        if isinstance(decoder, (JsonDecoder, XmlDecoder)):
+            return True
+        elif isinstance(decoder, CompositeRawDecoder):
+            return self._is_supported_parser_for_pagination(decoder.parser)
+        else:
+            return False
+
+    def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
+        if isinstance(parser, JsonParser):
+            return True
+        elif isinstance(parser, GzipParser):
+            return isinstance(parser.inner_parser, JsonParser)
+        else:
+            return False
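
`_is_supported_decoder_for_pagination` accepts `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` whose parser chain bottoms out in a `JsonParser` (a `GzipParser` is allowed only as a wrapper around one). A sketch of a paginated gzip-JSON decoder that would pass the check — the YAML shape is an assumption based on the parser nesting in the factory code:

```yaml
# Hypothetical decoder excerpt satisfying the pagination support check:
# GzipParser is accepted only when its inner_parser is a JsonParser.
decoder:
  type: CompositeRawDecoder
  parser:
    type: GzipParser
    inner_parser:
      type: JsonParser
```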
airbyte_cdk/sources/declarative/requesters/README.md (new file)

@@ -0,0 +1,56 @@
+# AsyncHttpJobRepository sequence diagram
+
+- Components marked as optional are not required and can be ignored.
+- if `url_requester` is not provided, `urls_extractor` will get urls from the `polling_job_response`
+- interpolation_context, e.g. `create_job_response` or `polling_job_response` can be obtained from stream_slice
+
+```mermaid
+---
+title: AsyncHttpJobRepository Sequence Diagram
+---
+sequenceDiagram
+    participant AsyncHttpJobRepository as AsyncOrchestrator
+    participant CreationRequester as creation_requester
+    participant PollingRequester as polling_requester
+    participant UrlRequester as url_requester (Optional)
+    participant DownloadRetriever as download_retriever
+    participant AbortRequester as abort_requester (Optional)
+    participant DeleteRequester as delete_requester (Optional)
+    participant Reporting Server as Async Reporting Server
+
+    AsyncHttpJobRepository ->> CreationRequester: Initiate job creation
+    CreationRequester ->> Reporting Server: Create job request
+    Reporting Server -->> CreationRequester: Job ID response
+    CreationRequester -->> AsyncHttpJobRepository: Job ID
+
+    loop Poll for job status
+        AsyncHttpJobRepository ->> PollingRequester: Check job status
+        PollingRequester ->> Reporting Server: Status request (interpolation_context: `create_job_response`)
+        Reporting Server -->> PollingRequester: Status response
+        PollingRequester -->> AsyncHttpJobRepository: Job status
+    end
+
+    alt Status: Ready
+        AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
+        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_job_response`)
+        Reporting Server -->> UrlRequester: Download URLs
+        UrlRequester -->> AsyncHttpJobRepository: Download URLs
+
+        AsyncHttpJobRepository ->> DownloadRetriever: Download reports
+        DownloadRetriever ->> Reporting Server: Retrieve report data (interpolation_context: `url`)
+        Reporting Server -->> DownloadRetriever: Report data
+        DownloadRetriever -->> AsyncHttpJobRepository: Report data
+    else Status: Failed
+        AsyncHttpJobRepository ->> AbortRequester: Send abort request
+        AbortRequester ->> Reporting Server: Abort job
+        Reporting Server -->> AbortRequester: Abort confirmation
+        AbortRequester -->> AsyncHttpJobRepository: Confirmation
+    end
+
+    AsyncHttpJobRepository ->> DeleteRequester: Send delete job request
+    DeleteRequester ->> Reporting Server: Delete job
+    Reporting Server -->> DeleteRequester: Deletion confirmation
+    DeleteRequester -->> AsyncHttpJobRepository: Confirmation
+
+
+```
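
Per the README above, `url_requester` is only needed when the polling response carries an ID rather than the download URLs themselves. A hedged sketch of how an `AsyncRetriever` manifest might wire it up — the `polling_job_response` key is what `_get_download_url` (below) places in the slice partition, while the path expression, the `.json()` dereference, and the extractor path are all placeholders:

```yaml
# Hypothetical AsyncRetriever excerpt: an extra request resolves download URLs,
# then each report is fetched via stream_slice.extra_fields["url"].
retriever:
  type: AsyncRetriever
  url_requester:
    type: HttpRequester
    url_base: "https://api.example.com"
    path: "/reports/{{ stream_slice['polling_job_response'].json()['report_id'] }}/files"  # illustrative
    http_method: GET
  urls_extractor:
    type: DpathExtractor
    field_path: ["urls"]
```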
airbyte_cdk/sources/declarative/requesters/http_job_repository.py

@@ -31,6 +31,10 @@ LOGGER = logging.getLogger("airbyte")
 
 @dataclass
 class AsyncHttpJobRepository(AsyncJobRepository):
+    """
+    See Readme file for more details about flow.
+    """
+
     creation_requester: Requester
     polling_requester: Requester
     download_retriever: SimpleRetriever
@@ -44,6 +48,9 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
+    url_requester: Optional[Requester] = (
+        None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
+    )
 
     def __post_init__(self) -> None:
         self._create_job_response_by_id: Dict[str, Response] = {}
@@ -186,10 +193,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
 
         """
 
-        for url in self.urls_extractor.extract_records(
-            self._polling_job_response_by_id[job.api_job_id()]
-        ):
-            stream_slice = StreamSlice(partition={"url": url}, cursor_slice={})
+        for url in self._get_download_url(job):
+            job_slice = job.job_parameters()
+            stream_slice = StreamSlice(
+                partition=job_slice.partition,
+                cursor_slice=job_slice.cursor_slice,
+                extra_fields={**job_slice.extra_fields, "url": url},
+            )
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
                     yield message.data
@@ -226,3 +236,22 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             cursor_slice={},
         )
         return stream_slice
+
+    def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
+        if not self.url_requester:
+            url_response = self._polling_job_response_by_id[job.api_job_id()]
+        else:
+            stream_slice: StreamSlice = StreamSlice(
+                partition={
+                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
+                },
+                cursor_slice={},
+            )
+            url_response = self.url_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect url_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
+            if not url_response:
+                raise AirbyteTracedException(
+                    internal_message="Always expect a response or an exception from url_requester",
+                    failure_type=FailureType.system_error,
+                )
+
+        yield from self.urls_extractor.extract_records(url_response)  # type: ignore # we expect urls_extractor to always return list of strings
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py

@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.request_option_provider.get_request_headers,
+            self.stream_slicer.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py

@@ -10,6 +10,7 @@ from typing import Any, List, Mapping, MutableMapping, Optional, Union
 import dpath
 from typing_extensions import deprecated
 
+from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
@@ -53,6 +54,7 @@ class TypesMap:
 
     target_type: Union[List[str], str]
     current_type: Union[List[str], str]
+    condition: Optional[str]
 
 
 @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@@ -177,7 +179,7 @@ class DynamicSchemaLoader(SchemaLoader):
             if field_type_path
             else "string"
         )
-        mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
+        mapped_field_type = self._replace_type_if_not_valid(raw_field_type, raw_schema)
         if (
             isinstance(mapped_field_type, list)
             and len(mapped_field_type) == 2
@@ -194,14 +196,22 @@
         )
 
     def _replace_type_if_not_valid(
-        self, field_type: Union[List[str], str]
+        self,
+        field_type: Union[List[str], str],
+        raw_schema: MutableMapping[str, Any],
     ) -> Union[List[str], str]:
         """
         Replaces a field type if it matches a type mapping in `types_map`.
         """
         if self.schema_type_identifier.types_mapping:
             for types_map in self.schema_type_identifier.types_mapping:
-                if field_type == types_map.current_type:
+                # conditional is optional param, setting to true if not provided
+                condition = InterpolatedBoolean(
+                    condition=types_map.condition if types_map.condition is not None else "True",
+                    parameters={},
+                ).eval(config=self.config, raw_schema=raw_schema)
+
+                if field_type == types_map.current_type and condition:
                     return types_map.target_type
         return field_type
 
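
With `condition` added to `TypesMap`, a schema-type remapping can now be gated on the raw field schema: the expression is evaluated as an `InterpolatedBoolean` with `raw_schema` in its context, and an omitted `condition` behaves like `"True"`. A sketch of a conditional mapping entry — the `is_nullable` key is a made-up example of whatever the API's raw schema exposes:

```yaml
# Hypothetical types_mapping entry inside a schema_type_identifier: only map
# integer -> number when the raw schema marks the field as nullable.
types_mapping:
  - target_type: number
    current_type: integer
    condition: "{{ raw_schema['is_nullable'] }}"
```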
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py

@@ -31,6 +31,17 @@ class DeliverRawFiles(BaseModel):
 
     delivery_type: Literal["use_file_transfer"] = Field("use_file_transfer", const=True)
 
+    preserve_directory_structure: bool = Field(
+        title="Preserve Sub-Directories in File Paths",
+        description=(
+            "If enabled, sends subdirectory folder structure "
+            "along with source file names to the destination. "
+            "Otherwise, files will be synced by their names only. "
+            "This option is ignored when file-based replication is not enabled."
+        ),
+        default=True,
+    )
+
 
 class AbstractFileBasedSpec(BaseModel):
     """
|