airbyte-cdk 6.12.4.dev0__py3-none-any.whl → 6.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +93 -34
- airbyte_cdk/cli/source_declarative_manifest/__init__.py +0 -1
- airbyte_cdk/models/__init__.py +10 -11
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +1 -1
- airbyte_cdk/sources/declarative/auth/__init__.py +2 -5
- airbyte_cdk/sources/declarative/auth/oauth.py +27 -12
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -57
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +78 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +21 -3
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +97 -0
- airbyte_cdk/sources/declarative/extractors/__init__.py +10 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +10 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -14
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +49 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +96 -80
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +23 -5
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py +19 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/__init__.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/__init__.py +14 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/__init__.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +4 -1
- airbyte_cdk/sources/declarative/requesters/request_options/__init__.py +12 -3
- airbyte_cdk/sources/declarative/resolvers/__init__.py +31 -8
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
- airbyte_cdk/sources/declarative/retrievers/__init__.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +9 -32
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/__init__.py +14 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
- airbyte_cdk/sources/file_based/availability_strategy/__init__.py +9 -2
- airbyte_cdk/sources/file_based/discovery_policy/__init__.py +6 -2
- airbyte_cdk/sources/file_based/file_types/__init__.py +12 -3
- airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py +3 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +5 -1
- airbyte_cdk/sources/message/__init__.py +7 -1
- airbyte_cdk/sources/streams/__init__.py +1 -1
- airbyte_cdk/sources/streams/checkpoint/__init__.py +2 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +0 -1
- airbyte_cdk/sources/streams/http/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/error_handlers/__init__.py +2 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +8 -3
- airbyte_cdk/test/mock_http/__init__.py +1 -1
- airbyte_cdk/test/mock_http/mocker.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/__init__.py +1 -1
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/METADATA +3 -3
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/RECORD +53 -52
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -344
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.4.dev0.dist-info → airbyte_cdk-6.13.0.dist-info}/entry_points.txt +0 -0
@@ -6,6 +6,14 @@ from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtr
|
|
6
6
|
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
7
7
|
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
|
8
8
|
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
|
9
|
-
from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import
|
9
|
+
from airbyte_cdk.sources.declarative.extractors.response_to_file_extractor import (
|
10
|
+
ResponseToFileExtractor,
|
11
|
+
)
|
10
12
|
|
11
|
-
__all__ = [
|
13
|
+
__all__ = [
|
14
|
+
"HttpSelector",
|
15
|
+
"DpathExtractor",
|
16
|
+
"RecordFilter",
|
17
|
+
"RecordSelector",
|
18
|
+
"ResponseToFileExtractor",
|
19
|
+
]
|
@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
59
59
|
|
60
60
|
def __init__(
|
61
61
|
self,
|
62
|
-
|
62
|
+
date_time_based_cursor: DatetimeBasedCursor,
|
63
|
+
substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
|
63
64
|
**kwargs: Any,
|
64
65
|
):
|
65
66
|
super().__init__(**kwargs)
|
66
|
-
self.
|
67
|
+
self._date_time_based_cursor = date_time_based_cursor
|
68
|
+
self._substream_cursor = substream_cursor
|
67
69
|
|
68
70
|
def filter_records(
|
69
71
|
self,
|
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
75
77
|
records = (
|
76
78
|
record
|
77
79
|
for record in records
|
78
|
-
if self.
|
80
|
+
if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
|
79
81
|
# Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
|
80
82
|
# Record stream name is empty because it is not used during the filtering
|
81
83
|
Record(data=record, associated_slice=stream_slice, stream_name="")
|
@@ -2,12 +2,18 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import ConcurrentCursorFactory, ConcurrentPerPartitionCursor
|
6
5
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
7
6
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
8
|
-
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import
|
9
|
-
|
10
|
-
|
7
|
+
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
8
|
+
GlobalSubstreamCursor,
|
9
|
+
)
|
10
|
+
from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import (
|
11
|
+
CursorFactory,
|
12
|
+
PerPartitionCursor,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
15
|
+
PerPartitionWithGlobalCursor,
|
16
|
+
)
|
11
17
|
from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor import (
|
12
18
|
ChildPartitionResumableFullRefreshCursor,
|
13
19
|
ResumableFullRefreshCursor,
|
@@ -15,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
|
|
15
21
|
|
16
22
|
__all__ = [
|
17
23
|
"CursorFactory",
|
18
|
-
"ConcurrentCursorFactory"
|
19
|
-
"ConcurrentPerPartitionCursor",
|
20
24
|
"DatetimeBasedCursor",
|
21
25
|
"DeclarativeCursor",
|
22
26
|
"GlobalSubstreamCursor",
|
@@ -303,20 +303,6 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
303
303
|
raise ValueError("A partition needs to be provided in order to get request body json")
|
304
304
|
|
305
305
|
def should_be_synced(self, record: Record) -> bool:
|
306
|
-
if (
|
307
|
-
self._to_partition_key(record.associated_slice.partition)
|
308
|
-
not in self._cursor_per_partition
|
309
|
-
):
|
310
|
-
partition_state = (
|
311
|
-
self._state_to_migrate_from
|
312
|
-
if self._state_to_migrate_from
|
313
|
-
else self._NO_CURSOR_STATE
|
314
|
-
)
|
315
|
-
cursor = self._create_cursor(partition_state)
|
316
|
-
|
317
|
-
self._cursor_per_partition[
|
318
|
-
self._to_partition_key(record.associated_slice.partition)
|
319
|
-
] = cursor
|
320
306
|
return self._get_cursor(record).should_be_synced(
|
321
307
|
self._convert_record_to_cursor_record(record)
|
322
308
|
)
|
@@ -489,8 +489,8 @@ class OAuthAuthenticator(BaseModel):
|
|
489
489
|
],
|
490
490
|
title="Refresh Token",
|
491
491
|
)
|
492
|
-
token_refresh_endpoint: str = Field(
|
493
|
-
|
492
|
+
token_refresh_endpoint: Optional[str] = Field(
|
493
|
+
None,
|
494
494
|
description="The full URL to call to obtain a new access token.",
|
495
495
|
examples=["https://connect.squareup.com/oauth2/token"],
|
496
496
|
title="Token Refresh Endpoint",
|
@@ -501,6 +501,12 @@ class OAuthAuthenticator(BaseModel):
|
|
501
501
|
examples=["access_token"],
|
502
502
|
title="Access Token Property Name",
|
503
503
|
)
|
504
|
+
access_token_value: Optional[str] = Field(
|
505
|
+
None,
|
506
|
+
description="The value of the access_token to bypass the token refreshing using `refresh_token`.",
|
507
|
+
examples=["secret_access_token_value"],
|
508
|
+
title="Access Token Value",
|
509
|
+
)
|
504
510
|
expires_in_name: Optional[str] = Field(
|
505
511
|
"expires_in",
|
506
512
|
description="The name of the property which contains the expiry date in the response from the token refresh endpoint.",
|
@@ -1119,6 +1125,17 @@ class LegacySessionTokenAuthenticator(BaseModel):
|
|
1119
1125
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1120
1126
|
|
1121
1127
|
|
1128
|
+
class JsonLineParser(BaseModel):
|
1129
|
+
type: Literal["JsonLineParser"]
|
1130
|
+
encoding: Optional[str] = "utf-8"
|
1131
|
+
|
1132
|
+
|
1133
|
+
class CsvParser(BaseModel):
|
1134
|
+
type: Literal["CsvParser"]
|
1135
|
+
encoding: Optional[str] = "utf-8"
|
1136
|
+
delimiter: Optional[str] = ","
|
1137
|
+
|
1138
|
+
|
1122
1139
|
class AsyncJobStatusMap(BaseModel):
|
1123
1140
|
type: Optional[Literal["AsyncJobStatusMap"]] = None
|
1124
1141
|
running: List[str]
|
@@ -1202,6 +1219,8 @@ class ComponentMappingDefinition(BaseModel):
|
|
1202
1219
|
"{{ components_values['updates'] }}",
|
1203
1220
|
"{{ components_values['MetaData']['LastUpdatedTime'] }}",
|
1204
1221
|
"{{ config['segment_id'] }}",
|
1222
|
+
"{{ stream_slice['parent_id'] }}",
|
1223
|
+
"{{ stream_slice['extra_fields']['name'] }}",
|
1205
1224
|
],
|
1206
1225
|
title="Value",
|
1207
1226
|
)
|
@@ -1498,6 +1517,11 @@ class RecordSelector(BaseModel):
|
|
1498
1517
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1499
1518
|
|
1500
1519
|
|
1520
|
+
class GzipParser(BaseModel):
|
1521
|
+
type: Literal["GzipParser"]
|
1522
|
+
inner_parser: Union[JsonLineParser, CsvParser]
|
1523
|
+
|
1524
|
+
|
1501
1525
|
class Spec(BaseModel):
|
1502
1526
|
type: Literal["Spec"]
|
1503
1527
|
connection_specification: Dict[str, Any] = Field(
|
@@ -1528,6 +1552,11 @@ class CompositeErrorHandler(BaseModel):
|
|
1528
1552
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1529
1553
|
|
1530
1554
|
|
1555
|
+
class CompositeRawDecoder(BaseModel):
|
1556
|
+
type: Literal["CompositeRawDecoder"]
|
1557
|
+
parser: Union[GzipParser, JsonLineParser, CsvParser]
|
1558
|
+
|
1559
|
+
|
1531
1560
|
class DeclarativeSource1(BaseModel):
|
1532
1561
|
class Config:
|
1533
1562
|
extra = Extra.forbid
|
@@ -1671,6 +1700,7 @@ class DeclarativeStream(BaseModel):
|
|
1671
1700
|
RemoveFields,
|
1672
1701
|
KeysToLower,
|
1673
1702
|
KeysToSnakeCase,
|
1703
|
+
FlattenFields,
|
1674
1704
|
]
|
1675
1705
|
]
|
1676
1706
|
] = Field(
|
@@ -1836,6 +1866,22 @@ class DynamicSchemaLoader(BaseModel):
|
|
1836
1866
|
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
1837
1867
|
title="Retriever",
|
1838
1868
|
)
|
1869
|
+
schema_transformations: Optional[
|
1870
|
+
List[
|
1871
|
+
Union[
|
1872
|
+
AddFields,
|
1873
|
+
CustomTransformation,
|
1874
|
+
RemoveFields,
|
1875
|
+
KeysToLower,
|
1876
|
+
KeysToSnakeCase,
|
1877
|
+
FlattenFields,
|
1878
|
+
]
|
1879
|
+
]
|
1880
|
+
] = Field(
|
1881
|
+
None,
|
1882
|
+
description="A list of transformations to be applied to the schema.",
|
1883
|
+
title="Schema Transformations",
|
1884
|
+
)
|
1839
1885
|
schema_type_identifier: SchemaTypeIdentifier
|
1840
1886
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1841
1887
|
|
@@ -1913,6 +1959,7 @@ class SimpleRetriever(BaseModel):
|
|
1913
1959
|
IterableDecoder,
|
1914
1960
|
XmlDecoder,
|
1915
1961
|
GzipJsonDecoder,
|
1962
|
+
CompositeRawDecoder,
|
1916
1963
|
]
|
1917
1964
|
] = Field(
|
1918
1965
|
None,
|
@@ -67,6 +67,12 @@ from airbyte_cdk.sources.declarative.decoders import (
|
|
67
67
|
PaginationDecoderDecorator,
|
68
68
|
XmlDecoder,
|
69
69
|
)
|
70
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
|
71
|
+
CompositeRawDecoder,
|
72
|
+
CsvParser,
|
73
|
+
GzipParser,
|
74
|
+
JsonLineParser,
|
75
|
+
)
|
70
76
|
from airbyte_cdk.sources.declarative.extractors import (
|
71
77
|
DpathExtractor,
|
72
78
|
RecordFilter,
|
@@ -81,8 +87,6 @@ from airbyte_cdk.sources.declarative.extractors.record_selector import (
|
|
81
87
|
)
|
82
88
|
from airbyte_cdk.sources.declarative.incremental import (
|
83
89
|
ChildPartitionResumableFullRefreshCursor,
|
84
|
-
ConcurrentCursorFactory,
|
85
|
-
ConcurrentPerPartitionCursor,
|
86
90
|
CursorFactory,
|
87
91
|
DatetimeBasedCursor,
|
88
92
|
DeclarativeCursor,
|
@@ -127,6 +131,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
127
131
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
128
132
|
CompositeErrorHandler as CompositeErrorHandlerModel,
|
129
133
|
)
|
134
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
135
|
+
CompositeRawDecoder as CompositeRawDecoderModel,
|
136
|
+
)
|
130
137
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
131
138
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
132
139
|
)
|
@@ -136,6 +143,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
136
143
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
137
144
|
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
138
145
|
)
|
146
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
147
|
+
CsvParser as CsvParserModel,
|
148
|
+
)
|
139
149
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
140
150
|
CursorPagination as CursorPaginationModel,
|
141
151
|
)
|
@@ -205,6 +215,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
205
215
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
206
216
|
GzipJsonDecoder as GzipJsonDecoderModel,
|
207
217
|
)
|
218
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
219
|
+
GzipParser as GzipParserModel,
|
220
|
+
)
|
208
221
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
209
222
|
HttpComponentsResolver as HttpComponentsResolverModel,
|
210
223
|
)
|
@@ -229,6 +242,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
229
242
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
230
243
|
JsonlDecoder as JsonlDecoderModel,
|
231
244
|
)
|
245
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
246
|
+
JsonLineParser as JsonLineParserModel,
|
247
|
+
)
|
232
248
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
233
249
|
JwtAuthenticator as JwtAuthenticatorModel,
|
234
250
|
)
|
@@ -241,6 +257,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
241
257
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
242
258
|
KeysToLower as KeysToLowerModel,
|
243
259
|
)
|
260
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
261
|
+
KeysToSnakeCase as KeysToSnakeCaseModel,
|
262
|
+
)
|
244
263
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
245
264
|
LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
|
246
265
|
)
|
@@ -328,6 +347,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
|
|
328
347
|
SinglePartitionRouter,
|
329
348
|
SubstreamPartitionRouter,
|
330
349
|
)
|
350
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
351
|
+
AsyncJobPartitionRouter,
|
352
|
+
)
|
331
353
|
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
332
354
|
ParentStreamConfig,
|
333
355
|
)
|
@@ -398,11 +420,13 @@ from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
|
|
398
420
|
from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
|
399
421
|
KeysToLowerTransformation,
|
400
422
|
)
|
423
|
+
from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
|
424
|
+
KeysToSnakeCaseTransformation,
|
425
|
+
)
|
401
426
|
from airbyte_cdk.sources.message import (
|
402
427
|
InMemoryMessageRepository,
|
403
428
|
LogAppenderMessageRepositoryDecorator,
|
404
429
|
MessageRepository,
|
405
|
-
NoopMessageRepository,
|
406
430
|
)
|
407
431
|
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
|
408
432
|
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
@@ -449,6 +473,7 @@ class ModelToComponentFactory:
|
|
449
473
|
BearerAuthenticatorModel: self.create_bearer_authenticator,
|
450
474
|
CheckStreamModel: self.create_check_stream,
|
451
475
|
CompositeErrorHandlerModel: self.create_composite_error_handler,
|
476
|
+
CompositeRawDecoderModel: self.create_composite_raw_decoder,
|
452
477
|
ConcurrencyLevelModel: self.create_concurrency_level,
|
453
478
|
ConstantBackoffStrategyModel: self.create_constant_backoff_strategy,
|
454
479
|
CursorPaginationModel: self.create_cursor_pagination,
|
@@ -479,8 +504,11 @@ class ModelToComponentFactory:
|
|
479
504
|
InlineSchemaLoaderModel: self.create_inline_schema_loader,
|
480
505
|
JsonDecoderModel: self.create_json_decoder,
|
481
506
|
JsonlDecoderModel: self.create_jsonl_decoder,
|
507
|
+
JsonLineParserModel: self.create_json_line_parser,
|
482
508
|
GzipJsonDecoderModel: self.create_gzipjson_decoder,
|
509
|
+
GzipParserModel: self.create_gzip_parser,
|
483
510
|
KeysToLowerModel: self.create_keys_to_lower_transformation,
|
511
|
+
KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
|
484
512
|
FlattenFieldsModel: self.create_flatten_fields,
|
485
513
|
IterableDecoderModel: self.create_iterable_decoder,
|
486
514
|
XmlDecoderModel: self.create_xml_decoder,
|
@@ -597,6 +625,11 @@ class ModelToComponentFactory:
|
|
597
625
|
) -> KeysToLowerTransformation:
|
598
626
|
return KeysToLowerTransformation()
|
599
627
|
|
628
|
+
def create_keys_to_snake_transformation(
|
629
|
+
self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
|
630
|
+
) -> KeysToSnakeCaseTransformation:
|
631
|
+
return KeysToSnakeCaseTransformation()
|
632
|
+
|
600
633
|
def create_flatten_fields(
|
601
634
|
self, model: FlattenFieldsModel, config: Config, **kwargs: Any
|
602
635
|
) -> FlattenFields:
|
@@ -811,7 +844,6 @@ class ModelToComponentFactory:
|
|
811
844
|
stream_namespace: Optional[str],
|
812
845
|
config: Config,
|
813
846
|
stream_state: MutableMapping[str, Any],
|
814
|
-
message_repository: Optional[MessageRepository] = None,
|
815
847
|
**kwargs: Any,
|
816
848
|
) -> ConcurrentCursor:
|
817
849
|
component_type = component_definition.get("type")
|
@@ -947,7 +979,7 @@ class ModelToComponentFactory:
|
|
947
979
|
stream_name=stream_name,
|
948
980
|
stream_namespace=stream_namespace,
|
949
981
|
stream_state=stream_state,
|
950
|
-
message_repository=
|
982
|
+
message_repository=self._message_repository,
|
951
983
|
connector_state_manager=state_manager,
|
952
984
|
connector_state_converter=connector_state_converter,
|
953
985
|
cursor_field=cursor_field,
|
@@ -959,63 +991,6 @@ class ModelToComponentFactory:
|
|
959
991
|
cursor_granularity=cursor_granularity,
|
960
992
|
)
|
961
993
|
|
962
|
-
def create_concurrent_cursor_from_perpartition_cursor(
|
963
|
-
self,
|
964
|
-
state_manager: ConnectorStateManager,
|
965
|
-
model_type: Type[BaseModel],
|
966
|
-
component_definition: ComponentDefinition,
|
967
|
-
stream_name: str,
|
968
|
-
stream_namespace: Optional[str],
|
969
|
-
config: Config,
|
970
|
-
stream_state: MutableMapping[str, Any],
|
971
|
-
partition_router,
|
972
|
-
**kwargs: Any,
|
973
|
-
) -> ConcurrentPerPartitionCursor:
|
974
|
-
component_type = component_definition.get("type")
|
975
|
-
if component_definition.get("type") != model_type.__name__:
|
976
|
-
raise ValueError(
|
977
|
-
f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
|
978
|
-
)
|
979
|
-
|
980
|
-
datetime_based_cursor_model = model_type.parse_obj(component_definition)
|
981
|
-
|
982
|
-
if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
|
983
|
-
raise ValueError(
|
984
|
-
f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
|
985
|
-
)
|
986
|
-
|
987
|
-
interpolated_cursor_field = InterpolatedString.create(
|
988
|
-
datetime_based_cursor_model.cursor_field,
|
989
|
-
parameters=datetime_based_cursor_model.parameters or {},
|
990
|
-
)
|
991
|
-
cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
|
992
|
-
|
993
|
-
# Create the cursor factory
|
994
|
-
cursor_factory = ConcurrentCursorFactory(
|
995
|
-
partial(
|
996
|
-
self.create_concurrent_cursor_from_datetime_based_cursor,
|
997
|
-
state_manager=state_manager,
|
998
|
-
model_type=model_type,
|
999
|
-
component_definition=component_definition,
|
1000
|
-
stream_name=stream_name,
|
1001
|
-
stream_namespace=stream_namespace,
|
1002
|
-
config=config,
|
1003
|
-
message_repository=NoopMessageRepository(),
|
1004
|
-
)
|
1005
|
-
)
|
1006
|
-
|
1007
|
-
# Return the concurrent cursor and state converter
|
1008
|
-
return ConcurrentPerPartitionCursor(
|
1009
|
-
cursor_factory=cursor_factory,
|
1010
|
-
partition_router=partition_router,
|
1011
|
-
stream_name=stream_name,
|
1012
|
-
stream_namespace=stream_namespace,
|
1013
|
-
stream_state=stream_state,
|
1014
|
-
message_repository=self._message_repository, # type: ignore
|
1015
|
-
connector_state_manager=state_manager,
|
1016
|
-
cursor_field=cursor_field,
|
1017
|
-
)
|
1018
|
-
|
1019
994
|
@staticmethod
|
1020
995
|
def create_constant_backoff_strategy(
|
1021
996
|
model: ConstantBackoffStrategyModel, config: Config, **kwargs: Any
|
@@ -1298,15 +1273,18 @@ class ModelToComponentFactory:
|
|
1298
1273
|
raise ValueError(
|
1299
1274
|
"Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
|
1300
1275
|
)
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1276
|
+
client_side_incremental_sync = {
|
1277
|
+
"date_time_based_cursor": self._create_component_from_model(
|
1278
|
+
model=model.incremental_sync, config=config
|
1279
|
+
),
|
1280
|
+
"substream_cursor": (
|
1281
|
+
combined_slicers
|
1282
|
+
if isinstance(
|
1283
|
+
combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
|
1284
|
+
)
|
1285
|
+
else None
|
1286
|
+
),
|
1287
|
+
}
|
1310
1288
|
|
1311
1289
|
if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
|
1312
1290
|
cursor_model = model.incremental_sync
|
@@ -1708,6 +1686,13 @@ class ModelToComponentFactory:
|
|
1708
1686
|
model.retriever, stream_slicer
|
1709
1687
|
)
|
1710
1688
|
|
1689
|
+
schema_transformations = []
|
1690
|
+
if model.schema_transformations:
|
1691
|
+
for transformation_model in model.schema_transformations:
|
1692
|
+
schema_transformations.append(
|
1693
|
+
self._create_component_from_model(model=transformation_model, config=config)
|
1694
|
+
)
|
1695
|
+
|
1711
1696
|
retriever = self._create_component_from_model(
|
1712
1697
|
model=model.retriever,
|
1713
1698
|
config=config,
|
@@ -1722,6 +1707,7 @@ class ModelToComponentFactory:
|
|
1722
1707
|
return DynamicSchemaLoader(
|
1723
1708
|
retriever=retriever,
|
1724
1709
|
config=config,
|
1710
|
+
schema_transformations=schema_transformations,
|
1725
1711
|
schema_type_identifier=schema_type_identifier,
|
1726
1712
|
parameters=model.parameters or {},
|
1727
1713
|
)
|
@@ -1736,6 +1722,12 @@ class ModelToComponentFactory:
|
|
1736
1722
|
) -> JsonlDecoder:
|
1737
1723
|
return JsonlDecoder(parameters={})
|
1738
1724
|
|
1725
|
+
@staticmethod
|
1726
|
+
def create_json_line_parser(
|
1727
|
+
model: JsonLineParserModel, config: Config, **kwargs: Any
|
1728
|
+
) -> JsonLineParser:
|
1729
|
+
return JsonLineParser(encoding=model.encoding)
|
1730
|
+
|
1739
1731
|
@staticmethod
|
1740
1732
|
def create_iterable_decoder(
|
1741
1733
|
model: IterableDecoderModel, config: Config, **kwargs: Any
|
@@ -1752,6 +1744,22 @@ class ModelToComponentFactory:
|
|
1752
1744
|
) -> GzipJsonDecoder:
|
1753
1745
|
return GzipJsonDecoder(parameters={}, encoding=model.encoding)
|
1754
1746
|
|
1747
|
+
def create_gzip_parser(
|
1748
|
+
self, model: GzipParserModel, config: Config, **kwargs: Any
|
1749
|
+
) -> GzipParser:
|
1750
|
+
inner_parser = self._create_component_from_model(model=model.inner_parser, config=config)
|
1751
|
+
return GzipParser(inner_parser=inner_parser)
|
1752
|
+
|
1753
|
+
@staticmethod
|
1754
|
+
def create_csv_parser(model: CsvParserModel, config: Config, **kwargs: Any) -> CsvParser:
|
1755
|
+
return CsvParser(encoding=model.encoding, delimiter=model.delimiter)
|
1756
|
+
|
1757
|
+
def create_composite_raw_decoder(
|
1758
|
+
self, model: CompositeRawDecoderModel, config: Config, **kwargs: Any
|
1759
|
+
) -> CompositeRawDecoder:
|
1760
|
+
parser = self._create_component_from_model(model=model.parser, config=config)
|
1761
|
+
return CompositeRawDecoder(parser=parser)
|
1762
|
+
|
1755
1763
|
@staticmethod
|
1756
1764
|
def create_json_file_schema_loader(
|
1757
1765
|
model: JsonFileSchemaLoaderModel, config: Config, **kwargs: Any
|
@@ -1835,7 +1843,8 @@ class ModelToComponentFactory:
|
|
1835
1843
|
return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
|
1836
1844
|
config,
|
1837
1845
|
InterpolatedString.create(
|
1838
|
-
model.token_refresh_endpoint,
|
1846
|
+
model.token_refresh_endpoint, # type: ignore
|
1847
|
+
parameters=model.parameters or {},
|
1839
1848
|
).eval(config),
|
1840
1849
|
access_token_name=InterpolatedString.create(
|
1841
1850
|
model.access_token_name or "access_token", parameters=model.parameters or {}
|
@@ -1869,6 +1878,7 @@ class ModelToComponentFactory:
|
|
1869
1878
|
# ignore type error because fixing it would have a lot of dependencies, revisit later
|
1870
1879
|
return DeclarativeOauth2Authenticator( # type: ignore
|
1871
1880
|
access_token_name=model.access_token_name or "access_token",
|
1881
|
+
access_token_value=model.access_token_value,
|
1872
1882
|
client_id=model.client_id,
|
1873
1883
|
client_secret=model.client_secret,
|
1874
1884
|
expires_in_name=model.expires_in_name or "expires_in",
|
@@ -2084,7 +2094,7 @@ class ModelToComponentFactory:
|
|
2084
2094
|
if (
|
2085
2095
|
not isinstance(stream_slicer, DatetimeBasedCursor)
|
2086
2096
|
or type(stream_slicer) is not DatetimeBasedCursor
|
2087
|
-
)
|
2097
|
+
):
|
2088
2098
|
# Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
|
2089
2099
|
# Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
|
2090
2100
|
# their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
|
@@ -2298,22 +2308,28 @@ class ModelToComponentFactory:
|
|
2298
2308
|
urls_extractor=urls_extractor,
|
2299
2309
|
)
|
2300
2310
|
|
2301
|
-
|
2311
|
+
async_job_partition_router = AsyncJobPartitionRouter(
|
2302
2312
|
job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
|
2303
2313
|
job_repository,
|
2304
2314
|
stream_slices,
|
2305
|
-
JobTracker(
|
2306
|
-
|
2307
|
-
), # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
2315
|
+
JobTracker(1),
|
2316
|
+
# FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
|
2308
2317
|
self._message_repository,
|
2309
|
-
has_bulk_parent=False,
|
2318
|
+
has_bulk_parent=False,
|
2319
|
+
# FIXME work would need to be done here in order to detect if a stream has a parent stream that is bulk
|
2310
2320
|
),
|
2311
|
-
record_selector=record_selector,
|
2312
2321
|
stream_slicer=stream_slicer,
|
2313
2322
|
config=config,
|
2314
2323
|
parameters=model.parameters or {},
|
2315
2324
|
)
|
2316
2325
|
|
2326
|
+
return AsyncRetriever(
|
2327
|
+
record_selector=record_selector,
|
2328
|
+
stream_slicer=async_job_partition_router,
|
2329
|
+
config=config,
|
2330
|
+
parameters=model.parameters or {},
|
2331
|
+
)
|
2332
|
+
|
2317
2333
|
@staticmethod
|
2318
2334
|
def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
|
2319
2335
|
return Spec(
|
@@ -2423,7 +2439,7 @@ class ModelToComponentFactory:
|
|
2423
2439
|
config=config,
|
2424
2440
|
name="",
|
2425
2441
|
primary_key=None,
|
2426
|
-
stream_slicer=combined_slicers,
|
2442
|
+
stream_slicer=stream_slicer if stream_slicer else combined_slicers,
|
2427
2443
|
transformations=[],
|
2428
2444
|
)
|
2429
2445
|
|
@@ -2,10 +2,28 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.sources.declarative.partition_routers.
|
6
|
-
|
7
|
-
|
8
|
-
from airbyte_cdk.sources.declarative.partition_routers.
|
5
|
+
from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
|
6
|
+
AsyncJobPartitionRouter,
|
7
|
+
)
|
8
|
+
from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import (
|
9
|
+
CartesianProductStreamSlicer,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import (
|
12
|
+
ListPartitionRouter,
|
13
|
+
)
|
9
14
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
15
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
16
|
+
SinglePartitionRouter,
|
17
|
+
)
|
18
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
19
|
+
SubstreamPartitionRouter,
|
20
|
+
)
|
10
21
|
|
11
|
-
__all__ = [
|
22
|
+
__all__ = [
|
23
|
+
"AsyncJobPartitionRouter",
|
24
|
+
"CartesianProductStreamSlicer",
|
25
|
+
"ListPartitionRouter",
|
26
|
+
"SinglePartitionRouter",
|
27
|
+
"SubstreamPartitionRouter",
|
28
|
+
"PartitionRouter",
|
29
|
+
]
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import InitVar, dataclass, field
|
4
|
+
from typing import Any, Callable, Iterable, Mapping, Optional
|
5
|
+
|
6
|
+
from airbyte_cdk.models import FailureType
|
7
|
+
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
8
|
+
AsyncJobOrchestrator,
|
9
|
+
AsyncPartition,
|
10
|
+
)
|
11
|
+
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
12
|
+
SinglePartitionRouter,
|
13
|
+
)
|
14
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
15
|
+
from airbyte_cdk.sources.types import Config, StreamSlice
|
16
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
17
|
+
|
18
|
+
|
19
|
+
@dataclass
|
20
|
+
class AsyncJobPartitionRouter(StreamSlicer):
|
21
|
+
"""
|
22
|
+
Partition router that creates async jobs in a source API, periodically polls for job
|
23
|
+
completion, and supplies the completed job URL locations as stream slices so that
|
24
|
+
records can be extracted.
|
25
|
+
"""
|
26
|
+
|
27
|
+
config: Config
|
28
|
+
parameters: InitVar[Mapping[str, Any]]
|
29
|
+
job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
|
30
|
+
stream_slicer: StreamSlicer = field(
|
31
|
+
default_factory=lambda: SinglePartitionRouter(parameters={})
|
32
|
+
)
|
33
|
+
|
34
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
35
|
+
self._job_orchestrator_factory = self.job_orchestrator_factory
|
36
|
+
self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
|
37
|
+
self._parameters = parameters
|
38
|
+
|
39
|
+
def stream_slices(self) -> Iterable[StreamSlice]:
|
40
|
+
slices = self.stream_slicer.stream_slices()
|
41
|
+
self._job_orchestrator = self._job_orchestrator_factory(slices)
|
42
|
+
|
43
|
+
for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
|
44
|
+
yield StreamSlice(
|
45
|
+
partition=dict(completed_partition.stream_slice.partition)
|
46
|
+
| {"partition": completed_partition},
|
47
|
+
cursor_slice=completed_partition.stream_slice.cursor_slice,
|
48
|
+
)
|
49
|
+
|
50
|
+
def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
|
51
|
+
"""
|
52
|
+
This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
|
53
|
+
be responsible for. However, this was added in because the JobOrchestrator is required to
|
54
|
+
retrieve records. And without defining fetch_records() on this class, we're stuck with either
|
55
|
+
passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
|
56
|
+
"""
|
57
|
+
|
58
|
+
if not self._job_orchestrator:
|
59
|
+
raise AirbyteTracedException(
|
60
|
+
message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
|
61
|
+
internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
|
62
|
+
failure_type=FailureType.system_error,
|
63
|
+
)
|
64
|
+
|
65
|
+
return self._job_orchestrator.fetch_records(partition=partition)
|