airbyte-cdk 6.12.1.dev0__py3-none-any.whl → 6.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +27 -12
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +83 -36
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +73 -46
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +51 -8
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +9 -1
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +65 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +20 -14
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +8 -31
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +20 -3
- airbyte_cdk/sources/declarative/transformations/flatten_fields.py +50 -0
- airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +68 -0
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http_client.py +4 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +12 -3
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.3.dist-info}/METADATA +3 -2
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.3.dist-info}/RECORD +19 -16
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.3.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -197,6 +197,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    FlattenFields as FlattenFieldsModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     GzipJsonDecoder as GzipJsonDecoderModel,
 )
@@ -236,6 +239,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     KeysToLower as KeysToLowerModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    KeysToSnakeCase as KeysToSnakeCaseModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     LegacySessionTokenAuthenticator as LegacySessionTokenAuthenticatorModel,
 )
@@ -323,6 +329,9 @@ from airbyte_cdk.sources.declarative.partition_routers import (
     SinglePartitionRouter,
     SubstreamPartitionRouter,
 )
+from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
+    AsyncJobPartitionRouter,
+)
 from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
     ParentStreamConfig,
 )
@@ -387,9 +396,15 @@ from airbyte_cdk.sources.declarative.transformations import (
     RemoveFields,
 )
 from airbyte_cdk.sources.declarative.transformations.add_fields import AddedFieldDefinition
+from airbyte_cdk.sources.declarative.transformations.flatten_fields import (
+    FlattenFields,
+)
 from airbyte_cdk.sources.declarative.transformations.keys_to_lower_transformation import (
     KeysToLowerTransformation,
 )
+from airbyte_cdk.sources.declarative.transformations.keys_to_snake_transformation import (
+    KeysToSnakeCaseTransformation,
+)
 from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
@@ -472,6 +487,8 @@ class ModelToComponentFactory:
             JsonlDecoderModel: self.create_jsonl_decoder,
             GzipJsonDecoderModel: self.create_gzipjson_decoder,
             KeysToLowerModel: self.create_keys_to_lower_transformation,
+            KeysToSnakeCaseModel: self.create_keys_to_snake_transformation,
+            FlattenFieldsModel: self.create_flatten_fields,
             IterableDecoderModel: self.create_iterable_decoder,
             XmlDecoderModel: self.create_xml_decoder,
             JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
@@ -587,6 +604,16 @@ class ModelToComponentFactory:
     ) -> KeysToLowerTransformation:
         return KeysToLowerTransformation()
 
+    def create_keys_to_snake_transformation(
+        self, model: KeysToSnakeCaseModel, config: Config, **kwargs: Any
+    ) -> KeysToSnakeCaseTransformation:
+        return KeysToSnakeCaseTransformation()
+
+    def create_flatten_fields(
+        self, model: FlattenFieldsModel, config: Config, **kwargs: Any
+    ) -> FlattenFields:
+        return FlattenFields()
+
     @staticmethod
     def _json_schema_type_name_to_type(value_type: Optional[ValueType]) -> Optional[Type[Any]]:
         if not value_type:
@@ -1638,6 +1665,13 @@ class ModelToComponentFactory:
             model.retriever, stream_slicer
         )
 
+        schema_transformations = []
+        if model.schema_transformations:
+            for transformation_model in model.schema_transformations:
+                schema_transformations.append(
+                    self._create_component_from_model(model=transformation_model, config=config)
+                )
+
         retriever = self._create_component_from_model(
             model=model.retriever,
             config=config,
@@ -1652,6 +1686,7 @@ class ModelToComponentFactory:
         return DynamicSchemaLoader(
             retriever=retriever,
             config=config,
+            schema_transformations=schema_transformations,
             schema_type_identifier=schema_type_identifier,
             parameters=model.parameters or {},
         )
@@ -1765,7 +1800,8 @@ class ModelToComponentFactory:
         return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
             config,
             InterpolatedString.create(
-                model.token_refresh_endpoint, parameters=model.parameters or {}
+                model.token_refresh_endpoint, # type: ignore
+                parameters=model.parameters or {},
             ).eval(config),
             access_token_name=InterpolatedString.create(
                 model.access_token_name or "access_token", parameters=model.parameters or {}
@@ -1799,6 +1835,7 @@ class ModelToComponentFactory:
         # ignore type error because fixing it would have a lot of dependencies, revisit later
         return DeclarativeOauth2Authenticator( # type: ignore
             access_token_name=model.access_token_name or "access_token",
+            access_token_value=model.access_token_value,
             client_id=model.client_id,
             client_secret=model.client_secret,
             expires_in_name=model.expires_in_name or "expires_in",
@@ -2228,22 +2265,28 @@ class ModelToComponentFactory:
             urls_extractor=urls_extractor,
         )
 
-        return AsyncRetriever(
+        async_job_partition_router = AsyncJobPartitionRouter(
             job_orchestrator_factory=lambda stream_slices: AsyncJobOrchestrator(
                 job_repository,
                 stream_slices,
-                JobTracker(
-                    1
-                ),  # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
+                JobTracker(1),
+                # FIXME eventually make the number of concurrent jobs in the API configurable. Until then, we limit to 1
                 self._message_repository,
-                has_bulk_parent=False,
+                has_bulk_parent=False,
+                # FIXME work would need to be done here in order to detect if a stream as a parent stream that is bulk
             ),
-            record_selector=record_selector,
             stream_slicer=stream_slicer,
             config=config,
             parameters=model.parameters or {},
         )
 
+        return AsyncRetriever(
+            record_selector=record_selector,
+            stream_slicer=async_job_partition_router,
+            config=config,
+            parameters=model.parameters or {},
+        )
+
     @staticmethod
     def create_spec(model: SpecModel, config: Config, **kwargs: Any) -> Spec:
         return Spec(
@@ -2353,7 +2396,7 @@ class ModelToComponentFactory:
             config=config,
             name="",
             primary_key=None,
-            stream_slicer=combined_slicers,
+            stream_slicer=stream_slicer if stream_slicer else combined_slicers,
             transformations=[],
         )
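The @@ -1765 hunk above now passes parameters through InterpolatedString.create before evaluating the refresh endpoint. A minimal sketch of what that evaluation does, with an illustrative config value that is not taken from the diff (assumes airbyte-cdk is installed):

    from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString

    config = {"domain": "example.auth.com"}  # illustrative value
    endpoint = InterpolatedString.create(
        "https://{{ config['domain'] }}/oauth/token", parameters={}
    ).eval(config)
    assert endpoint == "https://example.auth.com/oauth/token"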
airbyte_cdk/sources/declarative/partition_routers/__init__.py

@@ -2,10 +2,18 @@
 # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
 #
 
+from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import AsyncJobPartitionRouter
 from airbyte_cdk.sources.declarative.partition_routers.cartesian_product_stream_slicer import CartesianProductStreamSlicer
 from airbyte_cdk.sources.declarative.partition_routers.list_partition_router import ListPartitionRouter
 from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
 from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import SubstreamPartitionRouter
 from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
 
-__all__ = ["CartesianProductStreamSlicer", "ListPartitionRouter", "SinglePartitionRouter", "SubstreamPartitionRouter", "PartitionRouter"]
+__all__ = [
+    "AsyncJobPartitionRouter",
+    "CartesianProductStreamSlicer",
+    "ListPartitionRouter",
+    "SinglePartitionRouter",
+    "SubstreamPartitionRouter",
+    "PartitionRouter"
+]
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py

@@ -0,0 +1,65 @@
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+
+from dataclasses import InitVar, dataclass, field
+from typing import Any, Callable, Iterable, Mapping, Optional
+
+from airbyte_cdk.models import FailureType
+from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
+    AsyncJobOrchestrator,
+    AsyncPartition,
+)
+from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
+    SinglePartitionRouter,
+)
+from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
+from airbyte_cdk.sources.types import Config, StreamSlice
+from airbyte_cdk.utils.traced_exception import AirbyteTracedException
+
+
+@dataclass
+class AsyncJobPartitionRouter(StreamSlicer):
+    """
+    Partition router that creates async jobs in a source API, periodically polls for job
+    completion, and supplies the completed job URL locations as stream slices so that
+    records can be extracted.
+    """
+
+    config: Config
+    parameters: InitVar[Mapping[str, Any]]
+    job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
+    stream_slicer: StreamSlicer = field(
+        default_factory=lambda: SinglePartitionRouter(parameters={})
+    )
+
+    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._job_orchestrator_factory = self.job_orchestrator_factory
+        self._job_orchestrator: Optional[AsyncJobOrchestrator] = None
+        self._parameters = parameters
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        slices = self.stream_slicer.stream_slices()
+        self._job_orchestrator = self._job_orchestrator_factory(slices)
+
+        for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
+            yield StreamSlice(
+                partition=dict(completed_partition.stream_slice.partition)
+                | {"partition": completed_partition},
+                cursor_slice=completed_partition.stream_slice.cursor_slice,
+            )
+
+    def fetch_records(self, partition: AsyncPartition) -> Iterable[Mapping[str, Any]]:
+        """
+        This method of fetching records extends beyond what a PartitionRouter/StreamSlicer should
+        be responsible for. However, this was added in because the JobOrchestrator is required to
+        retrieve records. And without defining fetch_records() on this class, we're stuck with either
+        passing the JobOrchestrator to the AsyncRetriever or storing it on multiple classes.
+        """
+
+        if not self._job_orchestrator:
+            raise AirbyteTracedException(
+                message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
+                internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
+                failure_type=FailureType.system_error,
+            )
+
+        return self._job_orchestrator.fetch_records(partition=partition)
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py

@@ -88,19 +88,25 @@ class HttpComponentsResolver(ComponentsResolver):
         """
         kwargs = {"stream_template_config": stream_template_config}
 
-        for components_values in self.retriever.read_records({}):
-            updated_config = deepcopy(stream_template_config)
-            kwargs["components_values"] = components_values  # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
-
-            for resolved_component in self._resolved_components:
-                valid_types = (
-                    (resolved_component.value_type,) if resolved_component.value_type else None
-                )
-                value = resolved_component.value.eval(
-                    self.config, valid_types=valid_types, **kwargs
-                )
+        for stream_slice in self.retriever.stream_slices():
+            for components_values in self.retriever.read_records(
+                records_schema={}, stream_slice=stream_slice
+            ):
+                updated_config = deepcopy(stream_template_config)
+                kwargs["components_values"] = components_values  # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
+                kwargs["stream_slice"] = stream_slice  # type: ignore[assignment] # stream_slice will always be of type Mapping[str, Any]
+
+                for resolved_component in self._resolved_components:
+                    valid_types = (
+                        (resolved_component.value_type,) if resolved_component.value_type else None
+                    )
+                    value = resolved_component.value.eval(
+                        self.config, valid_types=valid_types, **kwargs
+                    )
 
-                path = [path.eval(self.config, **kwargs) for path in resolved_component.field_path]
-                dpath.set(updated_config, path, value)
+                    path = [
+                        path.eval(self.config, **kwargs) for path in resolved_component.field_path
+                    ]
+                    dpath.set(updated_config, path, value)
 
-            yield updated_config
+                yield updated_config
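A small sketch of the resolution step in the rewritten loop: each resolved component evaluates a value and writes it into a deep copy of the stream template at field_path, leaving the template itself untouched. The config values here are illustrative; dpath is the same library the resolver calls:

    from copy import deepcopy

    import dpath

    stream_template_config = {"name": "placeholder", "retriever": {"path": "/v1/items"}}
    updated_config = deepcopy(stream_template_config)
    dpath.set(updated_config, ["name"], "customers")  # same call shape as the resolver's
    assert updated_config["name"] == "customers"
    assert stream_template_config["name"] == "placeholder"  # template stays untouched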
airbyte_cdk/sources/declarative/retrievers/async_retriever.py

@@ -1,8 +1,8 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 
 
-from dataclasses import InitVar, dataclass, field
-from typing import Any, Callable, Iterable, Mapping, Optional
+from dataclasses import InitVar, dataclass
+from typing import Any, Iterable, Mapping, Optional
 
 from typing_extensions import deprecated
 
@@ -12,9 +12,10 @@ from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
     AsyncPartition,
 )
 from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
-from airbyte_cdk.sources.declarative.partition_routers import SinglePartitionRouter
+from airbyte_cdk.sources.declarative.partition_routers.async_job_partition_router import (
+    AsyncJobPartitionRouter,
+)
 from airbyte_cdk.sources.declarative.retrievers import Retriever
-from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
 from airbyte_cdk.sources.source import ExperimentalClassWarning
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -29,15 +30,10 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 class AsyncRetriever(Retriever):
     config: Config
     parameters: InitVar[Mapping[str, Any]]
-    job_orchestrator_factory: Callable[[Iterable[StreamSlice]], AsyncJobOrchestrator]
     record_selector: RecordSelector
-    stream_slicer: StreamSlicer = field(
-        default_factory=lambda: SinglePartitionRouter(parameters={})
-    )
+    stream_slicer: AsyncJobPartitionRouter
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
-        self._job_orchestrator_factory = self.job_orchestrator_factory
-        self.__job_orchestrator: Optional[AsyncJobOrchestrator] = None
         self._parameters = parameters
 
     @property
@@ -54,17 +50,6 @@ class AsyncRetriever(Retriever):
         """
         pass
 
-    @property
-    def _job_orchestrator(self) -> AsyncJobOrchestrator:
-        if not self.__job_orchestrator:
-            raise AirbyteTracedException(
-                message="Invalid state within AsyncJobRetriever. Please contact Airbyte Support",
-                internal_message="AsyncPartitionRepository is expected to be accessed only after `stream_slices`",
-                failure_type=FailureType.system_error,
-            )
-
-        return self.__job_orchestrator
-
     def _get_stream_state(self) -> StreamState:
         """
         Gets the current state of the stream.
@@ -99,15 +84,7 @@ class AsyncRetriever(Retriever):
         return stream_slice["partition"]  # type: ignore # stream_slice["partition"] has been added as an AsyncPartition as part of stream_slices
 
     def stream_slices(self) -> Iterable[Optional[StreamSlice]]:
-        slices = self.stream_slicer.stream_slices()
-        self.__job_orchestrator = self._job_orchestrator_factory(slices)
-
-        for completed_partition in self._job_orchestrator.create_and_get_completed_partitions():
-            yield StreamSlice(
-                partition=dict(completed_partition.stream_slice.partition)
-                | {"partition": completed_partition},
-                cursor_slice=completed_partition.stream_slice.cursor_slice,
-            )
+        return self.stream_slicer.stream_slices()
 
     def read_records(
         self,
@@ -116,7 +93,7 @@ class AsyncRetriever(Retriever):
     ) -> Iterable[StreamData]:
         stream_state: StreamState = self._get_stream_state()
         partition: AsyncPartition = self._validate_and_get_stream_slice_partition(stream_slice)
-        records: Iterable[Mapping[str, Any]] = self._job_orchestrator.fetch_records(partition)
+        records: Iterable[Mapping[str, Any]] = self.stream_slicer.fetch_records(partition)
 
         yield from self.record_selector.filter_and_transform(
             all_data=records,
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py

@@ -4,7 +4,7 @@
 
 
 from copy import deepcopy
-from dataclasses import InitVar, dataclass
+from dataclasses import InitVar, dataclass, field
 from typing import Any, List, Mapping, MutableMapping, Optional, Union
 
 import dpath
@@ -13,8 +13,9 @@ from typing_extensions import deprecated
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
 from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
 from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
+from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.source import ExperimentalClassWarning
-from airbyte_cdk.sources.types import Config
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 
 AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
     "string": {"type": ["null", "string"]},
@@ -103,6 +104,7 @@ class DynamicSchemaLoader(SchemaLoader):
     config: Config
     parameters: InitVar[Mapping[str, Any]]
     schema_type_identifier: SchemaTypeIdentifier
+    schema_transformations: List[RecordTransformation] = field(default_factory=lambda: [])
 
     def get_json_schema(self) -> Mapping[str, Any]:
         """
@@ -128,12 +130,27 @@ class DynamicSchemaLoader(SchemaLoader):
             )
             properties[key] = value
 
+        transformed_properties = self._transform(properties, {})
+
         return {
             "$schema": "http://json-schema.org/draft-07/schema#",
             "type": "object",
-            "properties": properties,
+            "properties": transformed_properties,
         }
 
+    def _transform(
+        self,
+        properties: Mapping[str, Any],
+        stream_state: StreamState,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> Mapping[str, Any]:
+        for transformation in self.schema_transformations:
+            transformation.transform(
+                properties,  # type: ignore # properties has type Mapping[str, Any], but Dict[str, Any] expected
+                config=self.config,
+            )
+        return properties
+
     def _get_key(
         self,
         raw_schema: MutableMapping[str, Any],
airbyte_cdk/sources/declarative/transformations/flatten_fields.py

@@ -0,0 +1,50 @@
+#
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+#
+
+from dataclasses import dataclass
+from typing import Any, Dict, Optional
+
+from airbyte_cdk.sources.declarative.transformations import RecordTransformation
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+@dataclass
+class FlattenFields(RecordTransformation):
+    def transform(
+        self,
+        record: Dict[str, Any],
+        config: Optional[Config] = None,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> None:
+        transformed_record = self.flatten_record(record)
+        record.clear()
+        record.update(transformed_record)
+
+    def flatten_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
+        stack = [(record, "_")]
+        transformed_record: Dict[str, Any] = {}
+        force_with_parent_name = False
+
+        while stack:
+            current_record, parent_key = stack.pop()
+
+            if isinstance(current_record, dict):
+                for current_key, value in current_record.items():
+                    new_key = (
+                        f"{parent_key}.{current_key}"
+                        if (current_key in transformed_record or force_with_parent_name)
+                        else current_key
+                    )
+                    stack.append((value, new_key))
+
+            elif isinstance(current_record, list):
+                for i, item in enumerate(current_record):
+                    force_with_parent_name = True
+                    stack.append((item, f"{parent_key}.{i}"))
+
+            else:
+                transformed_record[parent_key] = current_record
+
+        return transformed_record
airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py

@@ -0,0 +1,68 @@
+#
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+#
+
+import re
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+import unidecode
+
+from airbyte_cdk.sources.declarative.transformations import RecordTransformation
+from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+
+
+@dataclass
+class KeysToSnakeCaseTransformation(RecordTransformation):
+    token_pattern: re.Pattern[str] = re.compile(
+        r"[A-Z]+[a-z]*|[a-z]+|\d+|(?P<NoToken>[^a-zA-Z\d]+)"
+    )
+
+    def transform(
+        self,
+        record: Dict[str, Any],
+        config: Optional[Config] = None,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> None:
+        transformed_record = self._transform_record(record)
+        record.clear()
+        record.update(transformed_record)
+
+    def _transform_record(self, record: Dict[str, Any]) -> Dict[str, Any]:
+        transformed_record = {}
+        for key, value in record.items():
+            transformed_key = self.process_key(key)
+            transformed_value = value
+
+            if isinstance(value, dict):
+                transformed_value = self._transform_record(value)
+
+            transformed_record[transformed_key] = transformed_value
+        return transformed_record
+
+    def process_key(self, key: str) -> str:
+        key = self.normalize_key(key)
+        tokens = self.tokenize_key(key)
+        tokens = self.filter_tokens(tokens)
+        return self.tokens_to_snake_case(tokens)
+
+    def normalize_key(self, key: str) -> str:
+        return unidecode.unidecode(key)
+
+    def tokenize_key(self, key: str) -> List[str]:
+        tokens = []
+        for match in self.token_pattern.finditer(key):
+            token = match.group(0) if match.group("NoToken") is None else ""
+            tokens.append(token)
+        return tokens
+
+    def filter_tokens(self, tokens: List[str]) -> List[str]:
+        if len(tokens) >= 3:
+            tokens = tokens[:1] + [t for t in tokens[1:-1] if t] + tokens[-1:]
+        if tokens and tokens[0].isdigit():
+            tokens.insert(0, "")
+        return tokens
+
+    def tokens_to_snake_case(self, tokens: List[str]) -> str:
+        return "_".join(token.lower() for token in tokens)
airbyte_cdk/sources/streams/core.py

@@ -223,17 +223,17 @@ class Stream(ABC):
                 record_counter += 1
 
                 checkpoint_interval = self.state_checkpoint_interval
+                checkpoint = checkpoint_reader.get_checkpoint()
                 if (
                     should_checkpoint
                     and checkpoint_interval
                     and record_counter % checkpoint_interval == 0
+                    and checkpoint is not None
                 ):
-                    checkpoint = checkpoint_reader.get_checkpoint()
-                    if checkpoint:
-                        airbyte_state_message = self._checkpoint_state(
-                            checkpoint, state_manager=state_manager
-                        )
-                        yield airbyte_state_message
+                    airbyte_state_message = self._checkpoint_state(
+                        checkpoint, state_manager=state_manager
+                    )
+                    yield airbyte_state_message
 
                 if internal_config.is_limit_reached(record_counter):
                     break
airbyte_cdk/sources/streams/http/http_client.py

@@ -262,7 +262,7 @@ class HttpClient:
         user_backoff_handler = user_defined_backoff_handler(max_tries=max_tries, max_time=max_time)(
             self._send
         )
-        rate_limit_backoff_handler = rate_limit_default_backoff_handler()
+        rate_limit_backoff_handler = rate_limit_default_backoff_handler(max_tries=max_tries)
         backoff_handler = http_client_default_backoff_handler(
             max_tries=max_tries, max_time=max_time
         )
@@ -472,7 +472,9 @@ class HttpClient:
 
         elif retry_endlessly:
             raise RateLimitBackoffException(
-                request=request,
+                request=request,
+                response=(response if response is not None else exc),
+                error_message=error_message,
             )
 
         raise DefaultBackoffException(
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py

@@ -54,7 +54,16 @@ class AbstractOauth2Authenticator(AuthBase):
 
     def get_auth_header(self) -> Mapping[str, Any]:
        """HTTP header to set on the requests"""
-        return {"Authorization": f"Bearer {self.get_access_token()}"}
+        token = (
+            self.access_token
+            if (
+                not self.get_token_refresh_endpoint()
+                or not self.get_refresh_token()
+                and self.access_token
+            )
+            else self.get_access_token()
+        )
+        return {"Authorization": f"Bearer {token}"}
 
     def get_access_token(self) -> str:
         """Returns the access token"""
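The new branch leans on Python operator precedence (and binds tighter than or): the refresh flow is skipped when no refresh endpoint is configured, or when no refresh token exists but a static access token was supplied. A hypothetical helper spelling the predicate out:

    from typing import Optional


    def uses_static_token(
        refresh_endpoint: Optional[str],
        refresh_token: Optional[str],
        access_token: Optional[str],
    ) -> bool:
        # illustrative restatement of the condition in get_auth_header above
        return (not refresh_endpoint) or (not refresh_token and bool(access_token))


    assert uses_static_token(None, None, "static-token") is True
    assert uses_static_token("https://auth.example/token", "refresh", None) is False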
@@ -121,7 +130,7 @@ class AbstractOauth2Authenticator(AuthBase):
         try:
             response = requests.request(
                 method="POST",
-                url=self.get_token_refresh_endpoint(),
+                url=self.get_token_refresh_endpoint(), # type: ignore # returns None, if not provided, but str | bytes is expected.
                 data=self.build_refresh_request_body(),
             )
             if response.ok:
@@ -198,7 +207,7 @@ class AbstractOauth2Authenticator(AuthBase):
         return None
 
     @abstractmethod
-    def get_token_refresh_endpoint(self) -> str:
+    def get_token_refresh_endpoint(self) -> Optional[str]:
         """Returns the endpoint to refresh the access token"""
 
     @abstractmethod
{airbyte_cdk-6.12.1.dev0.dist-info → airbyte_cdk-6.12.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 6.12.1.dev0
+Version: 6.12.3
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -22,6 +22,7 @@ Provides-Extra: sql
 Provides-Extra: vector-db-based
 Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
+Requires-Dist: Unidecode (>=1.3,<2.0)
 Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
 Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
 Requires-Dist: backoff
@@ -56,7 +57,7 @@ Requires-Dist: python-calamine (==0.2.3) ; extra == "file-based"
 Requires-Dist: python-dateutil
 Requires-Dist: python-snappy (==0.7.3) ; extra == "file-based"
 Requires-Dist: python-ulid (>=3.0.0,<4.0.0)
-Requires-Dist: pytz (==2024.1)
+Requires-Dist: pytz (==2024.2)
 Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
 Requires-Dist: requests
 Requires-Dist: requests_cache