airbyte-cdk 6.10.0__py3-none-any.whl → 6.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -1
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +170 -37
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +22 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +104 -55
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +8 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +121 -1
- airbyte_cdk/sources/declarative/resolvers/__init__.py +8 -3
- airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py +136 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +219 -0
- {airbyte_cdk-6.10.0.dist-info → airbyte_cdk-6.11.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.10.0.dist-info → airbyte_cdk-6.11.1.dist-info}/RECORD +15 -13
- {airbyte_cdk-6.10.0.dist-info → airbyte_cdk-6.11.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.10.0.dist-info → airbyte_cdk-6.11.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.10.0.dist-info → airbyte_cdk-6.11.1.dist-info}/entry_points.txt +0 -0
@@ -33,10 +33,13 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
|
|
33
33
|
"DeclarativeStream.schema_loader": "JsonFileSchemaLoader",
|
34
34
|
# DynamicDeclarativeStream
|
35
35
|
"DynamicDeclarativeStream.stream_template": "DeclarativeStream",
|
36
|
-
"DynamicDeclarativeStream.components_resolver": "HttpComponentsResolver",
|
36
|
+
"DynamicDeclarativeStream.components_resolver": "ConfigComponentResolver",
|
37
37
|
# HttpComponentsResolver
|
38
38
|
"HttpComponentsResolver.retriever": "SimpleRetriever",
|
39
39
|
"HttpComponentsResolver.components_mapping": "ComponentMappingDefinition",
|
40
|
+
# ConfigComponentResolver
|
41
|
+
"ConfigComponentsResolver.stream_config": "StreamConfig",
|
42
|
+
"ConfigComponentsResolver.components_mapping": "ComponentMappingDefinition",
|
40
43
|
# DefaultErrorHandler
|
41
44
|
"DefaultErrorHandler.response_filters": "HttpResponseFilter",
|
42
45
|
# DefaultPaginator
|
@@ -64,6 +67,10 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
|
|
64
67
|
"AddFields.fields": "AddedFieldDefinition",
|
65
68
|
# CustomPartitionRouter
|
66
69
|
"CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
|
70
|
+
# DynamicSchemaLoader
|
71
|
+
"DynamicSchemaLoader.retriever": "SimpleRetriever",
|
72
|
+
# SchemaTypeIdentifier
|
73
|
+
"SchemaTypeIdentifier.types_map": "TypesMap",
|
67
74
|
}
|
68
75
|
|
69
76
|
# We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to
|
@@ -128,6 +128,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
128
128
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
129
129
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
130
130
|
)
|
131
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
132
|
+
ConfigComponentsResolver as ConfigComponentsResolverModel,
|
133
|
+
)
|
131
134
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
132
135
|
ConstantBackoffStrategy as ConstantBackoffStrategyModel,
|
133
136
|
)
|
@@ -188,6 +191,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
188
191
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
189
192
|
DpathExtractor as DpathExtractorModel,
|
190
193
|
)
|
194
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
195
|
+
DynamicSchemaLoader as DynamicSchemaLoaderModel,
|
196
|
+
)
|
191
197
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
192
198
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
193
199
|
)
|
@@ -278,6 +284,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
278
284
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
279
285
|
ResponseToFileExtractor as ResponseToFileExtractorModel,
|
280
286
|
)
|
287
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
288
|
+
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
|
289
|
+
)
|
281
290
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
282
291
|
SelectiveAuthenticator as SelectiveAuthenticatorModel,
|
283
292
|
)
|
@@ -288,9 +297,15 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
288
297
|
SimpleRetriever as SimpleRetrieverModel,
|
289
298
|
)
|
290
299
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
300
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
301
|
+
StreamConfig as StreamConfigModel,
|
302
|
+
)
|
291
303
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
292
304
|
SubstreamPartitionRouter as SubstreamPartitionRouterModel,
|
293
305
|
)
|
306
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
307
|
+
TypesMap as TypesMapModel,
|
308
|
+
)
|
294
309
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
295
310
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
296
311
|
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
@@ -347,7 +362,9 @@ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
|
|
347
362
|
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
|
348
363
|
from airbyte_cdk.sources.declarative.resolvers import (
|
349
364
|
ComponentMappingDefinition,
|
365
|
+
ConfigComponentsResolver,
|
350
366
|
HttpComponentsResolver,
|
367
|
+
StreamConfig,
|
351
368
|
)
|
352
369
|
from airbyte_cdk.sources.declarative.retrievers import (
|
353
370
|
AsyncRetriever,
|
@@ -356,8 +373,11 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
356
373
|
)
|
357
374
|
from airbyte_cdk.sources.declarative.schema import (
|
358
375
|
DefaultSchemaLoader,
|
376
|
+
DynamicSchemaLoader,
|
359
377
|
InlineSchemaLoader,
|
360
378
|
JsonFileSchemaLoader,
|
379
|
+
SchemaTypeIdentifier,
|
380
|
+
TypesMap,
|
361
381
|
)
|
362
382
|
from airbyte_cdk.sources.declarative.spec import Spec
|
363
383
|
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
@@ -455,6 +475,9 @@ class ModelToComponentFactory:
|
|
455
475
|
IterableDecoderModel: self.create_iterable_decoder,
|
456
476
|
XmlDecoderModel: self.create_xml_decoder,
|
457
477
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
478
|
+
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
479
|
+
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
|
480
|
+
TypesMapModel: self.create_types_map,
|
458
481
|
JwtAuthenticatorModel: self.create_jwt_authenticator,
|
459
482
|
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
|
460
483
|
ListPartitionRouterModel: self.create_list_partition_router,
|
@@ -479,6 +502,8 @@ class ModelToComponentFactory:
|
|
479
502
|
WaitUntilTimeFromHeaderModel: self.create_wait_until_time_from_header,
|
480
503
|
AsyncRetrieverModel: self.create_async_retriever,
|
481
504
|
HttpComponentsResolverModel: self.create_http_components_resolver,
|
505
|
+
ConfigComponentsResolverModel: self.create_config_components_resolver,
|
506
|
+
StreamConfigModel: self.create_stream_config,
|
482
507
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
483
508
|
}
|
484
509
|
|
@@ -1574,6 +1599,63 @@ class ModelToComponentFactory:
|
|
1574
1599
|
) -> InlineSchemaLoader:
|
1575
1600
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
1576
1601
|
|
1602
|
+
@staticmethod
|
1603
|
+
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
|
1604
|
+
return TypesMap(target_type=model.target_type, current_type=model.current_type)
|
1605
|
+
|
1606
|
+
def create_schema_type_identifier(
|
1607
|
+
self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
|
1608
|
+
) -> SchemaTypeIdentifier:
|
1609
|
+
types_mapping = []
|
1610
|
+
if model.types_mapping:
|
1611
|
+
types_mapping.extend(
|
1612
|
+
[
|
1613
|
+
self._create_component_from_model(types_map, config=config)
|
1614
|
+
for types_map in model.types_mapping
|
1615
|
+
]
|
1616
|
+
)
|
1617
|
+
model_schema_pointer: List[Union[InterpolatedString, str]] = (
|
1618
|
+
[x for x in model.schema_pointer] if model.schema_pointer else []
|
1619
|
+
)
|
1620
|
+
model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
|
1621
|
+
model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
|
1622
|
+
[x for x in model.type_pointer] if model.type_pointer else None
|
1623
|
+
)
|
1624
|
+
|
1625
|
+
return SchemaTypeIdentifier(
|
1626
|
+
schema_pointer=model_schema_pointer,
|
1627
|
+
key_pointer=model_key_pointer,
|
1628
|
+
type_pointer=model_type_pointer,
|
1629
|
+
types_mapping=types_mapping,
|
1630
|
+
parameters=model.parameters or {},
|
1631
|
+
)
|
1632
|
+
|
1633
|
+
def create_dynamic_schema_loader(
|
1634
|
+
self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
|
1635
|
+
) -> DynamicSchemaLoader:
|
1636
|
+
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1637
|
+
combined_slicers = self._build_resumable_cursor_from_paginator(
|
1638
|
+
model.retriever, stream_slicer
|
1639
|
+
)
|
1640
|
+
|
1641
|
+
retriever = self._create_component_from_model(
|
1642
|
+
model=model.retriever,
|
1643
|
+
config=config,
|
1644
|
+
name="",
|
1645
|
+
primary_key=None,
|
1646
|
+
stream_slicer=combined_slicers,
|
1647
|
+
transformations=[],
|
1648
|
+
)
|
1649
|
+
schema_type_identifier = self._create_component_from_model(
|
1650
|
+
model.schema_type_identifier, config=config, parameters=model.parameters or {}
|
1651
|
+
)
|
1652
|
+
return DynamicSchemaLoader(
|
1653
|
+
retriever=retriever,
|
1654
|
+
config=config,
|
1655
|
+
schema_type_identifier=schema_type_identifier,
|
1656
|
+
parameters=model.parameters or {},
|
1657
|
+
)
|
1658
|
+
|
1577
1659
|
@staticmethod
|
1578
1660
|
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
|
1579
1661
|
return JsonDecoder(parameters={})
|
@@ -1812,8 +1894,8 @@ class ModelToComponentFactory:
|
|
1812
1894
|
self,
|
1813
1895
|
model: RecordSelectorModel,
|
1814
1896
|
config: Config,
|
1815
|
-
name: str,
|
1816
1897
|
*,
|
1898
|
+
name: str,
|
1817
1899
|
transformations: List[RecordTransformation],
|
1818
1900
|
decoder: Optional[Decoder] = None,
|
1819
1901
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
@@ -2292,3 +2374,41 @@ class ModelToComponentFactory:
|
|
2292
2374
|
components_mapping=components_mapping,
|
2293
2375
|
parameters=model.parameters or {},
|
2294
2376
|
)
|
2377
|
+
|
2378
|
+
@staticmethod
|
2379
|
+
def create_stream_config(
|
2380
|
+
model: StreamConfigModel, config: Config, **kwargs: Any
|
2381
|
+
) -> StreamConfig:
|
2382
|
+
model_configs_pointer: List[Union[InterpolatedString, str]] = (
|
2383
|
+
[x for x in model.configs_pointer] if model.configs_pointer else []
|
2384
|
+
)
|
2385
|
+
|
2386
|
+
return StreamConfig(
|
2387
|
+
configs_pointer=model_configs_pointer,
|
2388
|
+
parameters=model.parameters or {},
|
2389
|
+
)
|
2390
|
+
|
2391
|
+
def create_config_components_resolver(
|
2392
|
+
self, model: ConfigComponentsResolverModel, config: Config
|
2393
|
+
) -> Any:
|
2394
|
+
stream_config = self._create_component_from_model(
|
2395
|
+
model.stream_config, config=config, parameters=model.parameters or {}
|
2396
|
+
)
|
2397
|
+
|
2398
|
+
components_mapping = [
|
2399
|
+
self._create_component_from_model(
|
2400
|
+
model=components_mapping_definition_model,
|
2401
|
+
value_type=ModelToComponentFactory._json_schema_type_name_to_type(
|
2402
|
+
components_mapping_definition_model.value_type
|
2403
|
+
),
|
2404
|
+
config=config,
|
2405
|
+
)
|
2406
|
+
for components_mapping_definition_model in model.components_mapping
|
2407
|
+
]
|
2408
|
+
|
2409
|
+
return ConfigComponentsResolver(
|
2410
|
+
stream_config=stream_config,
|
2411
|
+
config=config,
|
2412
|
+
components_mapping=components_mapping,
|
2413
|
+
parameters=model.parameters or {},
|
2414
|
+
)
|
@@ -4,10 +4,15 @@
|
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.resolvers.components_resolver import ComponentsResolver, ComponentMappingDefinition, ResolvedComponentMappingDefinition
|
6
6
|
from airbyte_cdk.sources.declarative.resolvers.http_components_resolver import HttpComponentsResolver
|
7
|
+
from airbyte_cdk.sources.declarative.resolvers.config_components_resolver import ConfigComponentsResolver, StreamConfig
|
7
8
|
from airbyte_cdk.sources.declarative.models import HttpComponentsResolver as HttpComponentsResolverModel
|
9
|
+
from airbyte_cdk.sources.declarative.models import ConfigComponentsResolver as ConfigComponentsResolverModel
|
10
|
+
from pydantic.v1 import BaseModel
|
11
|
+
from typing import Mapping
|
8
12
|
|
9
|
-
COMPONENTS_RESOLVER_TYPE_MAPPING = {
|
10
|
-
"HttpComponentsResolver": HttpComponentsResolverModel
|
13
|
+
COMPONENTS_RESOLVER_TYPE_MAPPING: Mapping[str, type[BaseModel]] = {
|
14
|
+
"HttpComponentsResolver": HttpComponentsResolverModel,
|
15
|
+
"ConfigComponentsResolver": ConfigComponentsResolverModel
|
11
16
|
}
|
12
17
|
|
13
|
-
__all__ = ["ComponentsResolver", "HttpComponentsResolver", "ComponentMappingDefinition", "ResolvedComponentMappingDefinition", "COMPONENTS_RESOLVER_TYPE_MAPPING"]
|
18
|
+
__all__ = ["ComponentsResolver", "HttpComponentsResolver", "ComponentMappingDefinition", "ResolvedComponentMappingDefinition", "StreamConfig", "ConfigComponentsResolver", "COMPONENTS_RESOLVER_TYPE_MAPPING"]
|
@@ -0,0 +1,136 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from copy import deepcopy
|
6
|
+
from dataclasses import InitVar, dataclass, field
|
7
|
+
from typing import Any, Dict, Iterable, List, Mapping, Union
|
8
|
+
|
9
|
+
import dpath
|
10
|
+
from typing_extensions import deprecated
|
11
|
+
|
12
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
13
|
+
from airbyte_cdk.sources.declarative.resolvers.components_resolver import (
|
14
|
+
ComponentMappingDefinition,
|
15
|
+
ComponentsResolver,
|
16
|
+
ResolvedComponentMappingDefinition,
|
17
|
+
)
|
18
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
19
|
+
from airbyte_cdk.sources.types import Config
|
20
|
+
|
21
|
+
|
22
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
23
|
+
@dataclass
|
24
|
+
class StreamConfig:
|
25
|
+
"""
|
26
|
+
Identifies stream config details for dynamic schema extraction and processing.
|
27
|
+
"""
|
28
|
+
|
29
|
+
configs_pointer: List[Union[InterpolatedString, str]]
|
30
|
+
parameters: InitVar[Mapping[str, Any]]
|
31
|
+
|
32
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
33
|
+
self.configs_pointer = [
|
34
|
+
InterpolatedString.create(path, parameters=parameters) for path in self.configs_pointer
|
35
|
+
]
|
36
|
+
|
37
|
+
|
38
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
39
|
+
@dataclass
|
40
|
+
class ConfigComponentsResolver(ComponentsResolver):
|
41
|
+
"""
|
42
|
+
Resolves and populates stream templates with components fetched via source config.
|
43
|
+
|
44
|
+
Attributes:
|
45
|
+
stream_config (StreamConfig): The description of stream configuration used to fetch stream config from source config.
|
46
|
+
config (Config): Configuration object for the resolver.
|
47
|
+
components_mapping (List[ComponentMappingDefinition]): List of mappings to resolve.
|
48
|
+
parameters (InitVar[Mapping[str, Any]]): Additional parameters for interpolation.
|
49
|
+
"""
|
50
|
+
|
51
|
+
stream_config: StreamConfig
|
52
|
+
config: Config
|
53
|
+
components_mapping: List[ComponentMappingDefinition]
|
54
|
+
parameters: InitVar[Mapping[str, Any]]
|
55
|
+
_resolved_components: List[ResolvedComponentMappingDefinition] = field(
|
56
|
+
init=False, repr=False, default_factory=list
|
57
|
+
)
|
58
|
+
|
59
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
60
|
+
"""
|
61
|
+
Initializes and parses component mappings, converting them to resolved definitions.
|
62
|
+
|
63
|
+
Args:
|
64
|
+
parameters (Mapping[str, Any]): Parameters for interpolation.
|
65
|
+
"""
|
66
|
+
|
67
|
+
for component_mapping in self.components_mapping:
|
68
|
+
if isinstance(component_mapping.value, (str, InterpolatedString)):
|
69
|
+
interpolated_value = (
|
70
|
+
InterpolatedString.create(component_mapping.value, parameters=parameters)
|
71
|
+
if isinstance(component_mapping.value, str)
|
72
|
+
else component_mapping.value
|
73
|
+
)
|
74
|
+
|
75
|
+
field_path = [
|
76
|
+
InterpolatedString.create(path, parameters=parameters)
|
77
|
+
for path in component_mapping.field_path
|
78
|
+
]
|
79
|
+
|
80
|
+
self._resolved_components.append(
|
81
|
+
ResolvedComponentMappingDefinition(
|
82
|
+
field_path=field_path,
|
83
|
+
value=interpolated_value,
|
84
|
+
value_type=component_mapping.value_type,
|
85
|
+
parameters=parameters,
|
86
|
+
)
|
87
|
+
)
|
88
|
+
else:
|
89
|
+
raise ValueError(
|
90
|
+
f"Expected a string or InterpolatedString for value in mapping: {component_mapping}"
|
91
|
+
)
|
92
|
+
|
93
|
+
@property
|
94
|
+
def _stream_config(self) -> Iterable[Mapping[str, Any]]:
|
95
|
+
path = [
|
96
|
+
node.eval(self.config) if not isinstance(node, str) else node
|
97
|
+
for node in self.stream_config.configs_pointer
|
98
|
+
]
|
99
|
+
stream_config = dpath.get(dict(self.config), path, default=[])
|
100
|
+
|
101
|
+
if not isinstance(stream_config, list):
|
102
|
+
stream_config = [stream_config]
|
103
|
+
|
104
|
+
return stream_config
|
105
|
+
|
106
|
+
def resolve_components(
|
107
|
+
self, stream_template_config: Dict[str, Any]
|
108
|
+
) -> Iterable[Dict[str, Any]]:
|
109
|
+
"""
|
110
|
+
Resolves components in the stream template configuration by populating values.
|
111
|
+
|
112
|
+
Args:
|
113
|
+
stream_template_config (Dict[str, Any]): Stream template to populate.
|
114
|
+
|
115
|
+
Yields:
|
116
|
+
Dict[str, Any]: Updated configurations with resolved components.
|
117
|
+
"""
|
118
|
+
kwargs = {"stream_template_config": stream_template_config}
|
119
|
+
|
120
|
+
for components_values in self._stream_config:
|
121
|
+
updated_config = deepcopy(stream_template_config)
|
122
|
+
kwargs["components_values"] = components_values # type: ignore[assignment] # component_values will always be of type Mapping[str, Any]
|
123
|
+
|
124
|
+
for resolved_component in self._resolved_components:
|
125
|
+
valid_types = (
|
126
|
+
(resolved_component.value_type,) if resolved_component.value_type else None
|
127
|
+
)
|
128
|
+
value = resolved_component.value.eval(
|
129
|
+
self.config, valid_types=valid_types, **kwargs
|
130
|
+
)
|
131
|
+
|
132
|
+
path = [path.eval(self.config, **kwargs) for path in resolved_component.field_path]
|
133
|
+
|
134
|
+
dpath.set(updated_config, path, value)
|
135
|
+
|
136
|
+
yield updated_config
|
@@ -6,5 +6,6 @@ from airbyte_cdk.sources.declarative.schema.default_schema_loader import Default
|
|
6
6
|
from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
|
7
7
|
from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
|
8
8
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
9
|
+
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
|
9
10
|
|
10
|
-
__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]
|
11
|
+
__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader", "DynamicSchemaLoader", "TypesMap", "SchemaTypeIdentifier"]
|
@@ -0,0 +1,219 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
|
6
|
+
from copy import deepcopy
|
7
|
+
from dataclasses import InitVar, dataclass
|
8
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
9
|
+
|
10
|
+
import dpath
|
11
|
+
from typing_extensions import deprecated
|
12
|
+
|
13
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
|
+
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
15
|
+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
16
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
17
|
+
from airbyte_cdk.sources.types import Config
|
18
|
+
|
19
|
+
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
|
20
|
+
"string": {"type": ["null", "string"]},
|
21
|
+
"boolean": {"type": ["null", "boolean"]},
|
22
|
+
"date": {"type": ["null", "string"], "format": "date"},
|
23
|
+
"timestamp_without_timezone": {
|
24
|
+
"type": ["null", "string"],
|
25
|
+
"format": "date-time",
|
26
|
+
"airbyte_type": "timestamp_without_timezone",
|
27
|
+
},
|
28
|
+
"timestamp_with_timezone": {"type": ["null", "string"], "format": "date-time"},
|
29
|
+
"time_without_timezone": {
|
30
|
+
"type": ["null", "string"],
|
31
|
+
"format": "time",
|
32
|
+
"airbyte_type": "time_without_timezone",
|
33
|
+
},
|
34
|
+
"time_with_timezone": {
|
35
|
+
"type": ["null", "string"],
|
36
|
+
"format": "time",
|
37
|
+
"airbyte_type": "time_with_timezone",
|
38
|
+
},
|
39
|
+
"integer": {"type": ["null", "integer"]},
|
40
|
+
"number": {"type": ["null", "number"]},
|
41
|
+
"array": {"type": ["null", "array"]},
|
42
|
+
"object": {"type": ["null", "object"]},
|
43
|
+
}
|
44
|
+
|
45
|
+
|
46
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
47
|
+
@dataclass(frozen=True)
|
48
|
+
class TypesMap:
|
49
|
+
"""
|
50
|
+
Represents a mapping between a current type and its corresponding target type.
|
51
|
+
"""
|
52
|
+
|
53
|
+
target_type: Union[List[str], str]
|
54
|
+
current_type: Union[List[str], str]
|
55
|
+
|
56
|
+
|
57
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
58
|
+
@dataclass
|
59
|
+
class SchemaTypeIdentifier:
|
60
|
+
"""
|
61
|
+
Identifies schema details for dynamic schema extraction and processing.
|
62
|
+
"""
|
63
|
+
|
64
|
+
key_pointer: List[Union[InterpolatedString, str]]
|
65
|
+
parameters: InitVar[Mapping[str, Any]]
|
66
|
+
type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
|
67
|
+
types_mapping: Optional[List[TypesMap]] = None
|
68
|
+
schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None
|
69
|
+
|
70
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
71
|
+
self.schema_pointer = (
|
72
|
+
self._update_pointer(self.schema_pointer, parameters) if self.schema_pointer else []
|
73
|
+
) # type: ignore[assignment] # This is reqired field in model
|
74
|
+
self.key_pointer = self._update_pointer(self.key_pointer, parameters) # type: ignore[assignment] # This is reqired field in model
|
75
|
+
self.type_pointer = (
|
76
|
+
self._update_pointer(self.type_pointer, parameters) if self.type_pointer else None
|
77
|
+
)
|
78
|
+
|
79
|
+
@staticmethod
|
80
|
+
def _update_pointer(
|
81
|
+
pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
|
82
|
+
) -> Optional[List[Union[InterpolatedString, str]]]:
|
83
|
+
return (
|
84
|
+
[
|
85
|
+
InterpolatedString.create(path, parameters=parameters)
|
86
|
+
if isinstance(path, str)
|
87
|
+
else path
|
88
|
+
for path in pointer
|
89
|
+
]
|
90
|
+
if pointer
|
91
|
+
else None
|
92
|
+
)
|
93
|
+
|
94
|
+
|
95
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
96
|
+
@dataclass
|
97
|
+
class DynamicSchemaLoader(SchemaLoader):
|
98
|
+
"""
|
99
|
+
Dynamically loads a JSON Schema by extracting data from retrieved records.
|
100
|
+
"""
|
101
|
+
|
102
|
+
retriever: Retriever
|
103
|
+
config: Config
|
104
|
+
parameters: InitVar[Mapping[str, Any]]
|
105
|
+
schema_type_identifier: SchemaTypeIdentifier
|
106
|
+
|
107
|
+
def get_json_schema(self) -> Mapping[str, Any]:
|
108
|
+
"""
|
109
|
+
Constructs a JSON Schema based on retrieved data.
|
110
|
+
"""
|
111
|
+
properties = {}
|
112
|
+
retrieved_record = next(self.retriever.read_records({}), None) # type: ignore[call-overload] # read_records return Iterable data type
|
113
|
+
|
114
|
+
raw_schema = (
|
115
|
+
self._extract_data(
|
116
|
+
retrieved_record, # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
|
117
|
+
self.schema_type_identifier.schema_pointer,
|
118
|
+
)
|
119
|
+
if retrieved_record
|
120
|
+
else []
|
121
|
+
)
|
122
|
+
|
123
|
+
for property_definition in raw_schema:
|
124
|
+
key = self._get_key(property_definition, self.schema_type_identifier.key_pointer)
|
125
|
+
value = self._get_type(
|
126
|
+
property_definition,
|
127
|
+
self.schema_type_identifier.type_pointer,
|
128
|
+
)
|
129
|
+
properties[key] = value
|
130
|
+
|
131
|
+
return {
|
132
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
133
|
+
"type": "object",
|
134
|
+
"properties": properties,
|
135
|
+
}
|
136
|
+
|
137
|
+
def _get_key(
|
138
|
+
self,
|
139
|
+
raw_schema: MutableMapping[str, Any],
|
140
|
+
field_key_path: List[Union[InterpolatedString, str]],
|
141
|
+
) -> str:
|
142
|
+
"""
|
143
|
+
Extracts the key field from the schema using the specified path.
|
144
|
+
"""
|
145
|
+
field_key = self._extract_data(raw_schema, field_key_path)
|
146
|
+
if not isinstance(field_key, str):
|
147
|
+
raise ValueError(f"Expected key to be a string. Got {field_key}")
|
148
|
+
return field_key
|
149
|
+
|
150
|
+
def _get_type(
|
151
|
+
self,
|
152
|
+
raw_schema: MutableMapping[str, Any],
|
153
|
+
field_type_path: Optional[List[Union[InterpolatedString, str]]],
|
154
|
+
) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
|
155
|
+
"""
|
156
|
+
Determines the JSON Schema type for a field, supporting nullable and combined types.
|
157
|
+
"""
|
158
|
+
raw_field_type = (
|
159
|
+
self._extract_data(raw_schema, field_type_path, default="string")
|
160
|
+
if field_type_path
|
161
|
+
else "string"
|
162
|
+
)
|
163
|
+
mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
|
164
|
+
if (
|
165
|
+
isinstance(mapped_field_type, list)
|
166
|
+
and len(mapped_field_type) == 2
|
167
|
+
and all(isinstance(item, str) for item in mapped_field_type)
|
168
|
+
):
|
169
|
+
first_type = self._get_airbyte_type(mapped_field_type[0])
|
170
|
+
second_type = self._get_airbyte_type(mapped_field_type[1])
|
171
|
+
return {"oneOf": [first_type, second_type]}
|
172
|
+
elif isinstance(mapped_field_type, str):
|
173
|
+
return self._get_airbyte_type(mapped_field_type)
|
174
|
+
else:
|
175
|
+
raise ValueError(
|
176
|
+
f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
|
177
|
+
)
|
178
|
+
|
179
|
+
def _replace_type_if_not_valid(
|
180
|
+
self, field_type: Union[List[str], str]
|
181
|
+
) -> Union[List[str], str]:
|
182
|
+
"""
|
183
|
+
Replaces a field type if it matches a type mapping in `types_map`.
|
184
|
+
"""
|
185
|
+
if self.schema_type_identifier.types_mapping:
|
186
|
+
for types_map in self.schema_type_identifier.types_mapping:
|
187
|
+
if field_type == types_map.current_type:
|
188
|
+
return types_map.target_type
|
189
|
+
return field_type
|
190
|
+
|
191
|
+
@staticmethod
|
192
|
+
def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
|
193
|
+
"""
|
194
|
+
Maps a field type to its corresponding Airbyte type definition.
|
195
|
+
"""
|
196
|
+
if field_type not in AIRBYTE_DATA_TYPES:
|
197
|
+
raise ValueError(f"Invalid Airbyte data type: {field_type}")
|
198
|
+
|
199
|
+
return deepcopy(AIRBYTE_DATA_TYPES[field_type])
|
200
|
+
|
201
|
+
def _extract_data(
|
202
|
+
self,
|
203
|
+
body: Mapping[str, Any],
|
204
|
+
extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
|
205
|
+
default: Any = None,
|
206
|
+
) -> Any:
|
207
|
+
"""
|
208
|
+
Extracts data from the body based on the provided extraction path.
|
209
|
+
"""
|
210
|
+
|
211
|
+
if not extraction_path:
|
212
|
+
return body
|
213
|
+
|
214
|
+
path = [
|
215
|
+
node.eval(self.config) if not isinstance(node, str) else node
|
216
|
+
for node in extraction_path
|
217
|
+
]
|
218
|
+
|
219
|
+
return dpath.get(body, path, default=default) # type: ignore # extracted will be a MutableMapping, given input data structure
|