airbyte-cdk 6.9.1rc2__py3-none-any.whl → 6.9.2.dev4100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +87 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +43 -5
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +4 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +72 -0
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +219 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- {airbyte_cdk-6.9.1rc2.dist-info → airbyte_cdk-6.9.2.dev4100.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.9.1rc2.dist-info → airbyte_cdk-6.9.2.dev4100.dist-info}/RECORD +14 -13
- {airbyte_cdk-6.9.1rc2.dist-info → airbyte_cdk-6.9.2.dev4100.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.9.1rc2.dist-info → airbyte_cdk-6.9.2.dev4100.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.9.1rc2.dist-info → airbyte_cdk-6.9.2.dev4100.dist-info}/entry_points.txt +0 -0
@@ -37,17 +37,17 @@ class DatetimeParser:
|
|
37
37
|
return parsed_datetime.replace(tzinfo=datetime.timezone.utc)
|
38
38
|
return parsed_datetime
|
39
39
|
|
40
|
-
def format(self, dt: datetime.datetime, format: str) ->
|
40
|
+
def format(self, dt: datetime.datetime, format: str) -> str:
|
41
41
|
# strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on
|
42
42
|
# It's safer to use the timestamp() method than the %s directive
|
43
43
|
# See https://stackoverflow.com/a/4974930
|
44
44
|
if format == "%s":
|
45
|
-
return int(dt.timestamp())
|
45
|
+
return str(int(dt.timestamp()))
|
46
46
|
if format == "%s_as_float":
|
47
|
-
return float(dt.timestamp())
|
47
|
+
return str(float(dt.timestamp()))
|
48
48
|
if format == "%ms":
|
49
49
|
# timestamp() returns a float representing the number of seconds since the unix epoch
|
50
|
-
return int(dt.timestamp() * 1000)
|
50
|
+
return str(int(dt.timestamp() * 1000))
|
51
51
|
else:
|
52
52
|
return dt.strftime(format)
|
53
53
|
|
@@ -1221,6 +1221,7 @@ definitions:
|
|
1221
1221
|
- "$ref": "#/definitions/InlineSchemaLoader"
|
1222
1222
|
- "$ref": "#/definitions/JsonFileSchemaLoader"
|
1223
1223
|
- "$ref": "#/definitions/CustomSchemaLoader"
|
1224
|
+
- "$ref": "#/definitions/DynamicSchemaLoader"
|
1224
1225
|
# TODO we have moved the transformation to the RecordSelector level in the code but kept this here for
|
1225
1226
|
# compatibility reasons. We should eventually move this to align with the code.
|
1226
1227
|
transformations:
|
@@ -1684,6 +1685,92 @@ definitions:
|
|
1684
1685
|
$parameters:
|
1685
1686
|
type: object
|
1686
1687
|
additionalProperties: true
|
1688
|
+
TypesMap:
|
1689
|
+
title: Types Map
|
1690
|
+
description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
|
1691
|
+
type: object
|
1692
|
+
required:
|
1693
|
+
- target_type
|
1694
|
+
- current_type
|
1695
|
+
properties:
|
1696
|
+
target_type:
|
1697
|
+
anyOf:
|
1698
|
+
- type: string
|
1699
|
+
- type: array
|
1700
|
+
items:
|
1701
|
+
type: string
|
1702
|
+
current_type:
|
1703
|
+
anyOf:
|
1704
|
+
- type: string
|
1705
|
+
- type: array
|
1706
|
+
items:
|
1707
|
+
type: string
|
1708
|
+
SchemaTypeIdentifier:
|
1709
|
+
title: Schema Type Identifier
|
1710
|
+
description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.
|
1711
|
+
type: object
|
1712
|
+
required:
|
1713
|
+
- key_pointer
|
1714
|
+
properties:
|
1715
|
+
type:
|
1716
|
+
type: string
|
1717
|
+
enum: [SchemaTypeIdentifier]
|
1718
|
+
schema_pointer:
|
1719
|
+
title: Schema Path
|
1720
|
+
description: List of nested fields defining the schema field path to extract. Defaults to [].
|
1721
|
+
type: array
|
1722
|
+
default: []
|
1723
|
+
items:
|
1724
|
+
- type: string
|
1725
|
+
interpolation_context:
|
1726
|
+
- config
|
1727
|
+
key_pointer:
|
1728
|
+
title: Key Path
|
1729
|
+
description: List of potentially nested fields describing the full path of the field key to extract.
|
1730
|
+
type: array
|
1731
|
+
items:
|
1732
|
+
- type: string
|
1733
|
+
interpolation_context:
|
1734
|
+
- config
|
1735
|
+
type_pointer:
|
1736
|
+
title: Type Path
|
1737
|
+
description: List of potentially nested fields describing the full path of the field type to extract.
|
1738
|
+
type: array
|
1739
|
+
items:
|
1740
|
+
- type: string
|
1741
|
+
interpolation_context:
|
1742
|
+
- config
|
1743
|
+
types_mapping:
|
1744
|
+
type: array
|
1745
|
+
items:
|
1746
|
+
- "$ref": "#/definitions/TypesMap"
|
1747
|
+
$parameters:
|
1748
|
+
type: object
|
1749
|
+
additionalProperties: true
|
1750
|
+
DynamicSchemaLoader:
|
1751
|
+
title: Dynamic Schema Loader
|
1752
|
+
description: (This component is experimental. Use at your own risk.) Loads a schema by extracting data from retrieved records.
|
1753
|
+
type: object
|
1754
|
+
required:
|
1755
|
+
- type
|
1756
|
+
- retriever
|
1757
|
+
- schema_type_identifier
|
1758
|
+
properties:
|
1759
|
+
type:
|
1760
|
+
type: string
|
1761
|
+
enum: [DynamicSchemaLoader]
|
1762
|
+
retriever:
|
1763
|
+
title: Retriever
|
1764
|
+
description: Component used to coordinate how records are extracted across stream slices and request pages.
|
1765
|
+
anyOf:
|
1766
|
+
- "$ref": "#/definitions/AsyncRetriever"
|
1767
|
+
- "$ref": "#/definitions/CustomRetriever"
|
1768
|
+
- "$ref": "#/definitions/SimpleRetriever"
|
1769
|
+
schema_type_identifier:
|
1770
|
+
"$ref": "#/definitions/SchemaTypeIdentifier"
|
1771
|
+
$parameters:
|
1772
|
+
type: object
|
1773
|
+
additionalProperties: true
|
1687
1774
|
InlineSchemaLoader:
|
1688
1775
|
title: Inline Schema Loader
|
1689
1776
|
description: Loads a schema that is defined directly in the manifest file.
|
@@ -248,7 +248,7 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
248
248
|
return datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
|
249
249
|
|
250
250
|
def _format_datetime(self, dt: datetime.datetime) -> str:
|
251
|
-
return
|
251
|
+
return self._parser.format(dt, self.datetime_format)
|
252
252
|
|
253
253
|
def _partition_daterange(
|
254
254
|
self,
|
@@ -650,6 +650,32 @@ class HttpResponseFilter(BaseModel):
|
|
650
650
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
651
651
|
|
652
652
|
|
653
|
+
class TypesMap(BaseModel):
|
654
|
+
target_type: Union[str, List[str]]
|
655
|
+
current_type: Union[str, List[str]]
|
656
|
+
|
657
|
+
|
658
|
+
class SchemaTypeIdentifier(BaseModel):
|
659
|
+
type: Optional[Literal["SchemaTypeIdentifier"]] = None
|
660
|
+
schema_pointer: Optional[List[str]] = Field(
|
661
|
+
[],
|
662
|
+
description="List of nested fields defining the schema field path to extract. Defaults to [].",
|
663
|
+
title="Schema Path",
|
664
|
+
)
|
665
|
+
key_pointer: List[str] = Field(
|
666
|
+
...,
|
667
|
+
description="List of potentially nested fields describing the full path of the field key to extract.",
|
668
|
+
title="Key Path",
|
669
|
+
)
|
670
|
+
type_pointer: Optional[List[str]] = Field(
|
671
|
+
None,
|
672
|
+
description="List of potentially nested fields describing the full path of the field type to extract.",
|
673
|
+
title="Type Path",
|
674
|
+
)
|
675
|
+
types_mapping: Optional[List[TypesMap]] = None
|
676
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
677
|
+
|
678
|
+
|
653
679
|
class InlineSchemaLoader(BaseModel):
|
654
680
|
type: Literal["InlineSchemaLoader"]
|
655
681
|
schema_: Optional[Dict[str, Any]] = Field(
|
@@ -822,13 +848,13 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
822
848
|
)
|
823
849
|
extract_output: List[str] = Field(
|
824
850
|
...,
|
825
|
-
description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.
|
851
|
+
description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
|
826
852
|
examples=[{"extract_output": ["access_token", "refresh_token", "other_field"]}],
|
827
853
|
title="DeclarativeOAuth Extract Output",
|
828
854
|
)
|
829
855
|
state: Optional[State] = Field(
|
830
856
|
None,
|
831
|
-
description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.
|
857
|
+
description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.",
|
832
858
|
examples=[{"state": {"min": 7, "max": 128}}],
|
833
859
|
title="(Optional) DeclarativeOAuth Configurable State Query Param",
|
834
860
|
)
|
@@ -852,13 +878,13 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
852
878
|
)
|
853
879
|
state_key: Optional[str] = Field(
|
854
880
|
None,
|
855
|
-
description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.
|
881
|
+
description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.",
|
856
882
|
examples=[{"state_key": "my_custom_state_key_key_name"}],
|
857
883
|
title="(Optional) DeclarativeOAuth State Key Override",
|
858
884
|
)
|
859
885
|
auth_code_key: Optional[str] = Field(
|
860
886
|
None,
|
861
|
-
description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.
|
887
|
+
description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.",
|
862
888
|
examples=[{"auth_code_key": "my_custom_auth_code_key_name"}],
|
863
889
|
title="(Optional) DeclarativeOAuth Auth Code Key Override",
|
864
890
|
)
|
@@ -1609,7 +1635,7 @@ class DeclarativeStream(BaseModel):
|
|
1609
1635
|
primary_key: Optional[PrimaryKey] = Field(
|
1610
1636
|
"", description="The primary key of the stream.", title="Primary Key"
|
1611
1637
|
)
|
1612
|
-
schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader]] = (
|
1638
|
+
schema_loader: Optional[Union[InlineSchemaLoader, JsonFileSchemaLoader, CustomSchemaLoader, DynamicSchemaLoader]] = (
|
1613
1639
|
Field(
|
1614
1640
|
None,
|
1615
1641
|
description="Component used to retrieve the schema for the current stream.",
|
@@ -1774,6 +1800,17 @@ class HttpRequester(BaseModel):
|
|
1774
1800
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1775
1801
|
|
1776
1802
|
|
1803
|
+
class DynamicSchemaLoader(BaseModel):
|
1804
|
+
type: Literal["DynamicSchemaLoader"]
|
1805
|
+
retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
|
1806
|
+
...,
|
1807
|
+
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
1808
|
+
title="Retriever",
|
1809
|
+
)
|
1810
|
+
schema_type_identifier: SchemaTypeIdentifier
|
1811
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1812
|
+
|
1813
|
+
|
1777
1814
|
class ParentStreamConfig(BaseModel):
|
1778
1815
|
type: Literal["ParentStreamConfig"]
|
1779
1816
|
parent_key: str = Field(
|
@@ -1981,5 +2018,6 @@ DeclarativeSource2.update_forward_refs()
|
|
1981
2018
|
SelectiveAuthenticator.update_forward_refs()
|
1982
2019
|
DeclarativeStream.update_forward_refs()
|
1983
2020
|
SessionTokenAuthenticator.update_forward_refs()
|
2021
|
+
DynamicSchemaLoader.update_forward_refs()
|
1984
2022
|
SimpleRetriever.update_forward_refs()
|
1985
2023
|
AsyncRetriever.update_forward_refs()
|
@@ -64,6 +64,10 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
|
|
64
64
|
"AddFields.fields": "AddedFieldDefinition",
|
65
65
|
# CustomPartitionRouter
|
66
66
|
"CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
|
67
|
+
# DynamicSchemaLoader
|
68
|
+
"DynamicSchemaLoader.retriever": "SimpleRetriever",
|
69
|
+
# SchemaTypeIdentifier
|
70
|
+
"SchemaTypeIdentifier.types_map": "TypesMap",
|
67
71
|
}
|
68
72
|
|
69
73
|
# We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to
|
@@ -188,6 +188,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
188
188
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
189
189
|
DpathExtractor as DpathExtractorModel,
|
190
190
|
)
|
191
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
192
|
+
DynamicSchemaLoader as DynamicSchemaLoaderModel,
|
193
|
+
)
|
191
194
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
192
195
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
193
196
|
)
|
@@ -278,6 +281,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
278
281
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
279
282
|
ResponseToFileExtractor as ResponseToFileExtractorModel,
|
280
283
|
)
|
284
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
285
|
+
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
|
286
|
+
)
|
281
287
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
282
288
|
SelectiveAuthenticator as SelectiveAuthenticatorModel,
|
283
289
|
)
|
@@ -291,6 +297,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
291
297
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
292
298
|
SubstreamPartitionRouter as SubstreamPartitionRouterModel,
|
293
299
|
)
|
300
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
301
|
+
TypesMap as TypesMapModel,
|
302
|
+
)
|
294
303
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
295
304
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
296
305
|
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
@@ -356,8 +365,11 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
356
365
|
)
|
357
366
|
from airbyte_cdk.sources.declarative.schema import (
|
358
367
|
DefaultSchemaLoader,
|
368
|
+
DynamicSchemaLoader,
|
359
369
|
InlineSchemaLoader,
|
360
370
|
JsonFileSchemaLoader,
|
371
|
+
SchemaTypeIdentifier,
|
372
|
+
TypesMap,
|
361
373
|
)
|
362
374
|
from airbyte_cdk.sources.declarative.spec import Spec
|
363
375
|
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
@@ -455,6 +467,9 @@ class ModelToComponentFactory:
|
|
455
467
|
IterableDecoderModel: self.create_iterable_decoder,
|
456
468
|
XmlDecoderModel: self.create_xml_decoder,
|
457
469
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
470
|
+
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
471
|
+
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
|
472
|
+
TypesMapModel: self.create_types_map,
|
458
473
|
JwtAuthenticatorModel: self.create_jwt_authenticator,
|
459
474
|
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
|
460
475
|
ListPartitionRouterModel: self.create_list_partition_router,
|
@@ -1574,6 +1589,63 @@ class ModelToComponentFactory:
|
|
1574
1589
|
) -> InlineSchemaLoader:
|
1575
1590
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
1576
1591
|
|
1592
|
+
@staticmethod
|
1593
|
+
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
|
1594
|
+
return TypesMap(target_type=model.target_type, current_type=model.current_type)
|
1595
|
+
|
1596
|
+
def create_schema_type_identifier(
|
1597
|
+
self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
|
1598
|
+
) -> SchemaTypeIdentifier:
|
1599
|
+
types_mapping = []
|
1600
|
+
if model.types_mapping:
|
1601
|
+
types_mapping.extend(
|
1602
|
+
[
|
1603
|
+
self._create_component_from_model(types_map, config=config)
|
1604
|
+
for types_map in model.types_mapping
|
1605
|
+
]
|
1606
|
+
)
|
1607
|
+
model_schema_pointer: List[Union[InterpolatedString, str]] = (
|
1608
|
+
[x for x in model.schema_pointer] if model.schema_pointer else []
|
1609
|
+
)
|
1610
|
+
model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
|
1611
|
+
model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
|
1612
|
+
[x for x in model.type_pointer] if model.type_pointer else None
|
1613
|
+
)
|
1614
|
+
|
1615
|
+
return SchemaTypeIdentifier(
|
1616
|
+
schema_pointer=model_schema_pointer,
|
1617
|
+
key_pointer=model_key_pointer,
|
1618
|
+
type_pointer=model_type_pointer,
|
1619
|
+
types_mapping=types_mapping,
|
1620
|
+
parameters=model.parameters or {},
|
1621
|
+
)
|
1622
|
+
|
1623
|
+
def create_dynamic_schema_loader(
|
1624
|
+
self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
|
1625
|
+
) -> DynamicSchemaLoader:
|
1626
|
+
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1627
|
+
combined_slicers = self._build_resumable_cursor_from_paginator(
|
1628
|
+
model.retriever, stream_slicer
|
1629
|
+
)
|
1630
|
+
|
1631
|
+
retriever = self._create_component_from_model(
|
1632
|
+
model=model.retriever,
|
1633
|
+
config=config,
|
1634
|
+
name="",
|
1635
|
+
primary_key=None,
|
1636
|
+
stream_slicer=combined_slicers,
|
1637
|
+
transformations=[],
|
1638
|
+
)
|
1639
|
+
schema_type_identifier = self._create_component_from_model(
|
1640
|
+
model.schema_type_identifier, config=config, parameters=model.parameters or {}
|
1641
|
+
)
|
1642
|
+
return DynamicSchemaLoader(
|
1643
|
+
retriever=retriever,
|
1644
|
+
config=config,
|
1645
|
+
schema_type_identifier=schema_type_identifier,
|
1646
|
+
parameters=model.parameters or {},
|
1647
|
+
)
|
1648
|
+
|
1577
1649
|
@staticmethod
|
1578
1650
|
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
|
1579
1651
|
return JsonDecoder(parameters={})
|
@@ -6,5 +6,6 @@ from airbyte_cdk.sources.declarative.schema.default_schema_loader import Default
|
|
6
6
|
from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
|
7
7
|
from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
|
8
8
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
9
|
+
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
|
9
10
|
|
10
|
-
__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]
|
11
|
+
__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader", "DynamicSchemaLoader", "TypesMap", "SchemaTypeIdentifier"]
|
@@ -0,0 +1,219 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
|
6
|
+
from copy import deepcopy
|
7
|
+
from dataclasses import InitVar, dataclass
|
8
|
+
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
9
|
+
|
10
|
+
import dpath
|
11
|
+
from typing_extensions import deprecated
|
12
|
+
|
13
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
|
+
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
15
|
+
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
16
|
+
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
17
|
+
from airbyte_cdk.sources.types import Config
|
18
|
+
|
19
|
+
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
|
20
|
+
"string": {"type": ["null", "string"]},
|
21
|
+
"boolean": {"type": ["null", "boolean"]},
|
22
|
+
"date": {"type": ["null", "string"], "format": "date"},
|
23
|
+
"timestamp_without_timezone": {
|
24
|
+
"type": ["null", "string"],
|
25
|
+
"format": "date-time",
|
26
|
+
"airbyte_type": "timestamp_without_timezone",
|
27
|
+
},
|
28
|
+
"timestamp_with_timezone": {"type": ["null", "string"], "format": "date-time"},
|
29
|
+
"time_without_timezone": {
|
30
|
+
"type": ["null", "string"],
|
31
|
+
"format": "time",
|
32
|
+
"airbyte_type": "time_without_timezone",
|
33
|
+
},
|
34
|
+
"time_with_timezone": {
|
35
|
+
"type": ["null", "string"],
|
36
|
+
"format": "time",
|
37
|
+
"airbyte_type": "time_with_timezone",
|
38
|
+
},
|
39
|
+
"integer": {"type": ["null", "integer"]},
|
40
|
+
"number": {"type": ["null", "number"]},
|
41
|
+
"array": {"type": ["null", "array"]},
|
42
|
+
"object": {"type": ["null", "object"]},
|
43
|
+
}
|
44
|
+
|
45
|
+
|
46
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
47
|
+
@dataclass(frozen=True)
|
48
|
+
class TypesMap:
|
49
|
+
"""
|
50
|
+
Represents a mapping between a current type and its corresponding target type.
|
51
|
+
"""
|
52
|
+
|
53
|
+
target_type: Union[List[str], str]
|
54
|
+
current_type: Union[List[str], str]
|
55
|
+
|
56
|
+
|
57
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
58
|
+
@dataclass
|
59
|
+
class SchemaTypeIdentifier:
|
60
|
+
"""
|
61
|
+
Identifies schema details for dynamic schema extraction and processing.
|
62
|
+
"""
|
63
|
+
|
64
|
+
key_pointer: List[Union[InterpolatedString, str]]
|
65
|
+
parameters: InitVar[Mapping[str, Any]]
|
66
|
+
type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
|
67
|
+
types_mapping: Optional[List[TypesMap]] = None
|
68
|
+
schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None
|
69
|
+
|
70
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
71
|
+
self.schema_pointer = (
|
72
|
+
self._update_pointer(self.schema_pointer, parameters) if self.schema_pointer else []
|
73
|
+
) # type: ignore[assignment] # This is required field in model
|
74
|
+
self.key_pointer = self._update_pointer(self.key_pointer, parameters) # type: ignore[assignment] # This is required field in model
|
75
|
+
self.type_pointer = (
|
76
|
+
self._update_pointer(self.type_pointer, parameters) if self.type_pointer else None
|
77
|
+
)
|
78
|
+
|
79
|
+
@staticmethod
|
80
|
+
def _update_pointer(
|
81
|
+
pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
|
82
|
+
) -> Optional[List[Union[InterpolatedString, str]]]:
|
83
|
+
return (
|
84
|
+
[
|
85
|
+
InterpolatedString.create(path, parameters=parameters)
|
86
|
+
if isinstance(path, str)
|
87
|
+
else path
|
88
|
+
for path in pointer
|
89
|
+
]
|
90
|
+
if pointer
|
91
|
+
else None
|
92
|
+
)
|
93
|
+
|
94
|
+
|
95
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
96
|
+
@dataclass
|
97
|
+
class DynamicSchemaLoader(SchemaLoader):
|
98
|
+
"""
|
99
|
+
Dynamically loads a JSON Schema by extracting data from retrieved records.
|
100
|
+
"""
|
101
|
+
|
102
|
+
retriever: Retriever
|
103
|
+
config: Config
|
104
|
+
parameters: InitVar[Mapping[str, Any]]
|
105
|
+
schema_type_identifier: SchemaTypeIdentifier
|
106
|
+
|
107
|
+
def get_json_schema(self) -> Mapping[str, Any]:
|
108
|
+
"""
|
109
|
+
Constructs a JSON Schema based on retrieved data.
|
110
|
+
"""
|
111
|
+
properties = {}
|
112
|
+
retrieved_record = next(self.retriever.read_records({}), None) # type: ignore[call-overload] # read_records returns an Iterable
|
113
|
+
|
114
|
+
raw_schema = (
|
115
|
+
self._extract_data(
|
116
|
+
retrieved_record, # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
|
117
|
+
self.schema_type_identifier.schema_pointer,
|
118
|
+
)
|
119
|
+
if retrieved_record
|
120
|
+
else []
|
121
|
+
)
|
122
|
+
|
123
|
+
for property_definition in raw_schema:
|
124
|
+
key = self._get_key(property_definition, self.schema_type_identifier.key_pointer)
|
125
|
+
value = self._get_type(
|
126
|
+
property_definition,
|
127
|
+
self.schema_type_identifier.type_pointer,
|
128
|
+
)
|
129
|
+
properties[key] = value
|
130
|
+
|
131
|
+
return {
|
132
|
+
"$schema": "http://json-schema.org/draft-07/schema#",
|
133
|
+
"type": "object",
|
134
|
+
"properties": properties,
|
135
|
+
}
|
136
|
+
|
137
|
+
def _get_key(
|
138
|
+
self,
|
139
|
+
raw_schema: MutableMapping[str, Any],
|
140
|
+
field_key_path: List[Union[InterpolatedString, str]],
|
141
|
+
) -> str:
|
142
|
+
"""
|
143
|
+
Extracts the key field from the schema using the specified path.
|
144
|
+
"""
|
145
|
+
field_key = self._extract_data(raw_schema, field_key_path)
|
146
|
+
if not isinstance(field_key, str):
|
147
|
+
raise ValueError(f"Expected key to be a string. Got {field_key}")
|
148
|
+
return field_key
|
149
|
+
|
150
|
+
def _get_type(
|
151
|
+
self,
|
152
|
+
raw_schema: MutableMapping[str, Any],
|
153
|
+
field_type_path: Optional[List[Union[InterpolatedString, str]]],
|
154
|
+
) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
|
155
|
+
"""
|
156
|
+
Determines the JSON Schema type for a field, supporting nullable and combined types.
|
157
|
+
"""
|
158
|
+
raw_field_type = (
|
159
|
+
self._extract_data(raw_schema, field_type_path, default="string")
|
160
|
+
if field_type_path
|
161
|
+
else "string"
|
162
|
+
)
|
163
|
+
mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
|
164
|
+
if (
|
165
|
+
isinstance(mapped_field_type, list)
|
166
|
+
and len(mapped_field_type) == 2
|
167
|
+
and all(isinstance(item, str) for item in mapped_field_type)
|
168
|
+
):
|
169
|
+
first_type = self._get_airbyte_type(mapped_field_type[0])
|
170
|
+
second_type = self._get_airbyte_type(mapped_field_type[1])
|
171
|
+
return {"oneOf": [first_type, second_type]}
|
172
|
+
elif isinstance(mapped_field_type, str):
|
173
|
+
return self._get_airbyte_type(mapped_field_type)
|
174
|
+
else:
|
175
|
+
raise ValueError(
|
176
|
+
f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
|
177
|
+
)
|
178
|
+
|
179
|
+
def _replace_type_if_not_valid(
|
180
|
+
self, field_type: Union[List[str], str]
|
181
|
+
) -> Union[List[str], str]:
|
182
|
+
"""
|
183
|
+
Replaces a field type if it matches a type mapping in `types_mapping`.
|
184
|
+
"""
|
185
|
+
if self.schema_type_identifier.types_mapping:
|
186
|
+
for types_map in self.schema_type_identifier.types_mapping:
|
187
|
+
if field_type == types_map.current_type:
|
188
|
+
return types_map.target_type
|
189
|
+
return field_type
|
190
|
+
|
191
|
+
@staticmethod
|
192
|
+
def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
|
193
|
+
"""
|
194
|
+
Maps a field type to its corresponding Airbyte type definition.
|
195
|
+
"""
|
196
|
+
if field_type not in AIRBYTE_DATA_TYPES:
|
197
|
+
raise ValueError(f"Invalid Airbyte data type: {field_type}")
|
198
|
+
|
199
|
+
return deepcopy(AIRBYTE_DATA_TYPES[field_type])
|
200
|
+
|
201
|
+
def _extract_data(
|
202
|
+
self,
|
203
|
+
body: Mapping[str, Any],
|
204
|
+
extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
|
205
|
+
default: Any = None,
|
206
|
+
) -> Any:
|
207
|
+
"""
|
208
|
+
Extracts data from the body based on the provided extraction path.
|
209
|
+
"""
|
210
|
+
|
211
|
+
if not extraction_path:
|
212
|
+
return body
|
213
|
+
|
214
|
+
path = [
|
215
|
+
node.eval(self.config) if not isinstance(node, str) else node
|
216
|
+
for node in extraction_path
|
217
|
+
]
|
218
|
+
|
219
|
+
return dpath.get(body, path, default=default) # type: ignore # extracted will be a MutableMapping, given input data structure
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
from abc import abstractmethod
|
6
6
|
from datetime import datetime, timedelta, timezone
|
7
|
-
from typing import Any, Callable, List, MutableMapping, Optional, Tuple
|
7
|
+
from typing import Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
9
|
import pendulum
|
10
10
|
from pendulum.datetime import DateTime
|
@@ -202,7 +202,7 @@ class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateC
|
|
202
202
|
self._input_datetime_formats += [self._datetime_format]
|
203
203
|
self._parser = DatetimeParser()
|
204
204
|
|
205
|
-
def output_format(self, timestamp: datetime) ->
|
205
|
+
def output_format(self, timestamp: datetime) -> str:
|
206
206
|
return self._parser.format(timestamp, self._datetime_format)
|
207
207
|
|
208
208
|
def parse_timestamp(self, timestamp: str) -> datetime:
|
@@ -64,9 +64,9 @@ airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYl
|
|
64
64
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
65
65
|
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=9OXOFzG5PBL_MHEJg4ETE0TXfXeuhvv-at38AN3wWEM,23323
|
66
66
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
|
-
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=
|
67
|
+
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
68
68
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
69
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
69
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=rBwFwaPXRtHTDMsjxjX2VDksJ1EroS4qYucWcDHfjHc,126897
|
70
70
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
71
71
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
72
72
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
|
@@ -84,7 +84,7 @@ airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=OJ9xmhNWNwwzx
|
|
84
84
|
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=AkXPOWyp741cpYLBl9AbmVmOQmQ2BzZ2XjgsMEB6gGc,6583
|
85
85
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
|
86
86
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=CmZl9ddwMZFo8L7mEl_OFHN3ahIFRSYrJjMbR_cJaFA,1006
|
87
|
-
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=
|
87
|
+
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
|
88
88
|
airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
|
89
89
|
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=3_EEZop94bMitZaJd2PF5Q2Xt9v94tYg7p7YJz8tAFc,15869
|
90
90
|
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py,sha256=hElcYijbOHjdLKOMA7W7aizEbf22r7OSApXALP875uI,15749
|
@@ -104,12 +104,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
|
|
104
104
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
105
105
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
106
106
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
107
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
107
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=cQk4epWUcGRA2AC0VwwuBzer3kwjtSO4GHWfB0YHECs,89893
|
108
108
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
109
109
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
110
|
-
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=
|
110
|
+
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=KflzFl_ZKRSW9XkH16sfr5-9HnAAI0T5s8CVBrJK2Ao,8958
|
111
111
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
112
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=kGU0re3R-Ujtn6Gp96KpEVRHJB5P-B8sRa0aMR_jDdk,102536
|
113
113
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
|
114
114
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
115
115
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
|
@@ -159,8 +159,9 @@ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp0
|
|
159
159
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
|
160
160
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
161
161
|
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
|
162
|
-
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=
|
162
|
+
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=Io9vninzlEjQ2uFmWklxfwNM0cXfljtzOz5zL1OVyT4,701
|
163
163
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
|
164
|
+
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=tP5DIEMn-k2JshWeXmo53ZEudDAVb4AJ50Z5tfme_ZU,8063
|
164
165
|
airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
|
165
166
|
airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
|
166
167
|
airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
|
@@ -261,7 +262,7 @@ airbyte_cdk/sources/streams/concurrent/partitions/stream_slicer.py,sha256=nbdkkH
|
|
261
262
|
airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=frPVvHtY7vLxpGEbMQzNvF1Y52ZVyct9f1DDhGoRjwY,1166
|
262
263
|
airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
263
264
|
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=CXHUMOhndu-LOKgsnNTItv5s5qrKpmJDeHOzlH1nBy8,6819
|
264
|
-
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=
|
265
|
+
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=syjdxEoElIOzqVS5Jrm5FOR70jsbBdttEO_3Iz12Jyo,7523
|
265
266
|
airbyte_cdk/sources/streams/core.py,sha256=z4Oi5qmJPjs-RdMd5tPWHvHqIjkcxhkVKTvIpfAs2uA,32211
|
266
267
|
airbyte_cdk/sources/streams/http/__init__.py,sha256=NXaNlkzZMkh5kS8S5ujEaKEE6855sk6_HljF_GFjKZI,311
|
267
268
|
airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=sovoGFThZr-doMN9vJvTuJBrvkwQVIO0qTQO64pGZPY,2428
|
@@ -333,8 +334,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
333
334
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
334
335
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
335
336
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
336
|
-
airbyte_cdk-6.9.
|
337
|
-
airbyte_cdk-6.9.
|
338
|
-
airbyte_cdk-6.9.
|
339
|
-
airbyte_cdk-6.9.
|
340
|
-
airbyte_cdk-6.9.
|
337
|
+
airbyte_cdk-6.9.2.dev4100.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
338
|
+
airbyte_cdk-6.9.2.dev4100.dist-info/METADATA,sha256=MlRINdXtRATUHEZkZ61QvvV0_c4u0j962cpT1HXSul0,5957
|
339
|
+
airbyte_cdk-6.9.2.dev4100.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
340
|
+
airbyte_cdk-6.9.2.dev4100.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
341
|
+
airbyte_cdk-6.9.2.dev4100.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|