airbyte-cdk 6.9.0.dev0__py3-none-any.whl → 6.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +23 -32
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +1 -87
- airbyte_cdk/sources/declarative/interpolation/jinja.py +36 -35
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +4 -42
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +0 -72
- airbyte_cdk/sources/declarative/schema/__init__.py +1 -2
- airbyte_cdk/sources/streams/http/http_client.py +15 -5
- {airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/METADATA +1 -4
- {airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/RECORD +13 -14
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +0 -219
- {airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.9.0.dev0.dist-info → airbyte_cdk-6.9.1.dist-info}/entry_points.txt +0 -0
@@ -56,8 +56,9 @@ from airbyte_cdk.sources.types import Config, StreamState
|
|
56
56
|
|
57
57
|
|
58
58
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
59
|
-
# By default, we defer to a value of
|
60
|
-
|
59
|
+
# By default, we defer to a value of 2. A value lower than that could cause a PartitionEnqueuer to be stuck in a state of deadlock
|
60
|
+
# because it has hit the limit of futures but no partition reader is consuming them.
|
61
|
+
_LOWEST_SAFE_CONCURRENCY_LEVEL = 2
|
61
62
|
|
62
63
|
def __init__(
|
63
64
|
self,
|
@@ -86,23 +87,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
86
87
|
component_factory=component_factory,
|
87
88
|
)
|
88
89
|
|
90
|
+
# todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
|
91
|
+
# no longer needs to store the original incoming state. But maybe there's an edge case?
|
89
92
|
self._state = state
|
90
93
|
|
91
|
-
self._concurrent_streams: Optional[List[AbstractStream]]
|
92
|
-
self._synchronous_streams: Optional[List[Stream]]
|
93
|
-
|
94
|
-
# If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
|
95
|
-
# they might depend on it. Ideally we want to have a static method on this class to get the spec without
|
96
|
-
# any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
|
97
|
-
# for our future improvements to the CDK.
|
98
|
-
if config:
|
99
|
-
self._concurrent_streams, self._synchronous_streams = self._group_streams(
|
100
|
-
config=config or {}
|
101
|
-
)
|
102
|
-
else:
|
103
|
-
self._concurrent_streams = None
|
104
|
-
self._synchronous_streams = None
|
105
|
-
|
106
94
|
concurrency_level_from_manifest = self._source_config.get("concurrency_level")
|
107
95
|
if concurrency_level_from_manifest:
|
108
96
|
concurrency_level_component = self._constructor.create_component(
|
@@ -120,8 +108,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
120
108
|
concurrency_level // 2, 1
|
121
109
|
) # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
|
122
110
|
else:
|
123
|
-
concurrency_level = self.
|
124
|
-
initial_number_of_partitions_to_generate = self.
|
111
|
+
concurrency_level = self._LOWEST_SAFE_CONCURRENCY_LEVEL
|
112
|
+
initial_number_of_partitions_to_generate = self._LOWEST_SAFE_CONCURRENCY_LEVEL // 2
|
125
113
|
|
126
114
|
self._concurrent_source = ConcurrentSource.create(
|
127
115
|
num_workers=concurrency_level,
|
@@ -136,17 +124,20 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
136
124
|
logger: logging.Logger,
|
137
125
|
config: Mapping[str, Any],
|
138
126
|
catalog: ConfiguredAirbyteCatalog,
|
139
|
-
state: Optional[
|
127
|
+
state: Optional[List[AirbyteStateMessage]] = None,
|
140
128
|
) -> Iterator[AirbyteMessage]:
|
141
|
-
|
142
|
-
|
143
|
-
|
129
|
+
concurrent_streams, _ = self._group_streams(config=config)
|
130
|
+
|
131
|
+
# ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of
|
132
|
+
# the concurrent streams must be saved so that they can be removed from the catalog before starting
|
133
|
+
# synchronous streams
|
134
|
+
if len(concurrent_streams) > 0:
|
144
135
|
concurrent_stream_names = set(
|
145
|
-
[concurrent_stream.name for concurrent_stream in
|
136
|
+
[concurrent_stream.name for concurrent_stream in concurrent_streams]
|
146
137
|
)
|
147
138
|
|
148
139
|
selected_concurrent_streams = self._select_streams(
|
149
|
-
streams=
|
140
|
+
streams=concurrent_streams, configured_catalog=catalog
|
150
141
|
)
|
151
142
|
# It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
|
152
143
|
# This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
|
@@ -165,8 +156,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
165
156
|
yield from super().read(logger, config, filtered_catalog, state)
|
166
157
|
|
167
158
|
def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
|
168
|
-
concurrent_streams = self.
|
169
|
-
synchronous_streams = self._synchronous_streams or []
|
159
|
+
concurrent_streams, synchronous_streams = self._group_streams(config=config)
|
170
160
|
return AirbyteCatalog(
|
171
161
|
streams=[
|
172
162
|
stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
|
@@ -206,7 +196,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
206
196
|
# so we need to treat them as synchronous
|
207
197
|
if (
|
208
198
|
isinstance(declarative_stream, DeclarativeStream)
|
209
|
-
and name_to_stream_mapping[declarative_stream.name]
|
199
|
+
and name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
|
210
200
|
== "SimpleRetriever"
|
211
201
|
):
|
212
202
|
incremental_sync_component_definition = name_to_stream_mapping[
|
@@ -215,7 +205,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
215
205
|
|
216
206
|
partition_router_component_definition = (
|
217
207
|
name_to_stream_mapping[declarative_stream.name]
|
218
|
-
.get("retriever")
|
208
|
+
.get("retriever", {})
|
219
209
|
.get("partition_router")
|
220
210
|
)
|
221
211
|
is_without_partition_router_or_cursor = not bool(
|
@@ -237,7 +227,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
237
227
|
cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
|
238
228
|
state_manager=state_manager,
|
239
229
|
model_type=DatetimeBasedCursorModel,
|
240
|
-
component_definition=incremental_sync_component_definition,
|
230
|
+
component_definition=incremental_sync_component_definition, # type: ignore # Not None because of the if condition above
|
241
231
|
stream_name=declarative_stream.name,
|
242
232
|
stream_namespace=declarative_stream.namespace,
|
243
233
|
config=config or {},
|
@@ -320,10 +310,11 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
320
310
|
def _is_datetime_incremental_without_partition_routing(
|
321
311
|
self,
|
322
312
|
declarative_stream: DeclarativeStream,
|
323
|
-
incremental_sync_component_definition: Mapping[str, Any],
|
313
|
+
incremental_sync_component_definition: Mapping[str, Any] | None,
|
324
314
|
) -> bool:
|
325
315
|
return (
|
326
|
-
|
316
|
+
incremental_sync_component_definition is not None
|
317
|
+
and bool(incremental_sync_component_definition)
|
327
318
|
and incremental_sync_component_definition.get("type", "")
|
328
319
|
== DatetimeBasedCursorModel.__name__
|
329
320
|
and self._stream_supports_concurrent_partition_processing(
|
@@ -327,7 +327,7 @@ definitions:
|
|
327
327
|
additionalProperties: true
|
328
328
|
ConcurrencyLevel:
|
329
329
|
title: Concurrency Level
|
330
|
-
description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time.
|
330
|
+
description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will be processed at the same time. Note that a value of 1 could create deadlock if a stream has a very high number of partitions.
|
331
331
|
type: object
|
332
332
|
required:
|
333
333
|
- default_concurrency
|
@@ -1684,92 +1684,6 @@ definitions:
|
|
1684
1684
|
$parameters:
|
1685
1685
|
type: object
|
1686
1686
|
additionalProperties: true
|
1687
|
-
TypesMap:
|
1688
|
-
title: Types Map
|
1689
|
-
description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
|
1690
|
-
type: object
|
1691
|
-
required:
|
1692
|
-
- target_type
|
1693
|
-
- current_type
|
1694
|
-
properties:
|
1695
|
-
target_type:
|
1696
|
-
anyOf:
|
1697
|
-
- type: string
|
1698
|
-
- type: array
|
1699
|
-
items:
|
1700
|
-
type: string
|
1701
|
-
current_type:
|
1702
|
-
anyOf:
|
1703
|
-
- type: string
|
1704
|
-
- type: array
|
1705
|
-
items:
|
1706
|
-
type: string
|
1707
|
-
SchemaTypeIdentifier:
|
1708
|
-
title: Schema Type Identifier
|
1709
|
-
description: (This component is experimental. Use at your own risk.) Identifies schema details for dynamic schema extraction and processing.
|
1710
|
-
type: object
|
1711
|
-
required:
|
1712
|
-
- key_pointer
|
1713
|
-
properties:
|
1714
|
-
type:
|
1715
|
-
type: string
|
1716
|
-
enum: [SchemaTypeIdentifier]
|
1717
|
-
schema_pointer:
|
1718
|
-
title: Schema Path
|
1719
|
-
description: List of nested fields defining the schema field path to extract. Defaults to [].
|
1720
|
-
type: array
|
1721
|
-
default: []
|
1722
|
-
items:
|
1723
|
-
- type: string
|
1724
|
-
interpolation_content:
|
1725
|
-
- config
|
1726
|
-
key_pointer:
|
1727
|
-
title: Key Path
|
1728
|
-
description: List of potentially nested fields describing the full path of the field key to extract.
|
1729
|
-
type: array
|
1730
|
-
items:
|
1731
|
-
- type: string
|
1732
|
-
interpolation_content:
|
1733
|
-
- config
|
1734
|
-
type_pointer:
|
1735
|
-
title: Type Path
|
1736
|
-
description: List of potentially nested fields describing the full path of the field type to extract.
|
1737
|
-
type: array
|
1738
|
-
items:
|
1739
|
-
- type: string
|
1740
|
-
interpolation_content:
|
1741
|
-
- config
|
1742
|
-
types_mapping:
|
1743
|
-
type: array
|
1744
|
-
items:
|
1745
|
-
- "$ref": "#/definitions/TypesMap"
|
1746
|
-
$parameters:
|
1747
|
-
type: object
|
1748
|
-
additionalProperties: true
|
1749
|
-
DynamicSchemaLoader:
|
1750
|
-
title: Dynamic Schema Loader
|
1751
|
-
description: (This component is experimental. Use at your own risk.) Loads a schema by extracting data from retrieved records.
|
1752
|
-
type: object
|
1753
|
-
required:
|
1754
|
-
- type
|
1755
|
-
- retriever
|
1756
|
-
- schema_type_identifier
|
1757
|
-
properties:
|
1758
|
-
type:
|
1759
|
-
type: string
|
1760
|
-
enum: [DynamicSchemaLoader]
|
1761
|
-
retriever:
|
1762
|
-
title: Retriever
|
1763
|
-
description: Component used to coordinate how records are extracted across stream slices and request pages.
|
1764
|
-
anyOf:
|
1765
|
-
- "$ref": "#/definitions/AsyncRetriever"
|
1766
|
-
- "$ref": "#/definitions/CustomRetriever"
|
1767
|
-
- "$ref": "#/definitions/SimpleRetriever"
|
1768
|
-
schema_type_identifier:
|
1769
|
-
"$ref": "#/definitions/SchemaTypeIdentifier"
|
1770
|
-
$parameters:
|
1771
|
-
type: object
|
1772
|
-
additionalProperties: true
|
1773
1687
|
InlineSchemaLoader:
|
1774
1688
|
title: Inline Schema Loader
|
1775
1689
|
description: Loads a schema that is defined directly in the manifest file.
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
import ast
|
6
6
|
from functools import cache
|
7
|
-
from typing import Any, Mapping, Optional, Tuple, Type
|
7
|
+
from typing import Any, Mapping, Optional, Set, Tuple, Type
|
8
8
|
|
9
9
|
from jinja2 import meta
|
10
10
|
from jinja2.environment import Template
|
@@ -27,7 +27,35 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
|
|
27
27
|
def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
|
28
28
|
if attr in ["_partition"]:
|
29
29
|
return True
|
30
|
-
return super().is_safe_attribute(obj, attr, value)
|
30
|
+
return super().is_safe_attribute(obj, attr, value) # type: ignore # for some reason, mypy says 'Returning Any from function declared to return "bool"'
|
31
|
+
|
32
|
+
|
33
|
+
# These aliases are used to deprecate existing keywords without breaking all existing connectors.
|
34
|
+
_ALIASES = {
|
35
|
+
"stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
|
36
|
+
"stream_partition": "stream_slice", # Use stream_partition to access partition router's values
|
37
|
+
}
|
38
|
+
|
39
|
+
# These extensions are not installed so they're not currently a problem,
|
40
|
+
# but we're still explicitly removing them from the jinja context.
|
41
|
+
# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
|
42
|
+
_RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
|
43
|
+
|
44
|
+
# By default, these Python builtin functions are available in the Jinja context.
|
45
|
+
# We explicitly remove them because of the potential security risk.
|
46
|
+
# Please add a unit test to test_jinja.py when adding a restriction.
|
47
|
+
_RESTRICTED_BUILTIN_FUNCTIONS = [
|
48
|
+
"range"
|
49
|
+
] # The range function can cause very expensive computations
|
50
|
+
|
51
|
+
_ENVIRONMENT = StreamPartitionAccessEnvironment()
|
52
|
+
_ENVIRONMENT.filters.update(**filters)
|
53
|
+
_ENVIRONMENT.globals.update(**macros)
|
54
|
+
|
55
|
+
for extension in _RESTRICTED_EXTENSIONS:
|
56
|
+
_ENVIRONMENT.extensions.pop(extension, None)
|
57
|
+
for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
|
58
|
+
_ENVIRONMENT.globals.pop(builtin, None)
|
31
59
|
|
32
60
|
|
33
61
|
class JinjaInterpolation(Interpolation):
|
@@ -48,34 +76,6 @@ class JinjaInterpolation(Interpolation):
|
|
48
76
|
Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
|
49
77
|
"""
|
50
78
|
|
51
|
-
# These aliases are used to deprecate existing keywords without breaking all existing connectors.
|
52
|
-
ALIASES = {
|
53
|
-
"stream_interval": "stream_slice", # Use stream_interval to access incremental_sync values
|
54
|
-
"stream_partition": "stream_slice", # Use stream_partition to access partition router's values
|
55
|
-
}
|
56
|
-
|
57
|
-
# These extensions are not installed so they're not currently a problem,
|
58
|
-
# but we're still explicitely removing them from the jinja context.
|
59
|
-
# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
|
60
|
-
RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"] # Adds support for break continue in loops
|
61
|
-
|
62
|
-
# By default, these Python builtin functions are available in the Jinja context.
|
63
|
-
# We explicitely remove them because of the potential security risk.
|
64
|
-
# Please add a unit test to test_jinja.py when adding a restriction.
|
65
|
-
RESTRICTED_BUILTIN_FUNCTIONS = [
|
66
|
-
"range"
|
67
|
-
] # The range function can cause very expensive computations
|
68
|
-
|
69
|
-
def __init__(self) -> None:
|
70
|
-
self._environment = StreamPartitionAccessEnvironment()
|
71
|
-
self._environment.filters.update(**filters)
|
72
|
-
self._environment.globals.update(**macros)
|
73
|
-
|
74
|
-
for extension in self.RESTRICTED_EXTENSIONS:
|
75
|
-
self._environment.extensions.pop(extension, None)
|
76
|
-
for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
|
77
|
-
self._environment.globals.pop(builtin, None)
|
78
|
-
|
79
79
|
def eval(
|
80
80
|
self,
|
81
81
|
input_str: str,
|
@@ -86,7 +86,7 @@ class JinjaInterpolation(Interpolation):
|
|
86
86
|
) -> Any:
|
87
87
|
context = {"config": config, **additional_parameters}
|
88
88
|
|
89
|
-
for alias, equivalent in
|
89
|
+
for alias, equivalent in _ALIASES.items():
|
90
90
|
if alias in context:
|
91
91
|
# This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
|
92
92
|
raise ValueError(
|
@@ -105,6 +105,7 @@ class JinjaInterpolation(Interpolation):
|
|
105
105
|
raise Exception(f"Expected a string, got {input_str}")
|
106
106
|
except UndefinedError:
|
107
107
|
pass
|
108
|
+
|
108
109
|
# If result is empty or resulted in an undefined error, evaluate and return the default string
|
109
110
|
return self._literal_eval(self._eval(default, context), valid_types)
|
110
111
|
|
@@ -132,16 +133,16 @@ class JinjaInterpolation(Interpolation):
|
|
132
133
|
return s
|
133
134
|
|
134
135
|
@cache
|
135
|
-
def _find_undeclared_variables(self, s: Optional[str]) ->
|
136
|
+
def _find_undeclared_variables(self, s: Optional[str]) -> Set[str]:
|
136
137
|
"""
|
137
138
|
Find undeclared variables and cache them
|
138
139
|
"""
|
139
|
-
ast =
|
140
|
+
ast = _ENVIRONMENT.parse(s) # type: ignore # parse is able to handle None
|
140
141
|
return meta.find_undeclared_variables(ast)
|
141
142
|
|
142
143
|
@cache
|
143
|
-
def _compile(self, s:
|
144
|
+
def _compile(self, s: str) -> Template:
|
144
145
|
"""
|
145
146
|
We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
|
146
147
|
"""
|
147
|
-
return
|
148
|
+
return _ENVIRONMENT.from_string(s)
|
@@ -650,32 +650,6 @@ class HttpResponseFilter(BaseModel):
|
|
650
650
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
651
651
|
|
652
652
|
|
653
|
-
class TypesMap(BaseModel):
|
654
|
-
target_type: Union[str, List[str]]
|
655
|
-
current_type: Union[str, List[str]]
|
656
|
-
|
657
|
-
|
658
|
-
class SchemaTypeIdentifier(BaseModel):
|
659
|
-
type: Optional[Literal["SchemaTypeIdentifier"]] = None
|
660
|
-
schema_pointer: Optional[List[str]] = Field(
|
661
|
-
[],
|
662
|
-
description="List of nested fields defining the schema field path to extract. Defaults to [].",
|
663
|
-
title="Schema Path",
|
664
|
-
)
|
665
|
-
key_pointer: List[str] = Field(
|
666
|
-
...,
|
667
|
-
description="List of potentially nested fields describing the full path of the field key to extract.",
|
668
|
-
title="Key Path",
|
669
|
-
)
|
670
|
-
type_pointer: Optional[List[str]] = Field(
|
671
|
-
None,
|
672
|
-
description="List of potentially nested fields describing the full path of the field type to extract.",
|
673
|
-
title="Type Path",
|
674
|
-
)
|
675
|
-
types_mapping: Optional[List[TypesMap]] = None
|
676
|
-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
677
|
-
|
678
|
-
|
679
653
|
class InlineSchemaLoader(BaseModel):
|
680
654
|
type: Literal["InlineSchemaLoader"]
|
681
655
|
schema_: Optional[Dict[str, Any]] = Field(
|
@@ -848,13 +822,13 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
848
822
|
)
|
849
823
|
extract_output: List[str] = Field(
|
850
824
|
...,
|
851
|
-
description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
|
825
|
+
description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. ",
|
852
826
|
examples=[{"extract_output": ["access_token", "refresh_token", "other_field"]}],
|
853
827
|
title="DeclarativeOAuth Extract Output",
|
854
828
|
)
|
855
829
|
state: Optional[State] = Field(
|
856
830
|
None,
|
857
|
-
description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity.",
|
831
|
+
description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity. ",
|
858
832
|
examples=[{"state": {"min": 7, "max": 128}}],
|
859
833
|
title="(Optional) DeclarativeOAuth Configurable State Query Param",
|
860
834
|
)
|
@@ -878,13 +852,13 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
878
852
|
)
|
879
853
|
state_key: Optional[str] = Field(
|
880
854
|
None,
|
881
|
-
description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider.",
|
855
|
+
description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. ",
|
882
856
|
examples=[{"state_key": "my_custom_state_key_key_name"}],
|
883
857
|
title="(Optional) DeclarativeOAuth State Key Override",
|
884
858
|
)
|
885
859
|
auth_code_key: Optional[str] = Field(
|
886
860
|
None,
|
887
|
-
description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider.",
|
861
|
+
description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. ",
|
888
862
|
examples=[{"auth_code_key": "my_custom_auth_code_key_name"}],
|
889
863
|
title="(Optional) DeclarativeOAuth Auth Code Key Override",
|
890
864
|
)
|
@@ -1800,17 +1774,6 @@ class HttpRequester(BaseModel):
|
|
1800
1774
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1801
1775
|
|
1802
1776
|
|
1803
|
-
class DynamicSchemaLoader(BaseModel):
|
1804
|
-
type: Literal["DynamicSchemaLoader"]
|
1805
|
-
retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
|
1806
|
-
...,
|
1807
|
-
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
1808
|
-
title="Retriever",
|
1809
|
-
)
|
1810
|
-
schema_type_identifier: SchemaTypeIdentifier
|
1811
|
-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1812
|
-
|
1813
|
-
|
1814
1777
|
class ParentStreamConfig(BaseModel):
|
1815
1778
|
type: Literal["ParentStreamConfig"]
|
1816
1779
|
parent_key: str = Field(
|
@@ -2018,6 +1981,5 @@ DeclarativeSource2.update_forward_refs()
|
|
2018
1981
|
SelectiveAuthenticator.update_forward_refs()
|
2019
1982
|
DeclarativeStream.update_forward_refs()
|
2020
1983
|
SessionTokenAuthenticator.update_forward_refs()
|
2021
|
-
DynamicSchemaLoader.update_forward_refs()
|
2022
1984
|
SimpleRetriever.update_forward_refs()
|
2023
1985
|
AsyncRetriever.update_forward_refs()
|
@@ -64,10 +64,6 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
|
|
64
64
|
"AddFields.fields": "AddedFieldDefinition",
|
65
65
|
# CustomPartitionRouter
|
66
66
|
"CustomPartitionRouter.parent_stream_configs": "ParentStreamConfig",
|
67
|
-
# DynamicSchemaLoader
|
68
|
-
"DynamicSchemaLoader.retriever": "SimpleRetriever",
|
69
|
-
# SchemaTypeIdentifier
|
70
|
-
"SchemaTypeIdentifier.types_map": "TypesMap",
|
71
67
|
}
|
72
68
|
|
73
69
|
# We retain a separate registry for custom components to automatically insert the type if it is missing. This is intended to
|
@@ -188,9 +188,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
188
188
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
189
189
|
DpathExtractor as DpathExtractorModel,
|
190
190
|
)
|
191
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
192
|
-
DynamicSchemaLoader as DynamicSchemaLoaderModel,
|
193
|
-
)
|
194
191
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
195
192
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
196
193
|
)
|
@@ -281,9 +278,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
281
278
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
282
279
|
ResponseToFileExtractor as ResponseToFileExtractorModel,
|
283
280
|
)
|
284
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
285
|
-
SchemaTypeIdentifier as SchemaTypeIdentifierModel,
|
286
|
-
)
|
287
281
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
288
282
|
SelectiveAuthenticator as SelectiveAuthenticatorModel,
|
289
283
|
)
|
@@ -297,9 +291,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
297
291
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
298
292
|
SubstreamPartitionRouter as SubstreamPartitionRouterModel,
|
299
293
|
)
|
300
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
301
|
-
TypesMap as TypesMapModel,
|
302
|
-
)
|
303
294
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import ValueType
|
304
295
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
305
296
|
WaitTimeFromHeader as WaitTimeFromHeaderModel,
|
@@ -365,11 +356,8 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
365
356
|
)
|
366
357
|
from airbyte_cdk.sources.declarative.schema import (
|
367
358
|
DefaultSchemaLoader,
|
368
|
-
DynamicSchemaLoader,
|
369
359
|
InlineSchemaLoader,
|
370
360
|
JsonFileSchemaLoader,
|
371
|
-
SchemaTypeIdentifier,
|
372
|
-
TypesMap,
|
373
361
|
)
|
374
362
|
from airbyte_cdk.sources.declarative.spec import Spec
|
375
363
|
from airbyte_cdk.sources.declarative.stream_slicers import StreamSlicer
|
@@ -467,9 +455,6 @@ class ModelToComponentFactory:
|
|
467
455
|
IterableDecoderModel: self.create_iterable_decoder,
|
468
456
|
XmlDecoderModel: self.create_xml_decoder,
|
469
457
|
JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
|
470
|
-
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
471
|
-
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
|
472
|
-
TypesMapModel: self.create_types_map,
|
473
458
|
JwtAuthenticatorModel: self.create_jwt_authenticator,
|
474
459
|
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
|
475
460
|
ListPartitionRouterModel: self.create_list_partition_router,
|
@@ -1589,63 +1574,6 @@ class ModelToComponentFactory:
|
|
1589
1574
|
) -> InlineSchemaLoader:
|
1590
1575
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
1591
1576
|
|
1592
|
-
@staticmethod
|
1593
|
-
def create_types_map(model: TypesMapModel, **kwargs: Any) -> TypesMap:
|
1594
|
-
return TypesMap(target_type=model.target_type, current_type=model.current_type)
|
1595
|
-
|
1596
|
-
def create_schema_type_identifier(
|
1597
|
-
self, model: SchemaTypeIdentifierModel, config: Config, **kwargs: Any
|
1598
|
-
) -> SchemaTypeIdentifier:
|
1599
|
-
types_mapping = []
|
1600
|
-
if model.types_mapping:
|
1601
|
-
types_mapping.extend(
|
1602
|
-
[
|
1603
|
-
self._create_component_from_model(types_map, config=config)
|
1604
|
-
for types_map in model.types_mapping
|
1605
|
-
]
|
1606
|
-
)
|
1607
|
-
model_schema_pointer: List[Union[InterpolatedString, str]] = (
|
1608
|
-
[x for x in model.schema_pointer] if model.schema_pointer else []
|
1609
|
-
)
|
1610
|
-
model_key_pointer: List[Union[InterpolatedString, str]] = [x for x in model.key_pointer]
|
1611
|
-
model_type_pointer: Optional[List[Union[InterpolatedString, str]]] = (
|
1612
|
-
[x for x in model.type_pointer] if model.type_pointer else None
|
1613
|
-
)
|
1614
|
-
|
1615
|
-
return SchemaTypeIdentifier(
|
1616
|
-
schema_pointer=model_schema_pointer,
|
1617
|
-
key_pointer=model_key_pointer,
|
1618
|
-
type_pointer=model_type_pointer,
|
1619
|
-
types_mapping=types_mapping,
|
1620
|
-
parameters=model.parameters or {},
|
1621
|
-
)
|
1622
|
-
|
1623
|
-
def create_dynamic_schema_loader(
|
1624
|
-
self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
|
1625
|
-
) -> DynamicSchemaLoader:
|
1626
|
-
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
1627
|
-
combined_slicers = self._build_resumable_cursor_from_paginator(
|
1628
|
-
model.retriever, stream_slicer
|
1629
|
-
)
|
1630
|
-
|
1631
|
-
retriever = self._create_component_from_model(
|
1632
|
-
model=model.retriever,
|
1633
|
-
config=config,
|
1634
|
-
name="",
|
1635
|
-
primary_key=None,
|
1636
|
-
stream_slicer=combined_slicers,
|
1637
|
-
transformations=[],
|
1638
|
-
)
|
1639
|
-
schema_type_identifier = self._create_component_from_model(
|
1640
|
-
model.schema_type_identifier, config=config, parameters=model.parameters or {}
|
1641
|
-
)
|
1642
|
-
return DynamicSchemaLoader(
|
1643
|
-
retriever=retriever,
|
1644
|
-
config=config,
|
1645
|
-
schema_type_identifier=schema_type_identifier,
|
1646
|
-
parameters=model.parameters or {},
|
1647
|
-
)
|
1648
|
-
|
1649
1577
|
@staticmethod
|
1650
1578
|
def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
|
1651
1579
|
return JsonDecoder(parameters={})
|
@@ -6,6 +6,5 @@ from airbyte_cdk.sources.declarative.schema.default_schema_loader import Default
|
|
6
6
|
from airbyte_cdk.sources.declarative.schema.inline_schema_loader import InlineSchemaLoader
|
7
7
|
from airbyte_cdk.sources.declarative.schema.json_file_schema_loader import JsonFileSchemaLoader
|
8
8
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
9
|
-
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import DynamicSchemaLoader, TypesMap, SchemaTypeIdentifier
|
10
9
|
|
11
|
-
__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"
|
10
|
+
__all__ = ["JsonFileSchemaLoader", "DefaultSchemaLoader", "SchemaLoader", "InlineSchemaLoader"]
|
@@ -138,12 +138,22 @@ class HttpClient:
|
|
138
138
|
cache_dir = os.getenv(ENV_REQUEST_CACHE_PATH)
|
139
139
|
# Use in-memory cache if cache_dir is not set
|
140
140
|
# This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
|
141
|
-
if cache_dir
|
142
|
-
|
143
|
-
|
144
|
-
|
141
|
+
# Use in-memory cache if cache_dir is not set
|
142
|
+
# This is a non-obvious interface, but it ensures we don't write sql files when running unit tests
|
143
|
+
sqlite_path = (
|
144
|
+
str(Path(cache_dir) / self.cache_filename)
|
145
|
+
if cache_dir
|
146
|
+
else "file::memory:?cache=shared"
|
147
|
+
)
|
148
|
+
# By using `PRAGMA synchronous=OFF` and `PRAGMA journal_mode=WAL`, we reduce the possible occurrences of `database table is locked` errors.
|
149
|
+
# Note that those were blindly added at the same time and one or the other might be sufficient to prevent the issues but we have seen good results with both. Feel free to revisit given more information.
|
150
|
+
# There are strong signals that `fast_save` might create problems but if the sync crashes, we start back from the beginning in terms of sqlite anyway so the impact should be minimal. Signals are:
|
151
|
+
# * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-f43db4a5edf931647c32dec28ea7557aae4cae8444af4b26c8ecbe88d8c925aaR238
|
152
|
+
# * https://github.com/requests-cache/requests-cache/commit/7fa89ffda300331c37d8fad7f773348a3b5b0236#diff-2e7f95b7d7be270ff1a8118f817ea3e6663cdad273592e536a116c24e6d23c18R164-R168
|
153
|
+
# * `If the application running SQLite crashes, the data will be safe, but the database [might become corrupted](https://www.sqlite.org/howtocorrupt.html#cfgerr) if the operating system crashes or the computer loses power before that data has been written to the disk surface.` in [this description](https://www.sqlite.org/pragma.html#pragma_synchronous).
|
154
|
+
backend = requests_cache.SQLiteCache(sqlite_path, fast_save=True, wal=True)
|
145
155
|
return CachedLimiterSession(
|
146
|
-
sqlite_path, backend=
|
156
|
+
sqlite_path, backend=backend, api_budget=self._api_budget, match_headers=True
|
147
157
|
)
|
148
158
|
else:
|
149
159
|
return LimiterSession(api_budget=self._api_budget)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.9.
|
3
|
+
Version: 6.9.1
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -18,12 +18,10 @@ Classifier: Programming Language :: Python :: 3.12
|
|
18
18
|
Classifier: Topic :: Scientific/Engineering
|
19
19
|
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
20
20
|
Provides-Extra: file-based
|
21
|
-
Provides-Extra: sphinx-docs
|
22
21
|
Provides-Extra: sql
|
23
22
|
Provides-Extra: vector-db-based
|
24
23
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
25
24
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
26
|
-
Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
|
27
25
|
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
28
26
|
Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
|
29
27
|
Requires-Dist: backoff
|
@@ -63,7 +61,6 @@ Requires-Dist: rapidfuzz (>=3.10.1,<4.0.0)
|
|
63
61
|
Requires-Dist: requests
|
64
62
|
Requires-Dist: requests_cache
|
65
63
|
Requires-Dist: serpyco-rs (>=1.10.2,<2.0.0)
|
66
|
-
Requires-Dist: sphinx-rtd-theme (>=1.0,<1.1) ; extra == "sphinx-docs"
|
67
64
|
Requires-Dist: sqlalchemy (>=2.0,<3.0,!=2.0.36) ; extra == "sql"
|
68
65
|
Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
|
69
66
|
Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
|
@@ -62,11 +62,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
|
|
62
62
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
63
63
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
64
64
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
65
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
65
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=PxP4p2686wsf1gjsumGKnh2o2Jjnrqg8QLGijEIrp-A,23412
|
66
66
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
67
67
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
68
68
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
69
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
69
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Z1v19wOXYpuffvcmZ5TZyU4kSCFyt3Hba7qfY-2o46U,124229
|
70
70
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
71
71
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
72
72
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=hNlhaB5FjNC6IfJyglj5ZJWkYD2nEAukMDmzRz5PC6o,671
|
@@ -97,19 +97,19 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py,sha256=UrF
|
|
97
97
|
airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py,sha256=i2L0gREX8nHA-pKokdVqwBf4aJgWP71KOxIABj_DHcY,1857
|
98
98
|
airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=LYEZnZ_hB7rvBSZxG9s0RSrzsOkDWbBY0_P6qu5lEfc,3212
|
99
99
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=-V5UddGm69UKEB6o_O1EIES9kfY8FV_X4Ji8w1yOuSA,981
|
100
|
-
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=
|
100
|
+
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=BtsY_jtT4MihFqeQgc05HXj3Ndt-e2ESQgGwbg3Sdxc,6430
|
101
101
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=jf24RK-1fBhTYDpcGEakZtGNNJfG5NS8CCF5bEgNmRo,3977
|
102
102
|
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=-v3GvuTVHwUonrfUwDj3wYKaZjX6hTyKmMBRgEzj-j0,15201
|
103
103
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
104
104
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
105
105
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
106
106
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
107
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
107
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=6iAzpGmUrhwEUQcCL5bW-FXuLXPMeFqs_GR4B1rS3ZE,88511
|
108
108
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
109
109
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
110
|
-
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=
|
110
|
+
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=0jfi-ogL-rOVORTIYnu64wNfh1L8fYaLVDWzJ2zGdi8,8799
|
111
111
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
112
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=lS2oKfkND54u66hocc2BycS-AIYIbkn4npq6CFRNokc,99573
|
113
113
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=1NjaZoGAIefvWwj6wx-LOKIXXWS-UnBlZFnuR7y6uYA,745
|
114
114
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
115
115
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
|
@@ -159,9 +159,8 @@ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=FVQpUGVwp2Gibk4gp0
|
|
159
159
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=WDFnjrXLz3-YEjFhmlMkWAn9AJvnZ0mk9FyC8DAhEYk,4976
|
160
160
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
161
161
|
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=N4swGw5mfuTXJ2R7AKX18CHzizsr69pXwt5uSHLPi48,24172
|
162
|
-
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=
|
162
|
+
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=ul8L9S0-__AMEdbCLHBq-PMEeA928NVp8BB83BMotfU,517
|
163
163
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
|
164
|
-
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=tP5DIEMn-k2JshWeXmo53ZEudDAVb4AJ50Z5tfme_ZU,8063
|
165
164
|
airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
|
166
165
|
airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
|
167
166
|
airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
|
@@ -277,7 +276,7 @@ airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py,sha
|
|
277
276
|
airbyte_cdk/sources/streams/http/error_handlers/response_models.py,sha256=xGIVELBFY0TmH9aUq1ikoqJz8oHLr6di2JLvKWVEO-s,2236
|
278
277
|
airbyte_cdk/sources/streams/http/exceptions.py,sha256=njC7MlMJoFYcSGz4mIp6-bqLFTr6vC8ej25X0oSeyjE,1824
|
279
278
|
airbyte_cdk/sources/streams/http/http.py,sha256=JAMpiTdS9HFNOlwayWNvQdxoqs2rpW9wdYlhFHv_1Q4,28496
|
280
|
-
airbyte_cdk/sources/streams/http/http_client.py,sha256=
|
279
|
+
airbyte_cdk/sources/streams/http/http_client.py,sha256=dyNrbcahEnDfGTrhqkr1XdfPiuVVRlKOdj-TJ5WRVrk,22923
|
281
280
|
airbyte_cdk/sources/streams/http/rate_limiting.py,sha256=IwdjrHKUnU97XO4qONgYRv4YYW51xQ8SJm4WLafXDB8,6351
|
282
281
|
airbyte_cdk/sources/streams/http/requests_native_auth/__init__.py,sha256=RN0D3nOX1xLgwEwKWu6pkGy3XqBFzKSNZ8Lf6umU2eY,413
|
283
282
|
airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py,sha256=nxI94yJ3bGfpDO8RR3QvOJ-PSW0n9CElSAkgl5ae80Y,10321
|
@@ -334,8 +333,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
334
333
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
335
334
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
336
335
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
337
|
-
airbyte_cdk-6.9.
|
338
|
-
airbyte_cdk-6.9.
|
339
|
-
airbyte_cdk-6.9.
|
340
|
-
airbyte_cdk-6.9.
|
341
|
-
airbyte_cdk-6.9.
|
336
|
+
airbyte_cdk-6.9.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
337
|
+
airbyte_cdk-6.9.1.dist-info/METADATA,sha256=vmQrD-o8vQwVRNF1PSFviNR1x8VcdqsvNr42p8_8u18,5949
|
338
|
+
airbyte_cdk-6.9.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
339
|
+
airbyte_cdk-6.9.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
340
|
+
airbyte_cdk-6.9.1.dist-info/RECORD,,
|
@@ -1,219 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
|
6
|
-
from copy import deepcopy
|
7
|
-
from dataclasses import InitVar, dataclass
|
8
|
-
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
9
|
-
|
10
|
-
import dpath
|
11
|
-
from typing_extensions import deprecated
|
12
|
-
|
13
|
-
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
14
|
-
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
15
|
-
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
16
|
-
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
17
|
-
from airbyte_cdk.sources.types import Config
|
18
|
-
|
19
|
-
AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
|
20
|
-
"string": {"type": ["null", "string"]},
|
21
|
-
"boolean": {"type": ["null", "boolean"]},
|
22
|
-
"date": {"type": ["null", "string"], "format": "date"},
|
23
|
-
"timestamp_without_timezone": {
|
24
|
-
"type": ["null", "string"],
|
25
|
-
"format": "date-time",
|
26
|
-
"airbyte_type": "timestamp_without_timezone",
|
27
|
-
},
|
28
|
-
"timestamp_with_timezone": {"type": ["null", "string"], "format": "date-time"},
|
29
|
-
"time_without_timezone": {
|
30
|
-
"type": ["null", "string"],
|
31
|
-
"format": "time",
|
32
|
-
"airbyte_type": "time_without_timezone",
|
33
|
-
},
|
34
|
-
"time_with_timezone": {
|
35
|
-
"type": ["null", "string"],
|
36
|
-
"format": "time",
|
37
|
-
"airbyte_type": "time_with_timezone",
|
38
|
-
},
|
39
|
-
"integer": {"type": ["null", "integer"]},
|
40
|
-
"number": {"type": ["null", "number"]},
|
41
|
-
"array": {"type": ["null", "array"]},
|
42
|
-
"object": {"type": ["null", "object"]},
|
43
|
-
}
|
44
|
-
|
45
|
-
|
46
|
-
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
47
|
-
@dataclass(frozen=True)
|
48
|
-
class TypesMap:
|
49
|
-
"""
|
50
|
-
Represents a mapping between a current type and its corresponding target type.
|
51
|
-
"""
|
52
|
-
|
53
|
-
target_type: Union[List[str], str]
|
54
|
-
current_type: Union[List[str], str]
|
55
|
-
|
56
|
-
|
57
|
-
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
58
|
-
@dataclass
|
59
|
-
class SchemaTypeIdentifier:
|
60
|
-
"""
|
61
|
-
Identifies schema details for dynamic schema extraction and processing.
|
62
|
-
"""
|
63
|
-
|
64
|
-
key_pointer: List[Union[InterpolatedString, str]]
|
65
|
-
parameters: InitVar[Mapping[str, Any]]
|
66
|
-
type_pointer: Optional[List[Union[InterpolatedString, str]]] = None
|
67
|
-
types_mapping: Optional[List[TypesMap]] = None
|
68
|
-
schema_pointer: Optional[List[Union[InterpolatedString, str]]] = None
|
69
|
-
|
70
|
-
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
71
|
-
self.schema_pointer = (
|
72
|
-
self._update_pointer(self.schema_pointer, parameters) if self.schema_pointer else []
|
73
|
-
) # type: ignore[assignment] # This is reqired field in model
|
74
|
-
self.key_pointer = self._update_pointer(self.key_pointer, parameters) # type: ignore[assignment] # This is reqired field in model
|
75
|
-
self.type_pointer = (
|
76
|
-
self._update_pointer(self.type_pointer, parameters) if self.type_pointer else None
|
77
|
-
)
|
78
|
-
|
79
|
-
@staticmethod
|
80
|
-
def _update_pointer(
|
81
|
-
pointer: Optional[List[Union[InterpolatedString, str]]], parameters: Mapping[str, Any]
|
82
|
-
) -> Optional[List[Union[InterpolatedString, str]]]:
|
83
|
-
return (
|
84
|
-
[
|
85
|
-
InterpolatedString.create(path, parameters=parameters)
|
86
|
-
if isinstance(path, str)
|
87
|
-
else path
|
88
|
-
for path in pointer
|
89
|
-
]
|
90
|
-
if pointer
|
91
|
-
else None
|
92
|
-
)
|
93
|
-
|
94
|
-
|
95
|
-
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
96
|
-
@dataclass
|
97
|
-
class DynamicSchemaLoader(SchemaLoader):
|
98
|
-
"""
|
99
|
-
Dynamically loads a JSON Schema by extracting data from retrieved records.
|
100
|
-
"""
|
101
|
-
|
102
|
-
retriever: Retriever
|
103
|
-
config: Config
|
104
|
-
parameters: InitVar[Mapping[str, Any]]
|
105
|
-
schema_type_identifier: SchemaTypeIdentifier
|
106
|
-
|
107
|
-
def get_json_schema(self) -> Mapping[str, Any]:
|
108
|
-
"""
|
109
|
-
Constructs a JSON Schema based on retrieved data.
|
110
|
-
"""
|
111
|
-
properties = {}
|
112
|
-
retrieved_record = next(self.retriever.read_records({}), None) # type: ignore[call-overload] # read_records return Iterable data type
|
113
|
-
|
114
|
-
raw_schema = (
|
115
|
-
self._extract_data(
|
116
|
-
retrieved_record, # type: ignore[arg-type] # Expected that retrieved_record will be only Mapping[str, Any]
|
117
|
-
self.schema_type_identifier.schema_pointer,
|
118
|
-
)
|
119
|
-
if retrieved_record
|
120
|
-
else []
|
121
|
-
)
|
122
|
-
|
123
|
-
for property_definition in raw_schema:
|
124
|
-
key = self._get_key(property_definition, self.schema_type_identifier.key_pointer)
|
125
|
-
value = self._get_type(
|
126
|
-
property_definition,
|
127
|
-
self.schema_type_identifier.type_pointer,
|
128
|
-
)
|
129
|
-
properties[key] = value
|
130
|
-
|
131
|
-
return {
|
132
|
-
"$schema": "http://json-schema.org/draft-07/schema#",
|
133
|
-
"type": "object",
|
134
|
-
"properties": properties,
|
135
|
-
}
|
136
|
-
|
137
|
-
def _get_key(
|
138
|
-
self,
|
139
|
-
raw_schema: MutableMapping[str, Any],
|
140
|
-
field_key_path: List[Union[InterpolatedString, str]],
|
141
|
-
) -> str:
|
142
|
-
"""
|
143
|
-
Extracts the key field from the schema using the specified path.
|
144
|
-
"""
|
145
|
-
field_key = self._extract_data(raw_schema, field_key_path)
|
146
|
-
if not isinstance(field_key, str):
|
147
|
-
raise ValueError(f"Expected key to be a string. Got {field_key}")
|
148
|
-
return field_key
|
149
|
-
|
150
|
-
def _get_type(
|
151
|
-
self,
|
152
|
-
raw_schema: MutableMapping[str, Any],
|
153
|
-
field_type_path: Optional[List[Union[InterpolatedString, str]]],
|
154
|
-
) -> Union[Mapping[str, Any], List[Mapping[str, Any]]]:
|
155
|
-
"""
|
156
|
-
Determines the JSON Schema type for a field, supporting nullable and combined types.
|
157
|
-
"""
|
158
|
-
raw_field_type = (
|
159
|
-
self._extract_data(raw_schema, field_type_path, default="string")
|
160
|
-
if field_type_path
|
161
|
-
else "string"
|
162
|
-
)
|
163
|
-
mapped_field_type = self._replace_type_if_not_valid(raw_field_type)
|
164
|
-
if (
|
165
|
-
isinstance(mapped_field_type, list)
|
166
|
-
and len(mapped_field_type) == 2
|
167
|
-
and all(isinstance(item, str) for item in mapped_field_type)
|
168
|
-
):
|
169
|
-
first_type = self._get_airbyte_type(mapped_field_type[0])
|
170
|
-
second_type = self._get_airbyte_type(mapped_field_type[1])
|
171
|
-
return {"oneOf": [first_type, second_type]}
|
172
|
-
elif isinstance(mapped_field_type, str):
|
173
|
-
return self._get_airbyte_type(mapped_field_type)
|
174
|
-
else:
|
175
|
-
raise ValueError(
|
176
|
-
f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
|
177
|
-
)
|
178
|
-
|
179
|
-
def _replace_type_if_not_valid(
|
180
|
-
self, field_type: Union[List[str], str]
|
181
|
-
) -> Union[List[str], str]:
|
182
|
-
"""
|
183
|
-
Replaces a field type if it matches a type mapping in `types_map`.
|
184
|
-
"""
|
185
|
-
if self.schema_type_identifier.types_mapping:
|
186
|
-
for types_map in self.schema_type_identifier.types_mapping:
|
187
|
-
if field_type == types_map.current_type:
|
188
|
-
return types_map.target_type
|
189
|
-
return field_type
|
190
|
-
|
191
|
-
@staticmethod
|
192
|
-
def _get_airbyte_type(field_type: str) -> Mapping[str, Any]:
|
193
|
-
"""
|
194
|
-
Maps a field type to its corresponding Airbyte type definition.
|
195
|
-
"""
|
196
|
-
if field_type not in AIRBYTE_DATA_TYPES:
|
197
|
-
raise ValueError(f"Invalid Airbyte data type: {field_type}")
|
198
|
-
|
199
|
-
return deepcopy(AIRBYTE_DATA_TYPES[field_type])
|
200
|
-
|
201
|
-
def _extract_data(
|
202
|
-
self,
|
203
|
-
body: Mapping[str, Any],
|
204
|
-
extraction_path: Optional[List[Union[InterpolatedString, str]]] = None,
|
205
|
-
default: Any = None,
|
206
|
-
) -> Any:
|
207
|
-
"""
|
208
|
-
Extracts data from the body based on the provided extraction path.
|
209
|
-
"""
|
210
|
-
|
211
|
-
if not extraction_path:
|
212
|
-
return body
|
213
|
-
|
214
|
-
path = [
|
215
|
-
node.eval(self.config) if not isinstance(node, str) else node
|
216
|
-
for node in extraction_path
|
217
|
-
]
|
218
|
-
|
219
|
-
return dpath.get(body, path, default=default) # type: ignore # extracted will be a MutableMapping, given input data structure
|
File without changes
|
File without changes
|
File without changes
|