airbyte-cdk 6.9.1__py3-none-any.whl → 6.9.1.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +35 -30
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +3 -101
- airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -53
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +2 -95
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -6
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +21 -95
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +1 -2
- airbyte_cdk/sources/streams/http/http_client.py +5 -15
- airbyte_cdk/test/utils/manifest_only_fixtures.py +80 -0
- airbyte_cdk-6.9.1.dev1.dist-info/METADATA +306 -0
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev1.dist-info}/RECORD +15 -17
- airbyte_cdk/sources/declarative/resolvers/__init__.py +0 -13
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +0 -55
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +0 -106
- airbyte_cdk-6.9.1.dist-info/METADATA +0 -108
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.9.1.dist-info → airbyte_cdk-6.9.1.dev1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/concurrent_declarative_source.py

@@ -56,9 +56,8 @@ from airbyte_cdk.sources.types import Config, StreamState


 class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
-    # By default, we defer to a value of
-
-    _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
+    # By default, we defer to a value of 1 which represents running a connector using the Concurrent CDK engine on only one thread.
+    SINGLE_THREADED_CONCURRENCY_LEVEL = 1

     def __init__(
         self,
@@ -87,10 +86,23 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             component_factory=component_factory,
         )

-        # todo: We could remove state from initialization. Now that streams are grouped during the read(), a source
-        # no longer needs to store the original incoming state. But maybe there's an edge case?
         self._state = state

+        self._concurrent_streams: Optional[List[AbstractStream]]
+        self._synchronous_streams: Optional[List[Stream]]
+
+        # If the connector command was SPEC, there is no incoming config, and we cannot instantiate streams because
+        # they might depend on it. Ideally we want to have a static method on this class to get the spec without
+        # any other arguments, but the existing entrypoint.py isn't designed to support this. Just noting this
+        # for our future improvements to the CDK.
+        if config:
+            self._concurrent_streams, self._synchronous_streams = self._group_streams(
+                config=config or {}
+            )
+        else:
+            self._concurrent_streams = None
+            self._synchronous_streams = None
+
         concurrency_level_from_manifest = self._source_config.get("concurrency_level")
         if concurrency_level_from_manifest:
             concurrency_level_component = self._constructor.create_component(
@@ -108,8 +120,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                 concurrency_level // 2, 1
             )  # Partition_generation iterates using range based on this value. If this is floored to zero we end up in a dead lock during start up
         else:
-            concurrency_level = self.
-            initial_number_of_partitions_to_generate = self.
+            concurrency_level = self.SINGLE_THREADED_CONCURRENCY_LEVEL
+            initial_number_of_partitions_to_generate = self.SINGLE_THREADED_CONCURRENCY_LEVEL

         self._concurrent_source = ConcurrentSource.create(
             num_workers=concurrency_level,
@@ -124,20 +136,17 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
         logger: logging.Logger,
         config: Mapping[str, Any],
         catalog: ConfiguredAirbyteCatalog,
-        state: Optional[List[AirbyteStateMessage]] = None,
+        state: Optional[Union[List[AirbyteStateMessage]]] = None,
     ) -> Iterator[AirbyteMessage]:
-
-
-
-        # the concurrent streams must be saved so that they can be removed from the catalog before starting
-        # synchronous streams
-        if len(concurrent_streams) > 0:
+        # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of the concurrent
+        # streams must be saved so that they can be removed from the catalog before starting synchronous streams
+        if self._concurrent_streams:
             concurrent_stream_names = set(
-                [concurrent_stream.name for concurrent_stream in
+                [concurrent_stream.name for concurrent_stream in self._concurrent_streams]
             )

             selected_concurrent_streams = self._select_streams(
-                streams=
+                streams=self._concurrent_streams, configured_catalog=catalog
             )
             # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
             # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
@@ -156,7 +165,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             yield from super().read(logger, config, filtered_catalog, state)

     def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
-        concurrent_streams
+        concurrent_streams = self._concurrent_streams or []
+        synchronous_streams = self._synchronous_streams or []
         return AirbyteCatalog(
             streams=[
                 stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
@@ -182,13 +192,9 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):

         state_manager = ConnectorStateManager(state=self._state)  # type: ignore # state is always in the form of List[AirbyteStateMessage]. The ConnectorStateManager should use generics, but this can be done later

-
-
-
-            self._source_config, config
-        )
-
-        name_to_stream_mapping = {stream["name"]: stream for stream in streams}
+        name_to_stream_mapping = {
+            stream["name"]: stream for stream in self.resolved_manifest["streams"]
+        }

         for declarative_stream in self.streams(config=config):
             # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
@@ -196,7 +202,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
             # so we need to treat them as synchronous
             if (
                 isinstance(declarative_stream, DeclarativeStream)
-                and name_to_stream_mapping[declarative_stream.name]
+                and name_to_stream_mapping[declarative_stream.name].get("retriever")["type"]
                 == "SimpleRetriever"
             ):
                 incremental_sync_component_definition = name_to_stream_mapping[
@@ -205,7 +211,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):

                 partition_router_component_definition = (
                     name_to_stream_mapping[declarative_stream.name]
-                    .get("retriever"
+                    .get("retriever")
                     .get("partition_router")
                 )
                 is_without_partition_router_or_cursor = not bool(
@@ -227,7 +233,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
                     cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
                         state_manager=state_manager,
                         model_type=DatetimeBasedCursorModel,
-                        component_definition=incremental_sync_component_definition,
+                        component_definition=incremental_sync_component_definition,
                         stream_name=declarative_stream.name,
                         stream_namespace=declarative_stream.namespace,
                         config=config or {},
@@ -310,11 +316,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
     def _is_datetime_incremental_without_partition_routing(
         self,
         declarative_stream: DeclarativeStream,
-        incremental_sync_component_definition: Mapping[str, Any]
+        incremental_sync_component_definition: Mapping[str, Any],
     ) -> bool:
         return (
-            incremental_sync_component_definition
-            and bool(incremental_sync_component_definition)
+            bool(incremental_sync_component_definition)
            and incremental_sync_component_definition.get("type", "")
            == DatetimeBasedCursorModel.__name__
            and self._stream_supports_concurrent_partition_processing(
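For reference, a minimal, self-contained sketch of the constructor/discover() behaviour shown in the hunks above: when a config is present the streams are grouped immediately, otherwise both groups stay None and discover() falls back to empty lists. The class and stream names below are placeholders, not the real CDK types.

from typing import List, Optional, Tuple

class SketchSource:
    """Illustrative stand-in for the config guard shown above (not the real ConcurrentDeclarativeSource)."""

    def __init__(self, config: Optional[dict]) -> None:
        self._concurrent_streams: Optional[List[str]]
        self._synchronous_streams: Optional[List[str]]
        if config:
            # With a config, streams are grouped up front (fake names stand in for real stream objects).
            self._concurrent_streams, self._synchronous_streams = self._group_streams(config)
        else:
            # Without a config (e.g. the SPEC command), grouping is skipped entirely.
            self._concurrent_streams = None
            self._synchronous_streams = None

    def _group_streams(self, config: dict) -> Tuple[List[str], List[str]]:
        # Placeholder grouping: one "concurrent" and one "synchronous" stream name.
        return ["incremental_stream"], ["full_refresh_stream"]

    def discover(self) -> List[str]:
        # Mirrors the discover() change above: fall back to empty lists so None is never iterated.
        return (self._concurrent_streams or []) + (self._synchronous_streams or [])

print(SketchSource({"api_key": "..."}).discover())  # ['incremental_stream', 'full_refresh_stream']
print(SketchSource(None).discover())                 # []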
airbyte_cdk/sources/declarative/declarative_component_schema.yaml

@@ -7,12 +7,8 @@ version: 1.0.0
 required:
   - type
   - check
+  - streams
   - version
-anyOf:
-  - required:
-      - streams
-  - required:
-      - dynamic_streams
 properties:
   type:
     type: string
@@ -23,10 +19,6 @@ properties:
     type: array
     items:
       "$ref": "#/definitions/DeclarativeStream"
-  dynamic_streams:
-    type: array
-    items:
-      "$ref": "#/definitions/DynamicDeclarativeStream"
   version:
     type: string
     description: The version of the Airbyte CDK used to build and test the source.
@@ -327,7 +319,7 @@ definitions:
       additionalProperties: true
   ConcurrencyLevel:
     title: Concurrency Level
-    description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time.
+    description: Defines the amount of parallelization for the streams that are being synced. The factor of parallelization is how many partitions or streams are synced at the same time. For example, with a concurrency_level of 10, ten streams or partitions of data will processed at the same time.
     type: object
     required:
       - default_concurrency
@@ -1329,7 +1321,7 @@ definitions:
         type: array
         items:
          - type: string
-
+        interpolation_content:
          - config
        examples:
          - ["data"]
@@ -2903,96 +2895,6 @@ definitions:
      $parameters:
        type: object
        additionalProperties: true
-  ComponentMappingDefinition:
-    title: Component Mapping Definition
-    description: (This component is experimental. Use at your own risk.) Specifies a mapping definition to update or add fields in a record or configuration. This allows dynamic mapping of data by interpolating values into the template based on provided contexts.
-    type: object
-    required:
-      - type
-      - field_path
-      - value
-    properties:
-      type:
-        type: string
-        enum: [ComponentMappingDefinition]
-      field_path:
-        title: Field Path
-        description: A list of potentially nested fields indicating the full path where value will be added or updated.
-        type: array
-        items:
-          - type: string
-        interpolation_context:
-          - config
-          - components_values
-          - stream_template_config
-        examples:
-          - ["data"]
-          - ["data", "records"]
-          - ["data", "{{ parameters.name }}"]
-          - ["data", "*", "record"]
-      value:
-        title: Value
-        description: The dynamic or static value to assign to the key. Interpolated values can be used to dynamically determine the value during runtime.
-        type: string
-        interpolation_context:
-          - config
-          - stream_template_config
-          - components_values
-        examples:
-          - "{{ components_values['updates'] }}"
-          - "{{ components_values['MetaData']['LastUpdatedTime'] }}"
-          - "{{ config['segment_id'] }}"
-      value_type:
-        title: Value Type
-        description: The expected data type of the value. If omitted, the type will be inferred from the value provided.
-        "$ref": "#/definitions/ValueType"
-      $parameters:
-        type: object
-        additionalProperties: true
-  HttpComponentsResolver:
-    type: object
-    description: (This component is experimental. Use at your own risk.) Component resolve and populates stream templates with components fetched via an HTTP retriever.
-    properties:
-      type:
-        type: string
-        enum: [HttpComponentsResolver]
-      retriever:
-        title: Retriever
-        description: Component used to coordinate how records are extracted across stream slices and request pages.
-        anyOf:
-          - "$ref": "#/definitions/AsyncRetriever"
-          - "$ref": "#/definitions/CustomRetriever"
-          - "$ref": "#/definitions/SimpleRetriever"
-      components_mapping:
-        type: array
-        items:
-          "$ref": "#/definitions/ComponentMappingDefinition"
-      $parameters:
-        type: object
-        additionalProperties: true
-    required:
-      - type
-      - retriever
-      - components_mapping
-  DynamicDeclarativeStream:
-    type: object
-    description: (This component is experimental. Use at your own risk.) A component that described how will be created declarative streams based on stream template.
-    properties:
-      type:
-        type: string
-        enum: [DynamicDeclarativeStream]
-      stream_template:
-        title: Stream Template
-        description: Reference to the stream template.
-        "$ref": "#/definitions/DeclarativeStream"
-      components_resolver:
-        title: Components Resolver
-        description: Component resolve and populates stream templates with components values.
-        "$ref": "#/definitions/HttpComponentsResolver"
-    required:
-      - type
-      - stream_template
-      - components_resolver
 interpolation:
   variables:
     - title: config
airbyte_cdk/sources/declarative/interpolation/jinja.py

@@ -4,7 +4,7 @@

 import ast
 from functools import cache
-from typing import Any, Mapping, Optional,
+from typing import Any, Mapping, Optional, Tuple, Type

 from jinja2 import meta
 from jinja2.environment import Template
@@ -27,35 +27,7 @@ class StreamPartitionAccessEnvironment(SandboxedEnvironment):
     def is_safe_attribute(self, obj: Any, attr: str, value: Any) -> bool:
         if attr in ["_partition"]:
             return True
-        return super().is_safe_attribute(obj, attr, value)
-
-
-# These aliases are used to deprecate existing keywords without breaking all existing connectors.
-_ALIASES = {
-    "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
-    "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
-}
-
-# These extensions are not installed so they're not currently a problem,
-# but we're still explicitly removing them from the jinja context.
-# At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
-_RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
-
-# By default, these Python builtin functions are available in the Jinja context.
-# We explicitly remove them because of the potential security risk.
-# Please add a unit test to test_jinja.py when adding a restriction.
-_RESTRICTED_BUILTIN_FUNCTIONS = [
-    "range"
-]  # The range function can cause very expensive computations
-
-_ENVIRONMENT = StreamPartitionAccessEnvironment()
-_ENVIRONMENT.filters.update(**filters)
-_ENVIRONMENT.globals.update(**macros)
-
-for extension in _RESTRICTED_EXTENSIONS:
-    _ENVIRONMENT.extensions.pop(extension, None)
-for builtin in _RESTRICTED_BUILTIN_FUNCTIONS:
-    _ENVIRONMENT.globals.pop(builtin, None)
+        return super().is_safe_attribute(obj, attr, value)


 class JinjaInterpolation(Interpolation):
@@ -76,6 +48,34 @@ class JinjaInterpolation(Interpolation):
     Additional information on jinja templating can be found at https://jinja.palletsprojects.com/en/3.1.x/templates/#
     """

+    # These aliases are used to deprecate existing keywords without breaking all existing connectors.
+    ALIASES = {
+        "stream_interval": "stream_slice",  # Use stream_interval to access incremental_sync values
+        "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
+    }
+
+    # These extensions are not installed so they're not currently a problem,
+    # but we're still explicitely removing them from the jinja context.
+    # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
+    RESTRICTED_EXTENSIONS = ["jinja2.ext.loopcontrols"]  # Adds support for break continue in loops
+
+    # By default, these Python builtin functions are available in the Jinja context.
+    # We explicitely remove them because of the potential security risk.
+    # Please add a unit test to test_jinja.py when adding a restriction.
+    RESTRICTED_BUILTIN_FUNCTIONS = [
+        "range"
+    ]  # The range function can cause very expensive computations
+
+    def __init__(self) -> None:
+        self._environment = StreamPartitionAccessEnvironment()
+        self._environment.filters.update(**filters)
+        self._environment.globals.update(**macros)
+
+        for extension in self.RESTRICTED_EXTENSIONS:
+            self._environment.extensions.pop(extension, None)
+        for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
+            self._environment.globals.pop(builtin, None)
+
     def eval(
         self,
         input_str: str,
@@ -86,7 +86,7 @@ class JinjaInterpolation(Interpolation):
     ) -> Any:
         context = {"config": config, **additional_parameters}

-        for alias, equivalent in
+        for alias, equivalent in self.ALIASES.items():
             if alias in context:
                 # This is unexpected. We could ignore or log a warning, but failing loudly should result in fewer surprises
                 raise ValueError(
@@ -105,7 +105,6 @@ class JinjaInterpolation(Interpolation):
                 raise Exception(f"Expected a string, got {input_str}")
         except UndefinedError:
             pass
-
         # If result is empty or resulted in an undefined error, evaluate and return the default string
         return self._literal_eval(self._eval(default, context), valid_types)

@@ -133,16 +132,16 @@ class JinjaInterpolation(Interpolation):
             return s

     @cache
-    def _find_undeclared_variables(self, s: Optional[str]) ->
+    def _find_undeclared_variables(self, s: Optional[str]) -> set[str]:
         """
         Find undeclared variables and cache them
         """
-        ast =
+        ast = self._environment.parse(s)  # type: ignore # parse is able to handle None
         return meta.find_undeclared_variables(ast)

     @cache
-    def _compile(self, s: str) -> Template:
+    def _compile(self, s: Optional[str]) -> Template:
         """
         We must cache the Jinja Template ourselves because we're using `from_string` instead of a template loader
         """
-        return
+        return self._environment.from_string(s)  # type: ignore [arg-type] # Expected `str | Template` but passed `str | None`
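For reference, a minimal standalone sketch of the per-instance environment pattern the hunks above move to: each interpolation object builds its own sandboxed Jinja environment, strips restricted builtins, and resolves deprecated aliases at eval time. It uses plain jinja2 and placeholder names rather than the CDK's own filters and macros.

from jinja2.sandbox import SandboxedEnvironment

class SketchInterpolation:
    """Illustrative per-instance sandboxed environment (not the CDK's JinjaInterpolation)."""

    ALIASES = {"stream_interval": "stream_slice", "stream_partition": "stream_slice"}
    RESTRICTED_BUILTIN_FUNCTIONS = ["range"]  # expensive builtins dropped from the template globals

    def __init__(self) -> None:
        # Each instance now owns its environment instead of sharing a module-level one.
        self._environment = SandboxedEnvironment()
        for builtin in self.RESTRICTED_BUILTIN_FUNCTIONS:
            self._environment.globals.pop(builtin, None)

    def eval(self, template: str, **context: object) -> str:
        # Expose the deprecated aliases by pointing them at their current equivalents.
        for alias, equivalent in self.ALIASES.items():
            if equivalent in context:
                context[alias] = context[equivalent]
        return self._environment.from_string(template).render(**context)

interp = SketchInterpolation()
print(interp.eval("{{ stream_interval['start'] }}", stream_slice={"start": "2024-01-01"}))  # 2024-01-01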
airbyte_cdk/sources/declarative/manifest_declarative_source.py

@@ -39,7 +39,6 @@ from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
-from airbyte_cdk.sources.declarative.resolvers import COMPONENTS_RESOLVER_TYPE_MAPPING
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams.core import Stream
 from airbyte_cdk.sources.types import ConnectionDefinition
@@ -121,10 +120,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
         self._emit_manifest_debug_message(
             extra_args={"source_name": self.name, "parsed_config": json.dumps(self._source_config)}
         )
-
-        stream_configs = self._stream_configs(self._source_config) + self._dynamic_stream_configs(
-            self._source_config, config
-        )
+        stream_configs = self._stream_configs(self._source_config)

         source_streams = [
             self._constructor.create_component(
@@ -238,8 +234,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
         )

         streams = self._source_config.get("streams")
-
-        if not (streams or dynamic_streams):
+        if not streams:
             raise ValidationError(
                 f"A valid manifest should have at least one stream defined. Got {streams}"
             )
@@ -308,51 +303,5 @@ class ManifestDeclarativeSource(DeclarativeSource):
                 s["type"] = "DeclarativeStream"
         return stream_configs

-    def _dynamic_stream_configs(
-        self, manifest: Mapping[str, Any], config: Mapping[str, Any]
-    ) -> List[Dict[str, Any]]:
-        dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", [])
-        dynamic_stream_configs: List[Dict[str, Any]] = []
-
-        for dynamic_definition in dynamic_stream_definitions:
-            components_resolver_config = dynamic_definition["components_resolver"]
-
-            if not components_resolver_config:
-                raise ValueError(
-                    f"Missing 'components_resolver' in dynamic definition: {dynamic_definition}"
-                )
-
-            resolver_type = components_resolver_config.get("type")
-            if not resolver_type:
-                raise ValueError(
-                    f"Missing 'type' in components resolver configuration: {components_resolver_config}"
-                )
-
-            if resolver_type not in COMPONENTS_RESOLVER_TYPE_MAPPING:
-                raise ValueError(
-                    f"Invalid components resolver type '{resolver_type}'. "
-                    f"Expected one of {list(COMPONENTS_RESOLVER_TYPE_MAPPING.keys())}."
-                )
-
-            if "retriever" in components_resolver_config:
-                components_resolver_config["retriever"]["requester"]["use_cache"] = True
-
-            # Create a resolver for dynamic components based on type
-            components_resolver = self._constructor.create_component(
-                COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type], components_resolver_config, config
-            )
-
-            stream_template_config = dynamic_definition["stream_template"]
-
-            for dynamic_stream in components_resolver.resolve_components(
-                stream_template_config=stream_template_config
-            ):
-                if "type" not in dynamic_stream:
-                    dynamic_stream["type"] = "DeclarativeStream"
-
-                dynamic_stream_configs.append(dynamic_stream)
-
-        return dynamic_stream_configs
-
     def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
         self.logger.debug("declarative source created from manifest", extra=extra_args)
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -1158,37 +1158,6 @@ class WaitUntilTimeFromHeader(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class ComponentMappingDefinition(BaseModel):
-    type: Literal["ComponentMappingDefinition"]
-    field_path: List[str] = Field(
-        ...,
-        description="A list of potentially nested fields indicating the full path where value will be added or updated.",
-        examples=[
-            ["data"],
-            ["data", "records"],
-            ["data", "{{ parameters.name }}"],
-            ["data", "*", "record"],
-        ],
-        title="Field Path",
-    )
-    value: str = Field(
-        ...,
-        description="The dynamic or static value to assign to the key. Interpolated values can be used to dynamically determine the value during runtime.",
-        examples=[
-            "{{ components_values['updates'] }}",
-            "{{ components_values['MetaData']['LastUpdatedTime'] }}",
-            "{{ config['segment_id'] }}",
-        ],
-        title="Value",
-    )
-    value_type: Optional[ValueType] = Field(
-        None,
-        description="The expected data type of the value. If omitted, the type will be inferred from the value provided.",
-        title="Value Type",
-    )
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
 class AddedFieldDefinition(BaseModel):
     type: Literal["AddedFieldDefinition"]
     path: List[str] = Field(
@@ -1486,40 +1455,13 @@ class CompositeErrorHandler(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class
+class DeclarativeSource(BaseModel):
     class Config:
         extra = Extra.forbid

     type: Literal["DeclarativeSource"]
     check: CheckStream
     streams: List[DeclarativeStream]
-    dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
-    version: str = Field(
-        ...,
-        description="The version of the Airbyte CDK used to build and test the source.",
-    )
-    schemas: Optional[Schemas] = None
-    definitions: Optional[Dict[str, Any]] = None
-    spec: Optional[Spec] = None
-    concurrency_level: Optional[ConcurrencyLevel] = None
-    metadata: Optional[Dict[str, Any]] = Field(
-        None,
-        description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
-    )
-    description: Optional[str] = Field(
-        None,
-        description="A description of the connector. It will be presented on the Source documentation page.",
-    )
-
-
-class DeclarativeSource2(BaseModel):
-    class Config:
-        extra = Extra.forbid
-
-    type: Literal["DeclarativeSource"]
-    check: CheckStream
-    streams: Optional[List[DeclarativeStream]] = None
-    dynamic_streams: List[DynamicDeclarativeStream]
     version: str = Field(
         ...,
         description="The version of the Airbyte CDK used to build and test the source.",
@@ -1538,17 +1480,6 @@ class DeclarativeSource2(BaseModel):
     )


-class DeclarativeSource(BaseModel):
-    class Config:
-        extra = Extra.forbid
-
-    __root__: Union[DeclarativeSource1, DeclarativeSource2] = Field(
-        ...,
-        description="An API source that extracts data according to its declarative components.",
-        title="DeclarativeSource",
-    )
-
-
 class SelectiveAuthenticator(BaseModel):
     class Config:
         extra = Extra.allow
@@ -1952,32 +1883,8 @@ class SubstreamPartitionRouter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class HttpComponentsResolver(BaseModel):
-    type: Literal["HttpComponentsResolver"]
-    retriever: Union[AsyncRetriever, CustomRetriever, SimpleRetriever] = Field(
-        ...,
-        description="Component used to coordinate how records are extracted across stream slices and request pages.",
-        title="Retriever",
-    )
-    components_mapping: List[ComponentMappingDefinition]
-    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
-class DynamicDeclarativeStream(BaseModel):
-    type: Literal["DynamicDeclarativeStream"]
-    stream_template: DeclarativeStream = Field(
-        ..., description="Reference to the stream template.", title="Stream Template"
-    )
-    components_resolver: HttpComponentsResolver = Field(
-        ...,
-        description="Component resolve and populates stream templates with components values.",
-        title="Components Resolver",
-    )
-
-
 CompositeErrorHandler.update_forward_refs()
-
-DeclarativeSource2.update_forward_refs()
+DeclarativeSource.update_forward_refs()
 SelectiveAuthenticator.update_forward_refs()
 DeclarativeStream.update_forward_refs()
 SessionTokenAuthenticator.update_forward_refs()
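For reference, a minimal sketch of what the reverted model shape implies for manifest validation: a single DeclarativeSource model whose streams list is required again, with no dynamic_streams variant or root Union. The classes below are simplified stand-ins, not the generated models.

from typing import List
from pydantic import BaseModel

# Simplified stand-ins for the generated models above (not the real generated classes).
class SketchStream(BaseModel):
    name: str

class SketchDeclarativeSource(BaseModel):
    type: str
    version: str
    streams: List[SketchStream]  # required; no dynamic_streams / root Union after the revert

manifest = {
    "type": "DeclarativeSource",
    "version": "6.9.1.dev1",
    "streams": [{"name": "customers"}],
}
print(SketchDeclarativeSource(**manifest).streams[0].name)  # customers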
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py

@@ -31,12 +31,6 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
     # DeclarativeStream
     "DeclarativeStream.retriever": "SimpleRetriever",
     "DeclarativeStream.schema_loader": "JsonFileSchemaLoader",
-    # DynamicDeclarativeStream
-    "DynamicDeclarativeStream.stream_template": "DeclarativeStream",
-    "DynamicDeclarativeStream.components_resolver": "HttpComponentsResolver",
-    # HttpComponentsResolver
-    "HttpComponentsResolver.retriever": "SimpleRetriever",
-    "HttpComponentsResolver.components_mapping": "ComponentMappingDefinition",
     # DefaultErrorHandler
     "DefaultErrorHandler.response_filters": "HttpResponseFilter",
     # DefaultPaginator