airbyte-cdk 6.39.2__py3-none-any.whl → 6.40.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +66 -2
- airbyte_cdk/sources/declarative/declarative_stream.py +8 -1
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +24 -3
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +44 -3
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +205 -80
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +66 -12
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/retrievers/__init__.py +8 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +30 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +84 -2
- airbyte_cdk/sources/declarative/transformations/add_fields.py +10 -2
- airbyte_cdk/sources/declarative/transformations/dpath_flatten_fields.py +10 -4
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/RECORD +19 -19
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.39.2.dist-info → airbyte_cdk-6.40.0.dev0.dist-info}/entry_points.txt +0 -0
@@ -162,6 +162,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
162
162
|
else:
|
163
163
|
filtered_catalog = catalog
|
164
164
|
|
165
|
+
# There is no need to run read for synchronous streams if there are none.
|
166
|
+
if not filtered_catalog.streams:
|
167
|
+
return
|
168
|
+
|
165
169
|
yield from super().read(logger, config, filtered_catalog, state)
|
166
170
|
|
167
171
|
def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
|
@@ -201,6 +205,18 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
201
205
|
# Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
|
202
206
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
203
207
|
# so we need to treat them as synchronous
|
208
|
+
|
209
|
+
if name_to_stream_mapping[declarative_stream.name]["type"] == "StateDelegatingStream":
|
210
|
+
stream_state = self._connector_state_manager.get_stream_state(
|
211
|
+
stream_name=declarative_stream.name, namespace=declarative_stream.namespace
|
212
|
+
)
|
213
|
+
|
214
|
+
name_to_stream_mapping[declarative_stream.name] = (
|
215
|
+
name_to_stream_mapping[declarative_stream.name]["incremental_stream"]
|
216
|
+
if stream_state
|
217
|
+
else name_to_stream_mapping[declarative_stream.name]["full_refresh_stream"]
|
218
|
+
)
|
219
|
+
|
204
220
|
if isinstance(declarative_stream, DeclarativeStream) and (
|
205
221
|
name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
|
206
222
|
== "SimpleRetriever"
|
@@ -24,7 +24,9 @@ properties:
|
|
24
24
|
streams:
|
25
25
|
type: array
|
26
26
|
items:
|
27
|
-
|
27
|
+
anyOf:
|
28
|
+
- "$ref": "#/definitions/DeclarativeStream"
|
29
|
+
- "$ref": "#/definitions/StateDelegatingStream"
|
28
30
|
dynamic_streams:
|
29
31
|
type: array
|
30
32
|
items:
|
@@ -114,6 +116,19 @@ definitions:
|
|
114
116
|
type: array
|
115
117
|
items:
|
116
118
|
"$ref": "#/definitions/AddedFieldDefinition"
|
119
|
+
condition:
|
120
|
+
description: Fields will be added if expression is evaluated to True.
|
121
|
+
type: string
|
122
|
+
default: ""
|
123
|
+
interpolation_context:
|
124
|
+
- config
|
125
|
+
- property
|
126
|
+
- parameters
|
127
|
+
examples:
|
128
|
+
- "{{ property|string == '' }}"
|
129
|
+
- "{{ property is integer }}"
|
130
|
+
- "{{ property|length > 5 }}"
|
131
|
+
- "{{ property == 'some_string_to_match' }}"
|
117
132
|
$parameters:
|
118
133
|
type: object
|
119
134
|
additionalProperties: true
|
@@ -2263,6 +2278,10 @@ definitions:
|
|
2263
2278
|
title: Delete Origin Value
|
2264
2279
|
description: Whether to delete the origin value or keep it. Default is False.
|
2265
2280
|
type: boolean
|
2281
|
+
replace_record:
|
2282
|
+
title: Replace Origin Record
|
2283
|
+
description: Whether to replace the origin record or not. Default is False.
|
2284
|
+
type: boolean
|
2266
2285
|
$parameters:
|
2267
2286
|
type: object
|
2268
2287
|
additionalProperties: true
|
@@ -2871,6 +2890,15 @@ definitions:
|
|
2871
2890
|
type:
|
2872
2891
|
type: string
|
2873
2892
|
enum: [ParentStreamConfig]
|
2893
|
+
lazy_read_pointer:
|
2894
|
+
title: Lazy Read Pointer
|
2895
|
+
description: If set, this will enable lazy reading, using the initial read of parent records to extract child records.
|
2896
|
+
type: array
|
2897
|
+
default: [ ]
|
2898
|
+
items:
|
2899
|
+
- type: string
|
2900
|
+
interpolation_context:
|
2901
|
+
- config
|
2874
2902
|
parent_key:
|
2875
2903
|
title: Parent Key
|
2876
2904
|
description: The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.
|
@@ -2881,7 +2909,9 @@ definitions:
|
|
2881
2909
|
stream:
|
2882
2910
|
title: Parent Stream
|
2883
2911
|
description: Reference to the parent stream.
|
2884
|
-
|
2912
|
+
anyOf:
|
2913
|
+
- "$ref": "#/definitions/DeclarativeStream"
|
2914
|
+
- "$ref": "#/definitions/StateDelegatingStream"
|
2885
2915
|
partition_field:
|
2886
2916
|
title: Current Parent Key Value Identifier
|
2887
2917
|
description: While iterating over parent records during a sync, the parent_key value can be referenced by using this field.
|
@@ -2987,6 +3017,10 @@ definitions:
|
|
2987
3017
|
- "$ref": "#/definitions/SchemaNormalization"
|
2988
3018
|
- "$ref": "#/definitions/CustomSchemaNormalization"
|
2989
3019
|
default: None
|
3020
|
+
transform_before_filtering:
|
3021
|
+
description: If true, transformation will be applied before record filtering.
|
3022
|
+
type: boolean
|
3023
|
+
default: false
|
2990
3024
|
$parameters:
|
2991
3025
|
type: object
|
2992
3026
|
additionalProperties: true
|
@@ -3154,6 +3188,36 @@ definitions:
|
|
3154
3188
|
$parameters:
|
3155
3189
|
type: object
|
3156
3190
|
additionalProperties: true
|
3191
|
+
StateDelegatingStream:
|
3192
|
+
description: (This component is experimental. Use at your own risk.) Orchestrate the retriever's usage based on the state value.
|
3193
|
+
type: object
|
3194
|
+
required:
|
3195
|
+
- type
|
3196
|
+
- name
|
3197
|
+
- full_refresh_stream
|
3198
|
+
- incremental_stream
|
3199
|
+
properties:
|
3200
|
+
type:
|
3201
|
+
type: string
|
3202
|
+
enum: [ StateDelegatingStream ]
|
3203
|
+
name:
|
3204
|
+
title: Name
|
3205
|
+
description: The stream name.
|
3206
|
+
type: string
|
3207
|
+
default: ""
|
3208
|
+
example:
|
3209
|
+
- "Users"
|
3210
|
+
full_refresh_stream:
|
3211
|
+
title: Retriever
|
3212
|
+
description: Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.
|
3213
|
+
"$ref": "#/definitions/DeclarativeStream"
|
3214
|
+
incremental_stream:
|
3215
|
+
title: Retriever
|
3216
|
+
description: Component used to coordinate how records are extracted across stream slices and request pages when the state provided.
|
3217
|
+
"$ref": "#/definitions/DeclarativeStream"
|
3218
|
+
$parameters:
|
3219
|
+
type: object
|
3220
|
+
additionalProperties: true
|
3157
3221
|
SimpleRetriever:
|
3158
3222
|
description: Retrieves records by synchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router.
|
3159
3223
|
type: object
|
@@ -14,6 +14,7 @@ from airbyte_cdk.sources.declarative.incremental import (
|
|
14
14
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
15
15
|
from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
|
16
16
|
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
|
17
|
+
from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
|
17
18
|
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
|
18
19
|
from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader
|
19
20
|
from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
|
@@ -76,11 +77,17 @@ class DeclarativeStream(Stream):
|
|
76
77
|
|
77
78
|
@property
|
78
79
|
def exit_on_rate_limit(self) -> bool:
|
80
|
+
if isinstance(self.retriever, AsyncRetriever):
|
81
|
+
return self.retriever.exit_on_rate_limit
|
82
|
+
|
79
83
|
return self.retriever.requester.exit_on_rate_limit # type: ignore # abstract Retriever class has not requester attribute
|
80
84
|
|
81
85
|
@exit_on_rate_limit.setter
|
82
86
|
def exit_on_rate_limit(self, value: bool) -> None:
|
83
|
-
self.retriever.requester.exit_on_rate_limit = value # type: ignore[attr-defined]
|
87
|
+
if isinstance(self.retriever, AsyncRetriever):
|
88
|
+
self.retriever.exit_on_rate_limit = value
|
89
|
+
else:
|
90
|
+
self.retriever.requester.exit_on_rate_limit = value # type: ignore[attr-defined]
|
84
91
|
|
85
92
|
@property # type: ignore
|
86
93
|
def name(self) -> str:
|
@@ -30,6 +30,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
30
30
|
DeclarativeStream as DeclarativeStreamModel,
|
31
31
|
)
|
32
32
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
|
33
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
34
|
+
StateDelegatingStream as StateDelegatingStreamModel,
|
35
|
+
)
|
33
36
|
from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
|
34
37
|
get_registered_components_module,
|
35
38
|
)
|
@@ -146,7 +149,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
146
149
|
|
147
150
|
source_streams = [
|
148
151
|
self._constructor.create_component(
|
149
|
-
|
152
|
+
StateDelegatingStreamModel
|
153
|
+
if stream_config.get("type") == StateDelegatingStreamModel.__name__
|
154
|
+
else DeclarativeStreamModel,
|
150
155
|
stream_config,
|
151
156
|
config,
|
152
157
|
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
@@ -165,7 +170,15 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
165
170
|
def update_with_cache_parent_configs(parent_configs: list[dict[str, Any]]) -> None:
|
166
171
|
for parent_config in parent_configs:
|
167
172
|
parent_streams.add(parent_config["stream"]["name"])
|
168
|
-
parent_config["stream"]["retriever"]["requester"]["use_cache"] = True
|
173
|
+
if parent_config["stream"]["type"] == "StateDelegatingStream":
|
174
|
+
parent_config["stream"]["full_refresh_stream"]["retriever"]["requester"][
|
175
|
+
"use_cache"
|
176
|
+
] = True
|
177
|
+
parent_config["stream"]["incremental_stream"]["retriever"]["requester"][
|
178
|
+
"use_cache"
|
179
|
+
] = True
|
180
|
+
else:
|
181
|
+
parent_config["stream"]["retriever"]["requester"]["use_cache"] = True
|
169
182
|
|
170
183
|
for stream_config in stream_configs:
|
171
184
|
if stream_config.get("incremental_sync", {}).get("parent_stream"):
|
@@ -188,7 +201,15 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
188
201
|
|
189
202
|
for stream_config in stream_configs:
|
190
203
|
if stream_config["name"] in parent_streams:
|
191
|
-
stream_config["retriever"]["requester"]["use_cache"] = True
|
204
|
+
if stream_config["type"] == "StateDelegatingStream":
|
205
|
+
stream_config["full_refresh_stream"]["retriever"]["requester"]["use_cache"] = (
|
206
|
+
True
|
207
|
+
)
|
208
|
+
stream_config["incremental_stream"]["retriever"]["requester"]["use_cache"] = (
|
209
|
+
True
|
210
|
+
)
|
211
|
+
else:
|
212
|
+
stream_config["retriever"]["requester"]["use_cache"] = True
|
192
213
|
|
193
214
|
return stream_configs
|
194
215
|
|
@@ -877,6 +877,11 @@ class DpathFlattenFields(BaseModel):
|
|
877
877
|
description="Whether to delete the origin value or keep it. Default is False.",
|
878
878
|
title="Delete Origin Value",
|
879
879
|
)
|
880
|
+
replace_record: Optional[bool] = Field(
|
881
|
+
None,
|
882
|
+
description="Whether to replace the origin record or not. Default is False.",
|
883
|
+
title="Replace Origin Record",
|
884
|
+
)
|
880
885
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
881
886
|
|
882
887
|
|
@@ -1460,6 +1465,16 @@ class AddFields(BaseModel):
|
|
1460
1465
|
description="List of transformations (path and corresponding value) that will be added to the record.",
|
1461
1466
|
title="Fields",
|
1462
1467
|
)
|
1468
|
+
condition: Optional[str] = Field(
|
1469
|
+
"",
|
1470
|
+
description="Fields will be added if expression is evaluated to True.,",
|
1471
|
+
examples=[
|
1472
|
+
"{{ property|string == '' }}",
|
1473
|
+
"{{ property is integer }}",
|
1474
|
+
"{{ property|length > 5 }}",
|
1475
|
+
"{{ property == 'some_string_to_match' }}",
|
1476
|
+
],
|
1477
|
+
)
|
1463
1478
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1464
1479
|
|
1465
1480
|
|
@@ -1771,6 +1786,10 @@ class RecordSelector(BaseModel):
|
|
1771
1786
|
description="Responsible for normalization according to the schema.",
|
1772
1787
|
title="Schema Normalization",
|
1773
1788
|
)
|
1789
|
+
transform_before_filtering: Optional[bool] = Field(
|
1790
|
+
False,
|
1791
|
+
description="If true, transformation will be applied before record filtering.",
|
1792
|
+
)
|
1774
1793
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1775
1794
|
|
1776
1795
|
|
@@ -1860,7 +1879,7 @@ class DeclarativeSource1(BaseModel):
|
|
1860
1879
|
|
1861
1880
|
type: Literal["DeclarativeSource"]
|
1862
1881
|
check: Union[CheckStream, CheckDynamicStream]
|
1863
|
-
streams: List[DeclarativeStream]
|
1882
|
+
streams: List[Union[DeclarativeStream, StateDelegatingStream]]
|
1864
1883
|
dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
|
1865
1884
|
version: str = Field(
|
1866
1885
|
...,
|
@@ -1892,7 +1911,7 @@ class DeclarativeSource2(BaseModel):
|
|
1892
1911
|
|
1893
1912
|
type: Literal["DeclarativeSource"]
|
1894
1913
|
check: Union[CheckStream, CheckDynamicStream]
|
1895
|
-
streams: Optional[List[DeclarativeStream]] = None
|
1914
|
+
streams: Optional[List[Union[DeclarativeStream, StateDelegatingStream]]] = None
|
1896
1915
|
dynamic_streams: List[DynamicDeclarativeStream]
|
1897
1916
|
version: str = Field(
|
1898
1917
|
...,
|
@@ -2205,13 +2224,18 @@ class DynamicSchemaLoader(BaseModel):
|
|
2205
2224
|
|
2206
2225
|
class ParentStreamConfig(BaseModel):
|
2207
2226
|
type: Literal["ParentStreamConfig"]
|
2227
|
+
lazy_read_pointer: Optional[List[str]] = Field(
|
2228
|
+
[],
|
2229
|
+
description="If set, this will enable lazy reading, using the initial read of parent records to extract child records.",
|
2230
|
+
title="Lazy Read Pointer",
|
2231
|
+
)
|
2208
2232
|
parent_key: str = Field(
|
2209
2233
|
...,
|
2210
2234
|
description="The primary key of records from the parent stream that will be used during the retrieval of records for the current substream. This parent identifier field is typically a characteristic of the child records being extracted from the source API.",
|
2211
2235
|
examples=["id", "{{ config['parent_record_id'] }}"],
|
2212
2236
|
title="Parent Key",
|
2213
2237
|
)
|
2214
|
-
stream: DeclarativeStream = Field(
|
2238
|
+
stream: Union[DeclarativeStream, StateDelegatingStream] = Field(
|
2215
2239
|
..., description="Reference to the parent stream.", title="Parent Stream"
|
2216
2240
|
)
|
2217
2241
|
partition_field: str = Field(
|
@@ -2238,6 +2262,22 @@ class ParentStreamConfig(BaseModel):
|
|
2238
2262
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2239
2263
|
|
2240
2264
|
|
2265
|
+
class StateDelegatingStream(BaseModel):
|
2266
|
+
type: Literal["StateDelegatingStream"]
|
2267
|
+
name: str = Field(..., description="The stream name.", example=["Users"], title="Name")
|
2268
|
+
full_refresh_stream: DeclarativeStream = Field(
|
2269
|
+
...,
|
2270
|
+
description="Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.",
|
2271
|
+
title="Retriever",
|
2272
|
+
)
|
2273
|
+
incremental_stream: DeclarativeStream = Field(
|
2274
|
+
...,
|
2275
|
+
description="Component used to coordinate how records are extracted across stream slices and request pages when the state provided.",
|
2276
|
+
title="Retriever",
|
2277
|
+
)
|
2278
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2279
|
+
|
2280
|
+
|
2241
2281
|
class SimpleRetriever(BaseModel):
|
2242
2282
|
type: Literal["SimpleRetriever"]
|
2243
2283
|
record_selector: RecordSelector = Field(
|
@@ -2423,5 +2463,6 @@ SelectiveAuthenticator.update_forward_refs()
|
|
2423
2463
|
DeclarativeStream.update_forward_refs()
|
2424
2464
|
SessionTokenAuthenticator.update_forward_refs()
|
2425
2465
|
DynamicSchemaLoader.update_forward_refs()
|
2466
|
+
ParentStreamConfig.update_forward_refs()
|
2426
2467
|
SimpleRetriever.update_forward_refs()
|
2427
2468
|
AsyncRetriever.update_forward_refs()
|