airbyte-cdk 6.62.0.dev2__py3-none-any.whl → 6.62.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +2 -2
- airbyte_cdk/connector_builder/connector_builder_handler.py +7 -7
- airbyte_cdk/connector_builder/main.py +2 -2
- airbyte_cdk/connector_builder/test_reader/reader.py +2 -2
- airbyte_cdk/{sources → legacy/sources}/declarative/declarative_stream.py +2 -2
- airbyte_cdk/legacy/sources/declarative/incremental/__init__.py +30 -0
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/datetime_based_cursor.py +1 -1
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/global_substream_cursor.py +4 -2
- airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +1 -1
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/per_partition_with_global.py +8 -6
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/resumable_full_refresh_cursor.py +1 -1
- airbyte_cdk/manifest_server/Dockerfile +2 -2
- airbyte_cdk/manifest_server/README.md +22 -0
- airbyte_cdk/manifest_server/api_models/__init__.py +2 -0
- airbyte_cdk/manifest_server/api_models/manifest.py +12 -0
- airbyte_cdk/manifest_server/api_models/stream.py +2 -2
- airbyte_cdk/manifest_server/app.py +6 -0
- airbyte_cdk/manifest_server/cli/_common.py +1 -0
- airbyte_cdk/manifest_server/command_processor/processor.py +2 -5
- airbyte_cdk/manifest_server/command_processor/utils.py +1 -1
- airbyte_cdk/manifest_server/helpers/__init__.py +0 -0
- airbyte_cdk/manifest_server/helpers/tracing.py +36 -0
- airbyte_cdk/manifest_server/routers/manifest.py +38 -2
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +6 -3
- airbyte_cdk/sources/declarative/checks/check_stream.py +6 -3
- airbyte_cdk/sources/declarative/checks/connection_checker.py +5 -2
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +38 -451
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -27
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -24
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +58 -5
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1 -21
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -5
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +35 -23
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +21 -7
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +1 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -1
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +0 -4
- airbyte_cdk/sources/utils/schema_helpers.py +29 -9
- airbyte_cdk/sources/utils/transform.py +25 -13
- airbyte_cdk/utils/spec_schema_transformations.py +7 -5
- {airbyte_cdk-6.62.0.dev2.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/METADATA +4 -2
- {airbyte_cdk-6.62.0.dev2.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/RECORD +51 -49
- /airbyte_cdk/{sources → legacy/sources}/declarative/incremental/declarative_cursor.py +0 -0
- /airbyte_cdk/manifest_server/{auth.py → helpers/auth.py} +0 -0
- {airbyte_cdk-6.62.0.dev2.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.62.0.dev2.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.62.0.dev2.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.62.0.dev2.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/entry_points.txt +0 -0
@@ -593,32 +593,6 @@ definitions:
|
|
593
593
|
$parameters:
|
594
594
|
type: object
|
595
595
|
additionalProperties: true
|
596
|
-
CustomIncrementalSync:
|
597
|
-
title: Custom Incremental Sync
|
598
|
-
description: Incremental component whose behavior is derived from a custom code implementation of the connector.
|
599
|
-
type: object
|
600
|
-
additionalProperties: true
|
601
|
-
required:
|
602
|
-
- type
|
603
|
-
- class_name
|
604
|
-
- cursor_field
|
605
|
-
properties:
|
606
|
-
type:
|
607
|
-
type: string
|
608
|
-
enum: [CustomIncrementalSync]
|
609
|
-
class_name:
|
610
|
-
title: Class Name
|
611
|
-
description: Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_<name>.<package>.<class_name>`.
|
612
|
-
type: string
|
613
|
-
additionalProperties: true
|
614
|
-
examples:
|
615
|
-
- "source_railz.components.MyCustomIncrementalSync"
|
616
|
-
cursor_field:
|
617
|
-
description: The location of the value on a record that will be used as a bookmark during sync.
|
618
|
-
type: string
|
619
|
-
$parameters:
|
620
|
-
type: object
|
621
|
-
additionalProperties: true
|
622
596
|
CustomPaginationStrategy:
|
623
597
|
title: Custom Pagination Strategy
|
624
598
|
description: Pagination strategy component whose behavior is derived from a custom code implementation of the connector.
|
@@ -1551,7 +1525,6 @@ definitions:
|
|
1551
1525
|
anyOf:
|
1552
1526
|
- "$ref": "#/definitions/DatetimeBasedCursor"
|
1553
1527
|
- "$ref": "#/definitions/IncrementingCountCursor"
|
1554
|
-
- "$ref": "#/definitions/CustomIncrementalSync"
|
1555
1528
|
primary_key:
|
1556
1529
|
title: Primary Key
|
1557
1530
|
"$ref": "#/definitions/PrimaryKey"
|
@@ -2,36 +2,12 @@
|
|
2
2
|
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.legacy.sources.declarative.incremental.per_partition_cursor import (
|
6
|
-
CursorFactory,
|
7
|
-
PerPartitionCursor,
|
8
|
-
)
|
9
5
|
from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
|
10
6
|
ConcurrentCursorFactory,
|
11
7
|
ConcurrentPerPartitionCursor,
|
12
8
|
)
|
13
|
-
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
14
|
-
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
15
|
-
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
16
|
-
GlobalSubstreamCursor,
|
17
|
-
)
|
18
|
-
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
19
|
-
PerPartitionWithGlobalCursor,
|
20
|
-
)
|
21
|
-
from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor import (
|
22
|
-
ChildPartitionResumableFullRefreshCursor,
|
23
|
-
ResumableFullRefreshCursor,
|
24
|
-
)
|
25
9
|
|
26
10
|
__all__ = [
|
27
|
-
"CursorFactory",
|
28
11
|
"ConcurrentCursorFactory",
|
29
12
|
"ConcurrentPerPartitionCursor",
|
30
|
-
"DatetimeBasedCursor",
|
31
|
-
"DeclarativeCursor",
|
32
|
-
"GlobalSubstreamCursor",
|
33
|
-
"PerPartitionCursor",
|
34
|
-
"PerPartitionWithGlobalCursor",
|
35
|
-
"ResumableFullRefreshCursor",
|
36
|
-
"ChildPartitionResumableFullRefreshCursor",
|
37
13
|
]
|
@@ -9,7 +9,7 @@ import time
|
|
9
9
|
from collections import OrderedDict
|
10
10
|
from copy import deepcopy
|
11
11
|
from datetime import timedelta
|
12
|
-
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional
|
12
|
+
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, TypeVar
|
13
13
|
|
14
14
|
from airbyte_cdk.models import (
|
15
15
|
AirbyteStateBlob,
|
@@ -19,10 +19,6 @@ from airbyte_cdk.models import (
|
|
19
19
|
StreamDescriptor,
|
20
20
|
)
|
21
21
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
22
|
-
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
23
|
-
Timer,
|
24
|
-
iterate_with_last_flag_and_state,
|
25
|
-
)
|
26
22
|
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
27
23
|
from airbyte_cdk.sources.message import MessageRepository
|
28
24
|
from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
|
@@ -38,6 +34,63 @@ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
|
38
34
|
logger = logging.getLogger("airbyte")
|
39
35
|
|
40
36
|
|
37
|
+
T = TypeVar("T")
|
38
|
+
|
39
|
+
|
40
|
+
def iterate_with_last_flag_and_state(
|
41
|
+
generator: Iterable[T], get_stream_state_func: Callable[[], Optional[Mapping[str, StreamState]]]
|
42
|
+
) -> Iterable[tuple[T, bool, Any]]:
|
43
|
+
"""
|
44
|
+
Iterates over the given generator, yielding tuples containing the element, a flag
|
45
|
+
indicating whether it's the last element in the generator, and the result of
|
46
|
+
`get_stream_state_func` applied to the element.
|
47
|
+
|
48
|
+
Args:
|
49
|
+
generator: The iterable to iterate over.
|
50
|
+
get_stream_state_func: A function that takes an element from the generator and
|
51
|
+
returns its state.
|
52
|
+
|
53
|
+
Returns:
|
54
|
+
An iterator that yields tuples of the form (element, is_last, state).
|
55
|
+
"""
|
56
|
+
|
57
|
+
iterator = iter(generator)
|
58
|
+
|
59
|
+
try:
|
60
|
+
current = next(iterator)
|
61
|
+
state = get_stream_state_func()
|
62
|
+
except StopIteration:
|
63
|
+
return # Return an empty iterator
|
64
|
+
|
65
|
+
for next_item in iterator:
|
66
|
+
yield current, False, state
|
67
|
+
current = next_item
|
68
|
+
state = get_stream_state_func()
|
69
|
+
|
70
|
+
yield current, True, state
|
71
|
+
|
72
|
+
|
73
|
+
class Timer:
|
74
|
+
"""
|
75
|
+
A simple timer class that measures elapsed time in seconds using a high-resolution performance counter.
|
76
|
+
"""
|
77
|
+
|
78
|
+
def __init__(self) -> None:
|
79
|
+
self._start: Optional[int] = None
|
80
|
+
|
81
|
+
def start(self) -> None:
|
82
|
+
self._start = time.perf_counter_ns()
|
83
|
+
|
84
|
+
def finish(self) -> int:
|
85
|
+
if self._start:
|
86
|
+
return ((time.perf_counter_ns() - self._start) / 1e9).__ceil__()
|
87
|
+
else:
|
88
|
+
raise RuntimeError("Global substream cursor timer not started")
|
89
|
+
|
90
|
+
def is_running(self) -> bool:
|
91
|
+
return self._start is not None
|
92
|
+
|
93
|
+
|
41
94
|
class ConcurrentCursorFactory:
|
42
95
|
def __init__(self, create_function: Callable[..., ConcurrentCursor]):
|
43
96
|
self._create_function = create_function
|
@@ -5,7 +5,6 @@ from typing import Any, Mapping
|
|
5
5
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
6
6
|
from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
|
7
7
|
from airbyte_cdk.sources.declarative.models import (
|
8
|
-
CustomIncrementalSync,
|
9
8
|
DatetimeBasedCursor,
|
10
9
|
SubstreamPartitionRouter,
|
11
10
|
)
|
@@ -36,7 +35,7 @@ class LegacyToPerPartitionStateMigration(StateMigration):
|
|
36
35
|
def __init__(
|
37
36
|
self,
|
38
37
|
partition_router: SubstreamPartitionRouter,
|
39
|
-
cursor:
|
38
|
+
cursor: DatetimeBasedCursor,
|
40
39
|
config: Mapping[str, Any],
|
41
40
|
parameters: Mapping[str, Any],
|
42
41
|
):
|
@@ -174,24 +174,6 @@ class CustomErrorHandler(BaseModel):
|
|
174
174
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
175
175
|
|
176
176
|
|
177
|
-
class CustomIncrementalSync(BaseModel):
|
178
|
-
class Config:
|
179
|
-
extra = Extra.allow
|
180
|
-
|
181
|
-
type: Literal["CustomIncrementalSync"]
|
182
|
-
class_name: str = Field(
|
183
|
-
...,
|
184
|
-
description="Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_<name>.<package>.<class_name>`.",
|
185
|
-
examples=["source_railz.components.MyCustomIncrementalSync"],
|
186
|
-
title="Class Name",
|
187
|
-
)
|
188
|
-
cursor_field: str = Field(
|
189
|
-
...,
|
190
|
-
description="The location of the value on a record that will be used as a bookmark during sync.",
|
191
|
-
)
|
192
|
-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
193
|
-
|
194
|
-
|
195
177
|
class CustomPaginationStrategy(BaseModel):
|
196
178
|
class Config:
|
197
179
|
extra = Extra.allow
|
@@ -2432,9 +2414,7 @@ class DeclarativeStream(BaseModel):
|
|
2432
2414
|
description="Component used to coordinate how records are extracted across stream slices and request pages.",
|
2433
2415
|
title="Retriever",
|
2434
2416
|
)
|
2435
|
-
incremental_sync: Optional[
|
2436
|
-
Union[DatetimeBasedCursor, IncrementingCountCursor, CustomIncrementalSync]
|
2437
|
-
] = Field(
|
2417
|
+
incremental_sync: Optional[Union[DatetimeBasedCursor, IncrementingCountCursor]] = Field(
|
2438
2418
|
None,
|
2439
2419
|
description="Component used to fetch data incrementally based on a time field in the data.",
|
2440
2420
|
title="Incremental Sync",
|
@@ -19,11 +19,6 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
|
|
19
19
|
"DatetimeBasedCursor.end_time_option": "RequestOption",
|
20
20
|
"DatetimeBasedCursor.start_datetime": "MinMaxDatetime",
|
21
21
|
"DatetimeBasedCursor.start_time_option": "RequestOption",
|
22
|
-
# CustomIncrementalSync
|
23
|
-
"CustomIncrementalSync.end_datetime": "MinMaxDatetime",
|
24
|
-
"CustomIncrementalSync.end_time_option": "RequestOption",
|
25
|
-
"CustomIncrementalSync.start_datetime": "MinMaxDatetime",
|
26
|
-
"CustomIncrementalSync.start_time_option": "RequestOption",
|
27
22
|
# DeclarativeSource
|
28
23
|
"DeclarativeSource.check": "CheckStream",
|
29
24
|
"DeclarativeSource.spec": "Spec",
|
@@ -33,6 +33,10 @@ from requests import Response
|
|
33
33
|
from airbyte_cdk.connector_builder.models import (
|
34
34
|
LogMessage as ConnectorBuilderLogMessage,
|
35
35
|
)
|
36
|
+
from airbyte_cdk.legacy.sources.declarative.declarative_stream import DeclarativeStream
|
37
|
+
from airbyte_cdk.legacy.sources.declarative.incremental import (
|
38
|
+
DatetimeBasedCursor,
|
39
|
+
)
|
36
40
|
from airbyte_cdk.models import (
|
37
41
|
AirbyteStateBlob,
|
38
42
|
AirbyteStateMessage,
|
@@ -75,7 +79,6 @@ from airbyte_cdk.sources.declarative.checks import (
|
|
75
79
|
)
|
76
80
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
77
81
|
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
78
|
-
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
79
82
|
from airbyte_cdk.sources.declarative.decoders import (
|
80
83
|
Decoder,
|
81
84
|
IterableDecoder,
|
@@ -105,10 +108,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
|
105
108
|
from airbyte_cdk.sources.declarative.incremental import (
|
106
109
|
ConcurrentCursorFactory,
|
107
110
|
ConcurrentPerPartitionCursor,
|
108
|
-
CursorFactory,
|
109
|
-
DatetimeBasedCursor,
|
110
|
-
GlobalSubstreamCursor,
|
111
|
-
PerPartitionWithGlobalCursor,
|
112
111
|
)
|
113
112
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
114
113
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
@@ -200,9 +199,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
200
199
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
201
200
|
CustomErrorHandler as CustomErrorHandlerModel,
|
202
201
|
)
|
203
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
204
|
-
CustomIncrementalSync as CustomIncrementalSyncModel,
|
205
|
-
)
|
206
202
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
207
203
|
CustomPaginationStrategy as CustomPaginationStrategyModel,
|
208
204
|
)
|
@@ -701,7 +697,6 @@ class ModelToComponentFactory:
|
|
701
697
|
CustomBackoffStrategyModel: self.create_custom_component,
|
702
698
|
CustomDecoderModel: self.create_custom_component,
|
703
699
|
CustomErrorHandlerModel: self.create_custom_component,
|
704
|
-
CustomIncrementalSyncModel: self.create_custom_component,
|
705
700
|
CustomRecordExtractorModel: self.create_custom_component,
|
706
701
|
CustomRecordFilterModel: self.create_custom_component,
|
707
702
|
CustomRequesterModel: self.create_custom_component,
|
@@ -752,7 +747,7 @@ class ModelToComponentFactory:
|
|
752
747
|
OAuthAuthenticatorModel: self.create_oauth_authenticator,
|
753
748
|
OffsetIncrementModel: self.create_offset_increment,
|
754
749
|
PageIncrementModel: self.create_page_increment,
|
755
|
-
ParentStreamConfigModel: self.
|
750
|
+
ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper,
|
756
751
|
PredicateValidatorModel: self.create_predicate_validator,
|
757
752
|
PropertiesFromEndpointModel: self.create_properties_from_endpoint,
|
758
753
|
PropertyChunkingModel: self.create_property_chunking,
|
@@ -1748,7 +1743,11 @@ class ModelToComponentFactory:
|
|
1748
1743
|
|
1749
1744
|
if self._is_component(model_value):
|
1750
1745
|
model_args[model_field] = self._create_nested_component(
|
1751
|
-
model,
|
1746
|
+
model,
|
1747
|
+
model_field,
|
1748
|
+
model_value,
|
1749
|
+
config,
|
1750
|
+
**kwargs,
|
1752
1751
|
)
|
1753
1752
|
elif isinstance(model_value, list):
|
1754
1753
|
vals = []
|
@@ -1760,7 +1759,15 @@ class ModelToComponentFactory:
|
|
1760
1759
|
if derived_type:
|
1761
1760
|
v["type"] = derived_type
|
1762
1761
|
if self._is_component(v):
|
1763
|
-
vals.append(
|
1762
|
+
vals.append(
|
1763
|
+
self._create_nested_component(
|
1764
|
+
model,
|
1765
|
+
model_field,
|
1766
|
+
v,
|
1767
|
+
config,
|
1768
|
+
**kwargs,
|
1769
|
+
)
|
1770
|
+
)
|
1764
1771
|
else:
|
1765
1772
|
vals.append(v)
|
1766
1773
|
model_args[model_field] = vals
|
@@ -1965,7 +1972,7 @@ class ModelToComponentFactory:
|
|
1965
1972
|
|
1966
1973
|
def create_default_stream(
|
1967
1974
|
self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
|
1968
|
-
) ->
|
1975
|
+
) -> AbstractStream:
|
1969
1976
|
primary_key = model.primary_key.__root__ if model.primary_key else None
|
1970
1977
|
|
1971
1978
|
partition_router = self._build_stream_slicer_from_partition_router(
|
@@ -2062,6 +2069,7 @@ class ModelToComponentFactory:
|
|
2062
2069
|
primary_key=primary_key,
|
2063
2070
|
request_options_provider=request_options_provider,
|
2064
2071
|
stream_slicer=stream_slicer,
|
2072
|
+
partition_router=partition_router,
|
2065
2073
|
stop_condition_cursor=concurrent_cursor
|
2066
2074
|
if self._is_stop_condition_on_cursor(model)
|
2067
2075
|
else None,
|
@@ -2524,7 +2532,9 @@ class ModelToComponentFactory:
|
|
2524
2532
|
config=config,
|
2525
2533
|
name=name,
|
2526
2534
|
primary_key=None,
|
2527
|
-
|
2535
|
+
partition_router=self._build_stream_slicer_from_partition_router(
|
2536
|
+
model.retriever, config
|
2537
|
+
),
|
2528
2538
|
transformations=[],
|
2529
2539
|
use_cache=True,
|
2530
2540
|
log_formatter=(
|
@@ -2603,6 +2613,8 @@ class ModelToComponentFactory:
|
|
2603
2613
|
fallback_parser=gzip_parser.inner_parser,
|
2604
2614
|
)
|
2605
2615
|
|
2616
|
+
# todo: This method should be removed once we deprecate the SimpleRetriever.cursor field and the various
|
2617
|
+
# state methods
|
2606
2618
|
@staticmethod
|
2607
2619
|
def create_incrementing_count_cursor(
|
2608
2620
|
model: IncrementingCountCursorModel, config: Config, **kwargs: Any
|
@@ -3133,12 +3145,11 @@ class ModelToComponentFactory:
|
|
3133
3145
|
transformations: List[RecordTransformation],
|
3134
3146
|
file_uploader: Optional[DefaultFileUploader] = None,
|
3135
3147
|
incremental_sync: Optional[
|
3136
|
-
Union[
|
3137
|
-
IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
|
3138
|
-
]
|
3148
|
+
Union[IncrementingCountCursorModel, DatetimeBasedCursorModel]
|
3139
3149
|
] = None,
|
3140
3150
|
use_cache: Optional[bool] = None,
|
3141
3151
|
log_formatter: Optional[Callable[[Response], Any]] = None,
|
3152
|
+
partition_router: Optional[PartitionRouter] = None,
|
3142
3153
|
**kwargs: Any,
|
3143
3154
|
) -> SimpleRetriever:
|
3144
3155
|
def _get_url(req: Requester) -> str:
|
@@ -3236,6 +3247,10 @@ class ModelToComponentFactory:
|
|
3236
3247
|
|
3237
3248
|
if not request_options_provider:
|
3238
3249
|
request_options_provider = DefaultRequestOptionsProvider(parameters={})
|
3250
|
+
if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance(
|
3251
|
+
partition_router, PartitionRouter
|
3252
|
+
):
|
3253
|
+
request_options_provider = partition_router
|
3239
3254
|
|
3240
3255
|
paginator = (
|
3241
3256
|
self._create_component_from_model(
|
@@ -3684,7 +3699,7 @@ class ModelToComponentFactory:
|
|
3684
3699
|
if model.parent_stream_configs:
|
3685
3700
|
parent_stream_configs.extend(
|
3686
3701
|
[
|
3687
|
-
self.
|
3702
|
+
self.create_parent_stream_config_with_substream_wrapper(
|
3688
3703
|
model=parent_stream_config, config=config, **kwargs
|
3689
3704
|
)
|
3690
3705
|
for parent_stream_config in model.parent_stream_configs
|
@@ -3697,13 +3712,11 @@ class ModelToComponentFactory:
|
|
3697
3712
|
config=config,
|
3698
3713
|
)
|
3699
3714
|
|
3700
|
-
def
|
3715
|
+
def create_parent_stream_config_with_substream_wrapper(
|
3701
3716
|
self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
|
3702
3717
|
) -> Any:
|
3703
3718
|
# getting the parent state
|
3704
|
-
child_state = self._connector_state_manager.get_stream_state(
|
3705
|
-
stream_name, None
|
3706
|
-
)
|
3719
|
+
child_state = self._connector_state_manager.get_stream_state(stream_name, None)
|
3707
3720
|
|
3708
3721
|
# This flag will be used exclusively for StateDelegatingStream when a parent stream is created
|
3709
3722
|
has_parent_state = bool(
|
@@ -3771,7 +3784,6 @@ class ModelToComponentFactory:
|
|
3771
3784
|
incremental_sync_model: Union[
|
3772
3785
|
DatetimeBasedCursorModel,
|
3773
3786
|
IncrementingCountCursorModel,
|
3774
|
-
CustomIncrementalSyncModel,
|
3775
3787
|
] = (
|
3776
3788
|
model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream
|
3777
3789
|
if isinstance(model.stream, DeclarativeStreamModel)
|
@@ -1,4 +1,5 @@
|
|
1
|
-
# Copyright (c)
|
1
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
2
|
+
|
2
3
|
import logging
|
3
4
|
import uuid
|
4
5
|
from dataclasses import dataclass, field
|
@@ -8,9 +9,8 @@ from typing import Any, Dict, Iterable, Mapping, Optional
|
|
8
9
|
import requests
|
9
10
|
from requests import Response
|
10
11
|
|
11
|
-
from airbyte_cdk import AirbyteMessage
|
12
12
|
from airbyte_cdk.logger import lazy_log
|
13
|
-
from airbyte_cdk.models import FailureType, Type
|
13
|
+
from airbyte_cdk.models import AirbyteMessage, FailureType, Type
|
14
14
|
from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
|
15
15
|
from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
|
16
16
|
from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
|
@@ -23,10 +23,10 @@ from typing import (
|
|
23
23
|
import requests
|
24
24
|
from typing_extensions import deprecated
|
25
25
|
|
26
|
+
from airbyte_cdk.legacy.sources.declarative.incremental import ResumableFullRefreshCursor
|
27
|
+
from airbyte_cdk.legacy.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
26
28
|
from airbyte_cdk.models import AirbyteMessage
|
27
29
|
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
28
|
-
from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor
|
29
|
-
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
30
30
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
31
31
|
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
|
32
32
|
SinglePartitionRouter,
|
@@ -14,10 +14,21 @@ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import Stre
|
|
14
14
|
from airbyte_cdk.sources.types import Record, StreamSlice
|
15
15
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
16
16
|
|
17
|
+
|
17
18
|
# For Connector Builder test read operations, we track the total number of records
|
18
|
-
# read for the stream
|
19
|
-
|
20
|
-
|
19
|
+
# read for the stream so that we can stop reading early if we exceed the record limit.
|
20
|
+
class RecordCounter:
|
21
|
+
def __init__(self) -> None:
|
22
|
+
self.total_record_counter = 0
|
23
|
+
|
24
|
+
def increment(self) -> None:
|
25
|
+
self.total_record_counter += 1
|
26
|
+
|
27
|
+
def reset(self) -> None:
|
28
|
+
self.total_record_counter = 0
|
29
|
+
|
30
|
+
def get_total_records(self) -> int:
|
31
|
+
return self.total_record_counter
|
21
32
|
|
22
33
|
|
23
34
|
class SchemaLoaderCachingDecorator(SchemaLoader):
|
@@ -51,6 +62,7 @@ class DeclarativePartitionFactory:
|
|
51
62
|
self._retriever = retriever
|
52
63
|
self._message_repository = message_repository
|
53
64
|
self._max_records_limit = max_records_limit
|
65
|
+
self._record_counter = RecordCounter()
|
54
66
|
|
55
67
|
def create(self, stream_slice: StreamSlice) -> Partition:
|
56
68
|
return DeclarativePartition(
|
@@ -60,6 +72,7 @@ class DeclarativePartitionFactory:
|
|
60
72
|
message_repository=self._message_repository,
|
61
73
|
max_records_limit=self._max_records_limit,
|
62
74
|
stream_slice=stream_slice,
|
75
|
+
record_counter=self._record_counter,
|
63
76
|
)
|
64
77
|
|
65
78
|
|
@@ -72,6 +85,7 @@ class DeclarativePartition(Partition):
|
|
72
85
|
message_repository: MessageRepository,
|
73
86
|
max_records_limit: Optional[int],
|
74
87
|
stream_slice: StreamSlice,
|
88
|
+
record_counter: RecordCounter,
|
75
89
|
):
|
76
90
|
self._stream_name = stream_name
|
77
91
|
self._schema_loader = schema_loader
|
@@ -80,17 +94,17 @@ class DeclarativePartition(Partition):
|
|
80
94
|
self._max_records_limit = max_records_limit
|
81
95
|
self._stream_slice = stream_slice
|
82
96
|
self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
|
97
|
+
self._record_counter = record_counter
|
83
98
|
|
84
99
|
def read(self) -> Iterable[Record]:
|
85
100
|
if self._max_records_limit is not None:
|
86
|
-
|
87
|
-
if total_record_counter >= self._max_records_limit:
|
101
|
+
if self._record_counter.get_total_records() >= self._max_records_limit:
|
88
102
|
return
|
89
103
|
for stream_data in self._retriever.read_records(
|
90
104
|
self._schema_loader.get_json_schema(), self._stream_slice
|
91
105
|
):
|
92
106
|
if self._max_records_limit is not None:
|
93
|
-
if
|
107
|
+
if self._record_counter.get_total_records() >= self._max_records_limit:
|
94
108
|
break
|
95
109
|
|
96
110
|
if isinstance(stream_data, Mapping):
|
@@ -108,7 +122,7 @@ class DeclarativePartition(Partition):
|
|
108
122
|
self._message_repository.emit_message(stream_data)
|
109
123
|
|
110
124
|
if self._max_records_limit is not None:
|
111
|
-
|
125
|
+
self._record_counter.increment()
|
112
126
|
|
113
127
|
def to_slice(self) -> Optional[Mapping[str, Any]]:
|
114
128
|
return self._stream_slice
|
@@ -5,7 +5,7 @@
|
|
5
5
|
from dataclasses import InitVar, dataclass
|
6
6
|
from typing import Any, Dict, Mapping, Optional
|
7
7
|
|
8
|
-
from airbyte_cdk import InterpolatedString
|
8
|
+
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
9
9
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
10
10
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
11
11
|
|
@@ -14,7 +14,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
|
14
14
|
from airbyte_cdk.sources.types import ConnectionDefinition
|
15
15
|
|
16
16
|
|
17
|
-
class YamlDeclarativeSource(ConcurrentDeclarativeSource
|
17
|
+
class YamlDeclarativeSource(ConcurrentDeclarativeSource):
|
18
18
|
"""Declarative source defined by a yaml file"""
|
19
19
|
|
20
20
|
def __init__(
|
@@ -14,10 +14,6 @@ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
15
15
|
|
16
16
|
|
17
|
-
@deprecated(
|
18
|
-
"This class is experimental. Use at your own risk.",
|
19
|
-
category=ExperimentalClassWarning,
|
20
|
-
)
|
21
17
|
class AbstractStream(ABC):
|
22
18
|
"""
|
23
19
|
AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
|
@@ -7,12 +7,16 @@ import importlib
|
|
7
7
|
import json
|
8
8
|
import os
|
9
9
|
import pkgutil
|
10
|
-
from
|
10
|
+
from copy import deepcopy
|
11
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Mapping, MutableMapping, Tuple, cast
|
11
12
|
|
12
13
|
import jsonref
|
13
|
-
from jsonschema import
|
14
|
+
from jsonschema import validate
|
14
15
|
from jsonschema.exceptions import ValidationError
|
15
16
|
from pydantic.v1 import BaseModel, Field
|
17
|
+
from referencing import Registry, Resource
|
18
|
+
from referencing._core import Resolver # used for type hints
|
19
|
+
from referencing.jsonschema import DRAFT7
|
16
20
|
|
17
21
|
from airbyte_cdk.models import ConnectorSpecification, FailureType
|
18
22
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
@@ -63,18 +67,30 @@ def resolve_ref_links(obj: Any) -> Any:
|
|
63
67
|
return obj
|
64
68
|
|
65
69
|
|
66
|
-
def
|
70
|
+
def get_ref_resolver_registry(schema: dict[str, Any]) -> Registry:
|
71
|
+
"""Get a reference resolver registry for the given schema."""
|
72
|
+
resource: Resource = Resource.from_contents(
|
73
|
+
contents=schema,
|
74
|
+
default_specification=DRAFT7,
|
75
|
+
)
|
76
|
+
return cast( # Mypy has a hard time detecting this return type.
|
77
|
+
"Registry",
|
78
|
+
Registry().with_resource(
|
79
|
+
uri="",
|
80
|
+
resource=resource,
|
81
|
+
),
|
82
|
+
)
|
83
|
+
|
84
|
+
|
85
|
+
def _expand_refs(schema: Any, ref_resolver: Resolver) -> None:
|
67
86
|
"""Internal function to iterate over schema and replace all occurrences of $ref with their definitions. Recursive.
|
68
87
|
|
69
88
|
:param schema: schema that will be patched
|
70
|
-
:param ref_resolver: resolver to get definition from $ref, if None pass it will be instantiated
|
71
89
|
"""
|
72
|
-
ref_resolver = ref_resolver or RefResolver.from_schema(schema)
|
73
|
-
|
74
90
|
if isinstance(schema, MutableMapping):
|
75
91
|
if "$ref" in schema:
|
76
92
|
ref_url = schema.pop("$ref")
|
77
|
-
|
93
|
+
definition = ref_resolver.lookup(ref_url).contents
|
78
94
|
_expand_refs(
|
79
95
|
definition, ref_resolver=ref_resolver
|
80
96
|
) # expand refs in definitions as well
|
@@ -90,10 +106,14 @@ def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> Non
|
|
90
106
|
def expand_refs(schema: Any) -> None:
|
91
107
|
"""Iterate over schema and replace all occurrences of $ref with their definitions.
|
92
108
|
|
109
|
+
If a "definitions" section is present at the root of the schema, it will be removed
|
110
|
+
after $ref resolution is complete.
|
111
|
+
|
93
112
|
:param schema: schema that will be patched
|
94
113
|
"""
|
95
|
-
|
96
|
-
schema
|
114
|
+
ref_resolver = get_ref_resolver_registry(schema).resolver()
|
115
|
+
_expand_refs(schema, ref_resolver)
|
116
|
+
schema.pop("definitions", None)
|
97
117
|
|
98
118
|
|
99
119
|
def rename_key(schema: Any, old_key: str, new_key: str) -> None:
|