airbyte-cdk 6.60.13__py3-none-any.whl → 6.60.15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/test_reader/reader.py +5 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +22 -24
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +8 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +77 -54
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +25 -10
- airbyte_cdk/sources/streams/concurrent/adapters.py +2 -1
- airbyte_cdk/sources/streams/concurrent/default_stream.py +4 -6
- {airbyte_cdk-6.60.13.dist-info → airbyte_cdk-6.60.15.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.60.13.dist-info → airbyte_cdk-6.60.15.dist-info}/RECORD +13 -13
- {airbyte_cdk-6.60.13.dist-info → airbyte_cdk-6.60.15.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.60.13.dist-info → airbyte_cdk-6.60.15.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.60.13.dist-info → airbyte_cdk-6.60.15.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.60.13.dist-info → airbyte_cdk-6.60.15.dist-info}/entry_points.txt +0 -0
@@ -120,7 +120,11 @@ class TestReader:
|
|
120
120
|
deprecation_warnings: List[LogMessage] = source.deprecation_warnings()
|
121
121
|
|
122
122
|
schema_inferrer = SchemaInferrer(
|
123
|
-
self._pk_to_nested_and_composite_field(
|
123
|
+
self._pk_to_nested_and_composite_field(
|
124
|
+
stream.primary_key if hasattr(stream, "primary_key") else stream._primary_key # type: ignore # We are accessing the private property here as the primary key is not exposed. We should either expose it or use `as_airbyte_stream` to retrieve it as this is the "official" way where it is exposed in the Airbyte protocol
|
125
|
+
)
|
126
|
+
if stream
|
127
|
+
else None,
|
124
128
|
self._cursor_field_to_nested_and_composite_field(stream.cursor_field)
|
125
129
|
if stream
|
126
130
|
else None,
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple
|
6
|
+
from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union
|
7
7
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AirbyteCatalog,
|
@@ -15,10 +15,6 @@ from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSo
|
|
15
15
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
16
16
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
17
17
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
18
|
-
from airbyte_cdk.sources.declarative.extractors import RecordSelector
|
19
|
-
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
20
|
-
ClientSideIncrementalRecordFilterDecorator,
|
21
|
-
)
|
22
18
|
from airbyte_cdk.sources.declarative.incremental import (
|
23
19
|
ConcurrentPerPartitionCursor,
|
24
20
|
GlobalSubstreamCursor,
|
@@ -28,7 +24,6 @@ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global impor
|
|
28
24
|
PerPartitionWithGlobalCursor,
|
29
25
|
)
|
30
26
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
31
|
-
from airbyte_cdk.sources.declarative.models import FileUploader
|
32
27
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
33
28
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
34
29
|
)
|
@@ -84,7 +79,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
84
79
|
# incremental streams running in full refresh.
|
85
80
|
component_factory = component_factory or ModelToComponentFactory(
|
86
81
|
emit_connector_builder_messages=emit_connector_builder_messages,
|
87
|
-
disable_resumable_full_refresh=True,
|
88
82
|
connector_state_manager=self._connector_state_manager,
|
89
83
|
max_concurrent_async_job_count=source_config.get("max_concurrent_async_job_count"),
|
90
84
|
)
|
@@ -180,7 +174,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
180
174
|
]
|
181
175
|
)
|
182
176
|
|
183
|
-
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
177
|
+
def streams(self, config: Mapping[str, Any]) -> List[Union[Stream, AbstractStream]]: # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
|
184
178
|
"""
|
185
179
|
The `streams` method is used as part of the AbstractSource in the following cases:
|
186
180
|
* ConcurrentDeclarativeSource.check -> ManifestDeclarativeSource.check -> AbstractSource.check -> DeclarativeSource.check_connection -> CheckStream.check_connection -> streams
|
@@ -210,6 +204,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
210
204
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
211
205
|
# so we need to treat them as synchronous
|
212
206
|
|
207
|
+
if isinstance(declarative_stream, AbstractStream):
|
208
|
+
concurrent_streams.append(declarative_stream)
|
209
|
+
continue
|
210
|
+
|
213
211
|
supports_file_transfer = (
|
214
212
|
isinstance(declarative_stream, DeclarativeStream)
|
215
213
|
and "file_uploader" in name_to_stream_mapping[declarative_stream.name]
|
@@ -278,10 +276,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
278
276
|
|
279
277
|
partition_generator = StreamSlicerPartitionGenerator(
|
280
278
|
partition_factory=DeclarativePartitionFactory(
|
281
|
-
declarative_stream.name,
|
282
|
-
declarative_stream.
|
283
|
-
retriever,
|
284
|
-
self.message_repository,
|
279
|
+
stream_name=declarative_stream.name,
|
280
|
+
schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
|
281
|
+
retriever=retriever,
|
282
|
+
message_repository=self.message_repository,
|
285
283
|
),
|
286
284
|
stream_slicer=declarative_stream.retriever.stream_slicer,
|
287
285
|
)
|
@@ -309,10 +307,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
309
307
|
)
|
310
308
|
partition_generator = StreamSlicerPartitionGenerator(
|
311
309
|
partition_factory=DeclarativePartitionFactory(
|
312
|
-
declarative_stream.name,
|
313
|
-
declarative_stream.
|
314
|
-
retriever,
|
315
|
-
self.message_repository,
|
310
|
+
stream_name=declarative_stream.name,
|
311
|
+
schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
|
312
|
+
retriever=retriever,
|
313
|
+
message_repository=self.message_repository,
|
316
314
|
),
|
317
315
|
stream_slicer=cursor,
|
318
316
|
)
|
@@ -339,10 +337,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
339
337
|
) and hasattr(declarative_stream.retriever, "stream_slicer"):
|
340
338
|
partition_generator = StreamSlicerPartitionGenerator(
|
341
339
|
DeclarativePartitionFactory(
|
342
|
-
declarative_stream.name,
|
343
|
-
declarative_stream.
|
344
|
-
declarative_stream.retriever,
|
345
|
-
self.message_repository,
|
340
|
+
stream_name=declarative_stream.name,
|
341
|
+
schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
|
342
|
+
retriever=declarative_stream.retriever,
|
343
|
+
message_repository=self.message_repository,
|
346
344
|
),
|
347
345
|
declarative_stream.retriever.stream_slicer,
|
348
346
|
)
|
@@ -399,10 +397,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
399
397
|
|
400
398
|
partition_generator = StreamSlicerPartitionGenerator(
|
401
399
|
DeclarativePartitionFactory(
|
402
|
-
declarative_stream.name,
|
403
|
-
declarative_stream.
|
404
|
-
retriever,
|
405
|
-
self.message_repository,
|
400
|
+
stream_name=declarative_stream.name,
|
401
|
+
schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
|
402
|
+
retriever=retriever,
|
403
|
+
message_repository=self.message_repository,
|
406
404
|
),
|
407
405
|
perpartition_cursor,
|
408
406
|
)
|
@@ -8,7 +8,7 @@ import pkgutil
|
|
8
8
|
from copy import deepcopy
|
9
9
|
from importlib import metadata
|
10
10
|
from types import ModuleType
|
11
|
-
from typing import Any, Dict, Iterator, List, Mapping, Optional, Set
|
11
|
+
from typing import Any, Dict, Iterator, List, Mapping, Optional, Set, Union
|
12
12
|
|
13
13
|
import orjson
|
14
14
|
import yaml
|
@@ -66,6 +66,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
|
66
66
|
from airbyte_cdk.sources.declarative.resolvers import COMPONENTS_RESOLVER_TYPE_MAPPING
|
67
67
|
from airbyte_cdk.sources.declarative.spec.spec import Spec
|
68
68
|
from airbyte_cdk.sources.message import MessageRepository
|
69
|
+
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
69
70
|
from airbyte_cdk.sources.streams.core import Stream
|
70
71
|
from airbyte_cdk.sources.types import Config, ConnectionDefinition
|
71
72
|
from airbyte_cdk.sources.utils.slice_logger import (
|
@@ -297,7 +298,12 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
297
298
|
f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}"
|
298
299
|
)
|
299
300
|
|
300
|
-
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
|
301
|
+
def streams(self, config: Mapping[str, Any]) -> List[Union[Stream, AbstractStream]]: # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
|
302
|
+
"""
|
303
|
+
As a migration step, this method will return both legacy streams (Stream) and concurrent streams (AbstractStream).
|
304
|
+
Once the migration is done, we can probably have this method throw "not implemented" as we figure out how to
|
305
|
+
fully decouple this from the AbstractSource.
|
306
|
+
"""
|
301
307
|
if self._spec_component:
|
302
308
|
self._spec_component.validate_config(config)
|
303
309
|
|
@@ -7,6 +7,7 @@ from __future__ import annotations
|
|
7
7
|
import datetime
|
8
8
|
import importlib
|
9
9
|
import inspect
|
10
|
+
import logging
|
10
11
|
import re
|
11
12
|
from functools import partial
|
12
13
|
from typing import (
|
@@ -543,6 +544,10 @@ from airbyte_cdk.sources.declarative.stream_slicers import (
|
|
543
544
|
StreamSlicer,
|
544
545
|
StreamSlicerTestReadDecorator,
|
545
546
|
)
|
547
|
+
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
548
|
+
DeclarativePartitionFactory,
|
549
|
+
StreamSlicerPartitionGenerator,
|
550
|
+
)
|
546
551
|
from airbyte_cdk.sources.declarative.transformations import (
|
547
552
|
AddFields,
|
548
553
|
RecordTransformation,
|
@@ -594,6 +599,7 @@ from airbyte_cdk.sources.streams.call_rate import (
|
|
594
599
|
Rate,
|
595
600
|
UnlimitedCallRatePolicy,
|
596
601
|
)
|
602
|
+
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
597
603
|
from airbyte_cdk.sources.streams.concurrent.clamping import (
|
598
604
|
ClampingEndProvider,
|
599
605
|
ClampingStrategy,
|
@@ -603,7 +609,14 @@ from airbyte_cdk.sources.streams.concurrent.clamping import (
|
|
603
609
|
WeekClampingStrategy,
|
604
610
|
Weekday,
|
605
611
|
)
|
606
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import
|
612
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import (
|
613
|
+
ConcurrentCursor,
|
614
|
+
Cursor,
|
615
|
+
CursorField,
|
616
|
+
FinalStateCursor,
|
617
|
+
)
|
618
|
+
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
619
|
+
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
607
620
|
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
608
621
|
CustomFormatConcurrentStreamStateConverter,
|
609
622
|
DateTimeStreamStateConverter,
|
@@ -633,7 +646,6 @@ class ModelToComponentFactory:
|
|
633
646
|
emit_connector_builder_messages: bool = False,
|
634
647
|
disable_retries: bool = False,
|
635
648
|
disable_cache: bool = False,
|
636
|
-
disable_resumable_full_refresh: bool = False,
|
637
649
|
message_repository: Optional[MessageRepository] = None,
|
638
650
|
connector_state_manager: Optional[ConnectorStateManager] = None,
|
639
651
|
max_concurrent_async_job_count: Optional[int] = None,
|
@@ -644,7 +656,6 @@ class ModelToComponentFactory:
|
|
644
656
|
self._emit_connector_builder_messages = emit_connector_builder_messages
|
645
657
|
self._disable_retries = disable_retries
|
646
658
|
self._disable_cache = disable_cache
|
647
|
-
self._disable_resumable_full_refresh = disable_resumable_full_refresh
|
648
659
|
self._message_repository = message_repository or InMemoryMessageRepository(
|
649
660
|
self._evaluate_log_level(emit_connector_builder_messages)
|
650
661
|
)
|
@@ -1920,8 +1931,8 @@ class ModelToComponentFactory:
|
|
1920
1931
|
)
|
1921
1932
|
|
1922
1933
|
def create_declarative_stream(
|
1923
|
-
self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
|
1924
|
-
) -> DeclarativeStream:
|
1934
|
+
self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
|
1935
|
+
) -> Union[DeclarativeStream, AbstractStream]:
|
1925
1936
|
# When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
|
1926
1937
|
# components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
|
1927
1938
|
# Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
|
@@ -2027,15 +2038,6 @@ class ModelToComponentFactory:
|
|
2027
2038
|
file_uploader=file_uploader,
|
2028
2039
|
incremental_sync=model.incremental_sync,
|
2029
2040
|
)
|
2030
|
-
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
2031
|
-
|
2032
|
-
if model.state_migrations:
|
2033
|
-
state_transformations = [
|
2034
|
-
self._create_component_from_model(state_migration, config, declarative_stream=model)
|
2035
|
-
for state_migration in model.state_migrations
|
2036
|
-
]
|
2037
|
-
else:
|
2038
|
-
state_transformations = []
|
2039
2041
|
|
2040
2042
|
schema_loader: Union[
|
2041
2043
|
CompositeSchemaLoader,
|
@@ -2063,6 +2065,50 @@ class ModelToComponentFactory:
|
|
2063
2065
|
options["name"] = model.name
|
2064
2066
|
schema_loader = DefaultSchemaLoader(config=config, parameters=options)
|
2065
2067
|
|
2068
|
+
if (
|
2069
|
+
isinstance(combined_slicers, PartitionRouter)
|
2070
|
+
and not self._emit_connector_builder_messages
|
2071
|
+
and not is_parent
|
2072
|
+
):
|
2073
|
+
# We are starting to migrate streams to instantiate directly the DefaultStream instead of instantiating the
|
2074
|
+
# DeclarativeStream and assembling the DefaultStream from that. The plan is the following:
|
2075
|
+
# * Streams with neither a partition router nor a cursor, and streams with only a partition router. This is the `isinstance(combined_slicers, PartitionRouter)` condition, as the first kind will have a SinglePartitionRouter
|
2076
|
+
# * Streams without partition router but with cursor
|
2077
|
+
# * Streams with both partition router and cursor
|
2078
|
+
# We specifically exclude parent streams here because SubstreamPartitionRouter has not been updated yet
|
2079
|
+
# We specifically exclude Connector Builder stuff for now as Brian is working on this anyway
|
2080
|
+
stream_name = model.name or ""
|
2081
|
+
partition_generator = StreamSlicerPartitionGenerator(
|
2082
|
+
DeclarativePartitionFactory(
|
2083
|
+
stream_name,
|
2084
|
+
schema_loader,
|
2085
|
+
retriever,
|
2086
|
+
self._message_repository,
|
2087
|
+
),
|
2088
|
+
stream_slicer=combined_slicers,
|
2089
|
+
)
|
2090
|
+
return DefaultStream(
|
2091
|
+
partition_generator=partition_generator,
|
2092
|
+
name=stream_name,
|
2093
|
+
json_schema=schema_loader.get_json_schema,
|
2094
|
+
primary_key=get_primary_key_from_stream(primary_key),
|
2095
|
+
cursor_field=None,
|
2096
|
+
# FIXME we should have the cursor field as part of the interface of cursor
|
2097
|
+
logger=logging.getLogger(f"airbyte.{stream_name}"),
|
2098
|
+
# FIXME this is a breaking change compared to the old implementation,
|
2099
|
+
cursor=FinalStateCursor(stream_name, None, self._message_repository),
|
2100
|
+
supports_file_transfer=hasattr(model, "file_uploader")
|
2101
|
+
and bool(model.file_uploader),
|
2102
|
+
)
|
2103
|
+
|
2104
|
+
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
2105
|
+
if model.state_migrations:
|
2106
|
+
state_transformations = [
|
2107
|
+
self._create_component_from_model(state_migration, config, declarative_stream=model)
|
2108
|
+
for state_migration in model.state_migrations
|
2109
|
+
]
|
2110
|
+
else:
|
2111
|
+
state_transformations = []
|
2066
2112
|
return DeclarativeStream(
|
2067
2113
|
name=model.name or "",
|
2068
2114
|
primary_key=primary_key,
|
@@ -2083,7 +2129,7 @@ class ModelToComponentFactory:
|
|
2083
2129
|
],
|
2084
2130
|
config: Config,
|
2085
2131
|
stream_name: Optional[str] = None,
|
2086
|
-
) ->
|
2132
|
+
) -> PartitionRouter:
|
2087
2133
|
if (
|
2088
2134
|
hasattr(model, "partition_router")
|
2089
2135
|
and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
|
@@ -2104,7 +2150,7 @@ class ModelToComponentFactory:
|
|
2104
2150
|
return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
|
2105
2151
|
model=stream_slicer_model, config=config, stream_name=stream_name or ""
|
2106
2152
|
)
|
2107
|
-
return
|
2153
|
+
return SinglePartitionRouter(parameters={})
|
2108
2154
|
|
2109
2155
|
def _build_incremental_cursor(
|
2110
2156
|
self,
|
@@ -2121,7 +2167,9 @@ class ModelToComponentFactory:
|
|
2121
2167
|
else []
|
2122
2168
|
)
|
2123
2169
|
|
2124
|
-
if model.incremental_sync and
|
2170
|
+
if model.incremental_sync and (
|
2171
|
+
stream_slicer and not isinstance(stream_slicer, SinglePartitionRouter)
|
2172
|
+
):
|
2125
2173
|
if model.retriever.type == "AsyncRetriever":
|
2126
2174
|
stream_name = model.name or ""
|
2127
2175
|
stream_namespace = None
|
@@ -2194,7 +2242,11 @@ class ModelToComponentFactory:
|
|
2194
2242
|
else:
|
2195
2243
|
state_transformations = []
|
2196
2244
|
|
2197
|
-
if
|
2245
|
+
if (
|
2246
|
+
model.incremental_sync
|
2247
|
+
and stream_slicer
|
2248
|
+
and not isinstance(stream_slicer, SinglePartitionRouter)
|
2249
|
+
):
|
2198
2250
|
return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
|
2199
2251
|
state_manager=self._connector_state_manager,
|
2200
2252
|
model_type=DatetimeBasedCursorModel,
|
@@ -2233,28 +2285,6 @@ class ModelToComponentFactory:
|
|
2233
2285
|
)
|
2234
2286
|
return None
|
2235
2287
|
|
2236
|
-
def _build_resumable_cursor(
|
2237
|
-
self,
|
2238
|
-
model: Union[
|
2239
|
-
AsyncRetrieverModel,
|
2240
|
-
CustomRetrieverModel,
|
2241
|
-
SimpleRetrieverModel,
|
2242
|
-
],
|
2243
|
-
stream_slicer: Optional[PartitionRouter],
|
2244
|
-
) -> Optional[StreamSlicer]:
|
2245
|
-
if hasattr(model, "paginator") and model.paginator and not stream_slicer:
|
2246
|
-
# For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
|
2247
|
-
return ResumableFullRefreshCursor(parameters={})
|
2248
|
-
elif stream_slicer:
|
2249
|
-
# For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
|
2250
|
-
return PerPartitionCursor(
|
2251
|
-
cursor_factory=CursorFactory(
|
2252
|
-
create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
|
2253
|
-
),
|
2254
|
-
partition_router=stream_slicer,
|
2255
|
-
)
|
2256
|
-
return None
|
2257
|
-
|
2258
2288
|
def _merge_stream_slicers(
|
2259
2289
|
self, model: DeclarativeStreamModel, config: Config
|
2260
2290
|
) -> Optional[StreamSlicer]:
|
@@ -2291,11 +2321,7 @@ class ModelToComponentFactory:
|
|
2291
2321
|
if model.incremental_sync:
|
2292
2322
|
return self._build_incremental_cursor(model, stream_slicer, config)
|
2293
2323
|
|
2294
|
-
return
|
2295
|
-
stream_slicer
|
2296
|
-
if self._disable_resumable_full_refresh
|
2297
|
-
else self._build_resumable_cursor(retriever_model, stream_slicer)
|
2298
|
-
)
|
2324
|
+
return stream_slicer
|
2299
2325
|
|
2300
2326
|
def create_default_error_handler(
|
2301
2327
|
self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
|
@@ -2577,9 +2603,6 @@ class ModelToComponentFactory:
|
|
2577
2603
|
def create_dynamic_schema_loader(
|
2578
2604
|
self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
|
2579
2605
|
) -> DynamicSchemaLoader:
|
2580
|
-
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
2581
|
-
combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
|
2582
|
-
|
2583
2606
|
schema_transformations = []
|
2584
2607
|
if model.schema_transformations:
|
2585
2608
|
for transformation_model in model.schema_transformations:
|
@@ -2592,7 +2615,7 @@ class ModelToComponentFactory:
|
|
2592
2615
|
config=config,
|
2593
2616
|
name=name,
|
2594
2617
|
primary_key=None,
|
2595
|
-
stream_slicer=
|
2618
|
+
stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
|
2596
2619
|
transformations=[],
|
2597
2620
|
use_cache=True,
|
2598
2621
|
log_formatter=(
|
@@ -2945,7 +2968,10 @@ class ModelToComponentFactory:
|
|
2945
2968
|
self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
|
2946
2969
|
) -> ParentStreamConfig:
|
2947
2970
|
declarative_stream = self._create_component_from_model(
|
2948
|
-
model.stream,
|
2971
|
+
model.stream,
|
2972
|
+
config=config,
|
2973
|
+
is_parent=True,
|
2974
|
+
**kwargs,
|
2949
2975
|
)
|
2950
2976
|
request_option = (
|
2951
2977
|
self._create_component_from_model(model.request_option, config=config)
|
@@ -3855,15 +3881,12 @@ class ModelToComponentFactory:
|
|
3855
3881
|
def create_http_components_resolver(
|
3856
3882
|
self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None
|
3857
3883
|
) -> Any:
|
3858
|
-
stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
|
3859
|
-
combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
|
3860
|
-
|
3861
3884
|
retriever = self._create_component_from_model(
|
3862
3885
|
model=model.retriever,
|
3863
3886
|
config=config,
|
3864
3887
|
name=f"{stream_name if stream_name else '__http_components_resolver'}",
|
3865
3888
|
primary_key=None,
|
3866
|
-
stream_slicer=
|
3889
|
+
stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
|
3867
3890
|
transformations=[],
|
3868
3891
|
)
|
3869
3892
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
from typing import Any, Iterable, Mapping, Optional
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
6
|
+
from airbyte_cdk.sources.declarative.schema import SchemaLoader
|
6
7
|
from airbyte_cdk.sources.message import MessageRepository
|
7
8
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
8
9
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
@@ -11,11 +12,23 @@ from airbyte_cdk.sources.types import Record, StreamSlice
|
|
11
12
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
12
13
|
|
13
14
|
|
15
|
+
class SchemaLoaderCachingDecorator(SchemaLoader):
|
16
|
+
def __init__(self, schema_loader: SchemaLoader):
|
17
|
+
self._decorated = schema_loader
|
18
|
+
self._loaded_schema: Optional[Mapping[str, Any]] = None
|
19
|
+
|
20
|
+
def get_json_schema(self) -> Mapping[str, Any]:
|
21
|
+
if self._loaded_schema is None:
|
22
|
+
self._loaded_schema = self._decorated.get_json_schema()
|
23
|
+
|
24
|
+
return self._loaded_schema # type: ignore # at that point, we assume the schema will be populated
|
25
|
+
|
26
|
+
|
14
27
|
class DeclarativePartitionFactory:
|
15
28
|
def __init__(
|
16
29
|
self,
|
17
30
|
stream_name: str,
|
18
|
-
|
31
|
+
schema_loader: SchemaLoader,
|
19
32
|
retriever: Retriever,
|
20
33
|
message_repository: MessageRepository,
|
21
34
|
) -> None:
|
@@ -25,17 +38,17 @@ class DeclarativePartitionFactory:
|
|
25
38
|
In order to avoid these problems, we will create one retriever per thread which should make the processing thread-safe.
|
26
39
|
"""
|
27
40
|
self._stream_name = stream_name
|
28
|
-
self.
|
41
|
+
self._schema_loader = SchemaLoaderCachingDecorator(schema_loader)
|
29
42
|
self._retriever = retriever
|
30
43
|
self._message_repository = message_repository
|
31
44
|
|
32
45
|
def create(self, stream_slice: StreamSlice) -> Partition:
|
33
46
|
return DeclarativePartition(
|
34
|
-
self._stream_name,
|
35
|
-
self.
|
36
|
-
self._retriever,
|
37
|
-
self._message_repository,
|
38
|
-
stream_slice,
|
47
|
+
stream_name=self._stream_name,
|
48
|
+
schema_loader=self._schema_loader,
|
49
|
+
retriever=self._retriever,
|
50
|
+
message_repository=self._message_repository,
|
51
|
+
stream_slice=stream_slice,
|
39
52
|
)
|
40
53
|
|
41
54
|
|
@@ -43,20 +56,22 @@ class DeclarativePartition(Partition):
|
|
43
56
|
def __init__(
|
44
57
|
self,
|
45
58
|
stream_name: str,
|
46
|
-
|
59
|
+
schema_loader: SchemaLoader,
|
47
60
|
retriever: Retriever,
|
48
61
|
message_repository: MessageRepository,
|
49
62
|
stream_slice: StreamSlice,
|
50
63
|
):
|
51
64
|
self._stream_name = stream_name
|
52
|
-
self.
|
65
|
+
self._schema_loader = schema_loader
|
53
66
|
self._retriever = retriever
|
54
67
|
self._message_repository = message_repository
|
55
68
|
self._stream_slice = stream_slice
|
56
69
|
self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
|
57
70
|
|
58
71
|
def read(self) -> Iterable[Record]:
|
59
|
-
for stream_data in self._retriever.read_records(
|
72
|
+
for stream_data in self._retriever.read_records(
|
73
|
+
self._schema_loader.get_json_schema(), self._stream_slice
|
74
|
+
):
|
60
75
|
if isinstance(stream_data, Mapping):
|
61
76
|
record = (
|
62
77
|
stream_data
|
@@ -6,7 +6,7 @@ import copy
|
|
6
6
|
import json
|
7
7
|
import logging
|
8
8
|
from functools import lru_cache
|
9
|
-
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional,
|
9
|
+
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
10
10
|
|
11
11
|
from typing_extensions import deprecated
|
12
12
|
|
@@ -196,6 +196,7 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
|
196
196
|
def cursor(self) -> Optional[Cursor]: # type: ignore[override] # StreamFaced expects to use only airbyte_cdk.sources.streams.concurrent.cursor.Cursor
|
197
197
|
return self._cursor
|
198
198
|
|
199
|
+
# FIXME the lru_cache seems to be mostly there because of a typing issue
|
199
200
|
@lru_cache(maxsize=None)
|
200
201
|
def get_json_schema(self) -> Mapping[str, Any]:
|
201
202
|
return self._abstract_stream.get_json_schema()
|
@@ -2,9 +2,8 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from functools import lru_cache
|
6
5
|
from logging import Logger
|
7
|
-
from typing import Any, Iterable, List, Mapping, Optional
|
6
|
+
from typing import Any, Callable, Iterable, List, Mapping, Optional, Union
|
8
7
|
|
9
8
|
from airbyte_cdk.models import AirbyteStream, SyncMode
|
10
9
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
@@ -20,7 +19,7 @@ class DefaultStream(AbstractStream):
|
|
20
19
|
self,
|
21
20
|
partition_generator: PartitionGenerator,
|
22
21
|
name: str,
|
23
|
-
json_schema: Mapping[str, Any],
|
22
|
+
json_schema: Union[Mapping[str, Any], Callable[[], Mapping[str, Any]]],
|
24
23
|
primary_key: List[str],
|
25
24
|
cursor_field: Optional[str],
|
26
25
|
logger: Logger,
|
@@ -53,14 +52,13 @@ class DefaultStream(AbstractStream):
|
|
53
52
|
def cursor_field(self) -> Optional[str]:
|
54
53
|
return self._cursor_field
|
55
54
|
|
56
|
-
@lru_cache(maxsize=None)
|
57
55
|
def get_json_schema(self) -> Mapping[str, Any]:
|
58
|
-
return self._json_schema
|
56
|
+
return self._json_schema() if callable(self._json_schema) else self._json_schema
|
59
57
|
|
60
58
|
def as_airbyte_stream(self) -> AirbyteStream:
|
61
59
|
stream = AirbyteStream(
|
62
60
|
name=self.name,
|
63
|
-
json_schema=dict(self.
|
61
|
+
json_schema=dict(self.get_json_schema()),
|
64
62
|
supported_sync_modes=[SyncMode.full_refresh],
|
65
63
|
is_resumable=False,
|
66
64
|
is_file_based=self._supports_file_transfer,
|
@@ -21,7 +21,7 @@ airbyte_cdk/connector_builder/models.py,sha256=9pIZ98LW_d6fRS39VdnUOf3cxGt4TkC5M
|
|
21
21
|
airbyte_cdk/connector_builder/test_reader/__init__.py,sha256=iTwBMoI9vaJotEgpqZbFjlxRcbxXYypSVJ9YxeHk7wc,120
|
22
22
|
airbyte_cdk/connector_builder/test_reader/helpers.py,sha256=vqoHpZeQ0BLIw2NiTNGXr0euA8gI_X0pcNRcHOv8sHM,27942
|
23
23
|
airbyte_cdk/connector_builder/test_reader/message_grouper.py,sha256=LDNl-xFQwA4RsUpn7684KbWaVH-SWWBIwhHvIgduLTE,7090
|
24
|
-
airbyte_cdk/connector_builder/test_reader/reader.py,sha256=
|
24
|
+
airbyte_cdk/connector_builder/test_reader/reader.py,sha256=DugoqS6SMrtOJ--2Y0F0h_9x8m632i7fSOPMAA0JHnc,21654
|
25
25
|
airbyte_cdk/connector_builder/test_reader/types.py,sha256=hPZG3jO03kBaPyW94NI3JHRS1jxXGSNBcN1HFzOxo5Y,2528
|
26
26
|
airbyte_cdk/destinations/__init__.py,sha256=FyDp28PT_YceJD5HDFhA-mrGfX9AONIyMQ4d68CHNxQ,213
|
27
27
|
airbyte_cdk/destinations/destination.py,sha256=CIq-yb8C_0QvcKCtmStaHfiqn53GEfRAIGGCkJhKP1Q,5880
|
@@ -86,7 +86,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=sV-ZY7dZ03V8GdAxPY
|
|
86
86
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
87
87
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
88
88
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
89
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
89
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=HQxvDEoMWtVdVRZgJylrT0YLx-R8sOgICjY3HnifvWs,27391
|
90
90
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
91
91
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
|
92
92
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
@@ -128,7 +128,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkH
|
|
128
128
|
airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
|
129
129
|
airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=oFGKs3oX0xO6DOL4E9x8rhxwbEoRcgx4HJVIL1RQ9c4,7269
|
130
130
|
airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=RpsAYG75bW0js2fQCzAN1nf3oeGyXwyt0LhJCHnlaUA,6031
|
131
|
-
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=
|
131
|
+
airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=VqR3lti_RLRRe0_1EwUn8_OsJTxQrGqU3n-T9GowAKk,27154
|
132
132
|
airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
133
133
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=V2lpYE9LJKvz6BUViHk4vaRGndxNABmPbDCtyYdkqaE,4013
|
134
134
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
@@ -141,7 +141,7 @@ airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9R
|
|
141
141
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=2UdpCz3yi7ISZTyqkQXSSy3dMxeyOWqV7OlAS5b9GVg,11568
|
142
142
|
airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
|
143
143
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
|
144
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
144
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=1DL6O8FDqFqCt7qdkvR0UZOuP2xo51Ff76BmDHe5v6c,182819
|
145
145
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
|
146
146
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
147
147
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -220,7 +220,7 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
|
|
220
220
|
airbyte_cdk/sources/declarative/spec/__init__.py,sha256=9FYO-fVOclrwjAW4qwRTbZRVopTc9rOaauAJfThdNCQ,177
|
221
221
|
airbyte_cdk/sources/declarative/spec/spec.py,sha256=SwL_pfXZgcLYLJY-MAeFMHug9oYh2tOWjgG0C3DoLOY,3602
|
222
222
|
airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=UX-cP_C-9FIFFPL9z8nuxu_rglssRsMOqQmQHN8FLB8,341
|
223
|
-
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=
|
223
|
+
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=Wk7P-Jpy3f3a59mwnc9ycJbpA3zVcgykNt2grBSXhBA,4272
|
224
224
|
airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
|
225
225
|
airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py,sha256=aUSleOw9elq3-5TaDUvp7H8W-2qUKqpr__kaJd8-ZFA,983
|
226
226
|
airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
|
@@ -316,12 +316,12 @@ airbyte_cdk/sources/streams/concurrent/README.md,sha256=0nvgnlCBfZJiPDAofT8yFmUh
|
|
316
316
|
airbyte_cdk/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
317
317
|
airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=aalik3FvyEjoeA1S3wUYEV3bgQLGrTnhYKPvT-rgy9E,3919
|
318
318
|
airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
|
319
|
-
airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=
|
319
|
+
airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=h4ZewhWn2PzPTt0lZZjcUL4rrpW9E_of7prnI3bm-c4,14004
|
320
320
|
airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=M0XmvF3vjlr4GbCM0XH1hAj7udiAONM9SnmXjqufzLM,1035
|
321
321
|
airbyte_cdk/sources/streams/concurrent/clamping.py,sha256=i26GVyui2ScEXSP-IP_61K2HaTp1-6lTlYHsZVYpuZA,3240
|
322
322
|
airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=xFFB8eEbtjGUdb42vkyWT5JB-WTUsaJlZ0gjKoVEycc,22307
|
323
323
|
airbyte_cdk/sources/streams/concurrent/cursor_types.py,sha256=ZyWLPpeLX1qXcP5MwS-wxK11IBMsnVPCw9zx8gA2_Ro,843
|
324
|
-
airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=
|
324
|
+
airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=SSufbo5f7OOYS8DZaABXeJVvodcfp9wb8J9lT5Xik3s,4744
|
325
325
|
airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
|
326
326
|
airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
|
327
327
|
airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=2t64b_z9cEPmlHZnjSiMTO8PEtEdiAJDG0JcYOtUqAE,3363
|
@@ -424,9 +424,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
424
424
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
425
425
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
426
426
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
427
|
-
airbyte_cdk-6.60.
|
428
|
-
airbyte_cdk-6.60.
|
429
|
-
airbyte_cdk-6.60.
|
430
|
-
airbyte_cdk-6.60.
|
431
|
-
airbyte_cdk-6.60.
|
432
|
-
airbyte_cdk-6.60.
|
427
|
+
airbyte_cdk-6.60.15.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
428
|
+
airbyte_cdk-6.60.15.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
429
|
+
airbyte_cdk-6.60.15.dist-info/METADATA,sha256=_wL4DjytQL8-LKrvnFcd59JEjILORRopG7q2_z3q5cE,6478
|
430
|
+
airbyte_cdk-6.60.15.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
431
|
+
airbyte_cdk-6.60.15.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
|
432
|
+
airbyte_cdk-6.60.15.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|