airbyte-cdk 6.62.0.dev3__py3-none-any.whl → 7.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions exactly as they appear in the public registry.
- airbyte_cdk/__init__.py +2 -2
- airbyte_cdk/connector_builder/connector_builder_handler.py +7 -7
- airbyte_cdk/connector_builder/main.py +2 -2
- airbyte_cdk/connector_builder/test_reader/reader.py +2 -2
- airbyte_cdk/{sources → legacy/sources}/declarative/declarative_stream.py +2 -2
- airbyte_cdk/legacy/sources/declarative/incremental/__init__.py +30 -0
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/datetime_based_cursor.py +1 -1
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/global_substream_cursor.py +4 -2
- airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +1 -1
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/per_partition_with_global.py +8 -6
- airbyte_cdk/{sources → legacy/sources}/declarative/incremental/resumable_full_refresh_cursor.py +1 -1
- airbyte_cdk/manifest_server/Dockerfile +2 -2
- airbyte_cdk/manifest_server/README.md +22 -0
- airbyte_cdk/manifest_server/api_models/__init__.py +2 -0
- airbyte_cdk/manifest_server/api_models/manifest.py +12 -0
- airbyte_cdk/manifest_server/api_models/stream.py +2 -2
- airbyte_cdk/manifest_server/app.py +6 -0
- airbyte_cdk/manifest_server/cli/_common.py +1 -0
- airbyte_cdk/manifest_server/command_processor/processor.py +2 -5
- airbyte_cdk/manifest_server/command_processor/utils.py +1 -1
- airbyte_cdk/manifest_server/helpers/__init__.py +0 -0
- airbyte_cdk/manifest_server/helpers/tracing.py +36 -0
- airbyte_cdk/manifest_server/routers/manifest.py +38 -2
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +6 -3
- airbyte_cdk/sources/declarative/checks/check_stream.py +6 -3
- airbyte_cdk/sources/declarative/checks/connection_checker.py +5 -2
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +38 -451
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -27
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -24
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +58 -5
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1 -21
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -5
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +32 -24
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -3
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +21 -7
- airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +1 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -1
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +0 -4
- airbyte_cdk/sources/utils/schema_helpers.py +29 -9
- airbyte_cdk/sources/utils/transform.py +25 -13
- airbyte_cdk/utils/spec_schema_transformations.py +7 -5
- {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-7.0.0.dist-info}/METADATA +4 -2
- {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-7.0.0.dist-info}/RECORD +51 -49
- /airbyte_cdk/{sources → legacy/sources}/declarative/incremental/declarative_cursor.py +0 -0
- /airbyte_cdk/manifest_server/{auth.py → helpers/auth.py} +0 -0
- {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-7.0.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-7.0.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-7.0.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-7.0.0.dist-info}/entry_points.txt +0 -0
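
A large share of this release is the relocation of the legacy declarative stream and cursor modules under `airbyte_cdk/legacy/`, with a new `airbyte_cdk/legacy/sources/declarative/incremental/__init__.py` providing the package exports. A minimal import-migration sketch, assuming the importable module paths mirror the file moves listed above (the exact package-level re-exports should be checked against the new `__init__.py`):

```python
# airbyte-cdk 6.x: legacy declarative cursors lived under airbyte_cdk.sources.
# from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
# from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import GlobalSubstreamCursor

# airbyte-cdk 7.0.0: the same modules now live under airbyte_cdk.legacy.sources
# (paths assumed to follow the wheel file moves above).
from airbyte_cdk.legacy.sources.declarative.incremental.datetime_based_cursor import (
    DatetimeBasedCursor,
)
from airbyte_cdk.legacy.sources.declarative.incremental.global_substream_cursor import (
    GlobalSubstreamCursor,
)
```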
The detailed hunks below are from airbyte_cdk/sources/declarative/concurrent_declarative_source.py (+38 -451). Deleted lines that the extraction truncated or dropped are left truncated or bare rather than reconstructed.

@@ -11,15 +11,11 @@ from typing import (
     Any,
     ClassVar,
     Dict,
-    Generic,
     Iterator,
     List,
     Mapping,
-    MutableMapping,
     Optional,
     Set,
-    Tuple,
-    Union,
 )

 import orjson
@@ -43,37 +39,22 @@ from airbyte_cdk.models import (
     ConfiguredAirbyteCatalog,
     ConnectorSpecification,
     FailureType,
+    Status,
 )
 from airbyte_cdk.models.airbyte_protocol_serializers import AirbyteMessageSerializer
-from airbyte_cdk.sources
+from airbyte_cdk.sources import Source
 from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
 from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
-from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
-from airbyte_cdk.sources.declarative.incremental import (
-    ConcurrentPerPartitionCursor,
-    GlobalSubstreamCursor,
-)
-from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
-from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
-    PerPartitionWithGlobalCursor,
-)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedBoolean
-from airbyte_cdk.sources.declarative.models import FileUploader
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ConcurrencyLevel as ConcurrencyLevelModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    DatetimeBasedCursor as DatetimeBasedCursorModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DeclarativeStream as DeclarativeStreamModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    IncrementingCountCursor as IncrementingCountCursorModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     Spec as SpecModel,
 )
@@ -95,24 +76,12 @@ from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
-from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
 from airbyte_cdk.sources.declarative.resolvers import COMPONENTS_RESOLVER_TYPE_MAPPING
-from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.spec.spec import Spec
-from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
-    DeclarativePartitionFactory,
-    StreamSlicerPartitionGenerator,
-)
 from airbyte_cdk.sources.declarative.types import Config, ConnectionDefinition
 from airbyte_cdk.sources.message.concurrent_repository import ConcurrentMessageRepository
-from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
-from airbyte_cdk.sources.source import TState
-from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
-from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
-from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
-from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
-from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
 from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
 from airbyte_cdk.sources.utils.slice_logger import (
     AlwaysLogSliceLogger,
@@ -155,23 +124,17 @@ def _get_declarative_component_schema() -> Dict[str, Any]:
 )


-
-# and replaced with implementing the source.py:Source class
-#
-# todo: The `ConcurrentDeclarativeSource.message_repository()` method can also be removed once AbstractSource
-# is no longer inherited from since the only external dependency is from that class.
-#
-# todo: It is worth investigating removal of the Generic[TState] since it will always be Optional[List[AirbyteStateMessage]]
-class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
+class ConcurrentDeclarativeSource(Source):
     # By default, we defer to a value of 2. A value lower than could cause a PartitionEnqueuer to be stuck in a state of deadlock
     # because it has hit the limit of futures but not partition reader is consuming them.
     _LOWEST_SAFE_CONCURRENCY_LEVEL = 2

     def __init__(
         self,
-        catalog: Optional[ConfiguredAirbyteCatalog],
-        config: Optional[Mapping[str, Any]],
-        state:
+        catalog: Optional[ConfiguredAirbyteCatalog] = None,
+        config: Optional[Mapping[str, Any]] = None,
+        state: Optional[List[AirbyteStateMessage]] = None,
+        *,
         source_config: ConnectionDefinition,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
@@ -395,17 +358,6 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         """
         return self._source_config

-    # TODO: Deprecate this class once ConcurrentDeclarativeSource no longer inherits AbstractSource
-    @property
-    def message_repository(self) -> MessageRepository:
-        return self._message_repository
-
-    # TODO: Remove this. This property is necessary to safely migrate Stripe during the transition state.
-    @property
-    def is_partially_declarative(self) -> bool:
-        """This flag used to avoid unexpected AbstractStreamFacade processing as concurrent streams."""
-        return False
-
     def deprecation_warnings(self) -> List[ConnectorBuilderLogMessage]:
         return self._constructor.get_model_deprecations()

@@ -416,48 +368,23 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         catalog: ConfiguredAirbyteCatalog,
         state: Optional[List[AirbyteStateMessage]] = None,
     ) -> Iterator[AirbyteMessage]:
-
-
-
-
-        # synchronous streams
-        if len(concurrent_streams) > 0:
-            concurrent_stream_names = set(
-                [concurrent_stream.name for concurrent_stream in concurrent_streams]
-            )
-
-            selected_concurrent_streams = self._select_streams(
-                streams=concurrent_streams, configured_catalog=catalog
-            )
-            # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
-            # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
-            if selected_concurrent_streams:
-                yield from self._concurrent_source.read(selected_concurrent_streams)
-
-            # Sync all streams that are not concurrent compatible. We filter out concurrent streams because the
-            # existing AbstractSource.read() implementation iterates over the catalog when syncing streams. Many
-            # of which were already synced using the Concurrent CDK
-            filtered_catalog = self._remove_concurrent_streams_from_catalog(
-                catalog=catalog, concurrent_stream_names=concurrent_stream_names
-            )
-        else:
-            filtered_catalog = catalog
-
-        # It is no need run read for synchronous streams if they are not exists.
-        if not filtered_catalog.streams:
-            return
+        selected_concurrent_streams = self._select_streams(
+            streams=self.streams(config=self._config),  # type: ignore # We are migrating away from the DeclarativeStream implementation and streams() only returns the concurrent-compatible AbstractStream. To preserve compatibility, we retain the existing method interface
+            configured_catalog=catalog,
+        )

-
+        # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
+        # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
+        if len(selected_concurrent_streams) > 0:
+            yield from self._concurrent_source.read(selected_concurrent_streams)

     def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
-        concurrent_streams, synchronous_streams = self._group_streams(config=config)
         return AirbyteCatalog(
-            streams=[
-                stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
-            ]
+            streams=[stream.as_airbyte_stream() for stream in self.streams(config=self._config)]
         )

-
+    # todo: add PR comment about whether we can change the signature to List[AbstractStream]
+    def streams(self, config: Mapping[str, Any]) -> List[AbstractStream]:  # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
         """
         The `streams` method is used as part of the AbstractSource in the following cases:
         * ConcurrentDeclarativeSource.check -> ManifestDeclarativeSource.check -> AbstractSource.check -> DeclarativeSource.check_connection -> CheckStream.check_connection -> streams
@@ -468,15 +395,13 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         """

         if self._spec_component:
-            self._spec_component.validate_config(
+            self._spec_component.validate_config(self._config)

-        stream_configs = (
-            self._stream_configs(self._source_config, config=config) + self.dynamic_streams
-        )
+        stream_configs = self._stream_configs(self._source_config) + self.dynamic_streams

         api_budget_model = self._source_config.get("api_budget")
         if api_budget_model:
-            self._constructor.set_api_budget(api_budget_model,
+            self._constructor.set_api_budget(api_budget_model, self._config)

         source_streams = [
             self._constructor.create_component(
@@ -486,7 +411,7 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
                     else DeclarativeStreamModel
                 ),
                 stream_config,
-
+                self._config,
                 emit_connector_builder_messages=self._emit_connector_builder_messages,
             )
             for stream_config in self._initialize_cache_for_parent_streams(deepcopy(stream_configs))
@@ -558,315 +483,36 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         )

     def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
-
+        check = self._source_config.get("check")
+        if not check:
+            raise ValueError(f"Missing 'check' component definition within the manifest.")

-    def check_connection(
-        self, logger: logging.Logger, config: Mapping[str, Any]
-    ) -> Tuple[bool, Any]:
-        """
-        :param logger: The source logger
-        :param config: The user-provided configuration as specified by the source's spec.
-        This usually contains information required to check connection e.g. tokens, secrets and keys etc.
-        :return: A tuple of (boolean, error). If boolean is true, then the connection check is successful
-        and we can connect to the underlying data source using the provided configuration.
-        Otherwise, the input config cannot be used to connect to the underlying data source,
-        and the "error" object should describe what went wrong.
-        The error object will be cast to string to display the problem to the user.
-        """
-        return self.connection_checker.check_connection(self, logger, config)
-
-    @property
-    def connection_checker(self) -> ConnectionChecker:
-        check = self._source_config["check"]
         if "type" not in check:
             check["type"] = "CheckStream"
-
+        connection_checker = self._constructor.create_component(
             COMPONENTS_CHECKER_TYPE_MAPPING[check["type"]],
             check,
             dict(),
             emit_connector_builder_messages=self._emit_connector_builder_messages,
         )
-        if isinstance(
-            return check_stream
-        else:
+        if not isinstance(connection_checker, ConnectionChecker):
             raise ValueError(
-                f"Expected to generate a ConnectionChecker component, but received {
+                f"Expected to generate a ConnectionChecker component, but received {connection_checker.__class__}"
             )

+        check_succeeded, error = connection_checker.check_connection(self, logger, self._config)
+        if not check_succeeded:
+            return AirbyteConnectionStatus(status=Status.FAILED, message=repr(error))
+        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
+
     @property
     def dynamic_streams(self) -> List[Dict[str, Any]]:
         return self._dynamic_stream_configs(
             manifest=self._source_config,
-            config=self._config,
             with_dynamic_stream_name=True,
         )

-    def _group_streams(
-        self, config: Mapping[str, Any]
-    ) -> Tuple[List[AbstractStream], List[Stream]]:
-        concurrent_streams: List[AbstractStream] = []
-        synchronous_streams: List[Stream] = []
-
-        # Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
-        # and this is validated during the initialization of the source.
-        streams = self._stream_configs(self._source_config, config) + self._dynamic_stream_configs(
-            self._source_config, config
-        )
-
-        name_to_stream_mapping = {stream["name"]: stream for stream in streams}
-
-        for declarative_stream in self.streams(config=config):
-            # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
-            # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
-            # so we need to treat them as synchronous
-
-            if isinstance(declarative_stream, AbstractStream):
-                concurrent_streams.append(declarative_stream)
-                continue
-
-            supports_file_transfer = (
-                isinstance(declarative_stream, DeclarativeStream)
-                and "file_uploader" in name_to_stream_mapping[declarative_stream.name]
-            )
-
-            if (
-                isinstance(declarative_stream, DeclarativeStream)
-                and name_to_stream_mapping[declarative_stream.name]["type"]
-                == "StateDelegatingStream"
-            ):
-                stream_state = self._connector_state_manager.get_stream_state(
-                    stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                )
-
-                name_to_stream_mapping[declarative_stream.name] = (
-                    name_to_stream_mapping[declarative_stream.name]["incremental_stream"]
-                    if stream_state
-                    else name_to_stream_mapping[declarative_stream.name]["full_refresh_stream"]
-                )
-
-            if isinstance(declarative_stream, DeclarativeStream) and (
-                name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
-                == "SimpleRetriever"
-                or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
-                == "AsyncRetriever"
-            ):
-                incremental_sync_component_definition = name_to_stream_mapping[
-                    declarative_stream.name
-                ].get("incremental_sync")
-
-                partition_router_component_definition = (
-                    name_to_stream_mapping[declarative_stream.name]
-                    .get("retriever", {})
-                    .get("partition_router")
-                )
-                is_without_partition_router_or_cursor = not bool(
-                    incremental_sync_component_definition
-                ) and not bool(partition_router_component_definition)
-
-                is_substream_without_incremental = (
-                    partition_router_component_definition
-                    and not incremental_sync_component_definition
-                )
-
-                if self._is_concurrent_cursor_incremental_without_partition_routing(
-                    declarative_stream, incremental_sync_component_definition
-                ):
-                    stream_state = self._connector_state_manager.get_stream_state(
-                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                    )
-                    stream_state = self._migrate_state(declarative_stream, stream_state)
-
-                    retriever = self._get_retriever(declarative_stream, stream_state)
-
-                    if isinstance(declarative_stream.retriever, AsyncRetriever) and isinstance(
-                        declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter
-                    ):
-                        cursor = declarative_stream.retriever.stream_slicer.stream_slicer
-
-                        if not isinstance(cursor, ConcurrentCursor | ConcurrentPerPartitionCursor):
-                            # This should never happen since we instantiate ConcurrentCursor in
-                            # model_to_component_factory.py
-                            raise ValueError(
-                                f"Expected AsyncJobPartitionRouter stream_slicer to be of type ConcurrentCursor, but received{cursor.__class__}"
-                            )
-
-                        partition_generator = StreamSlicerPartitionGenerator(
-                            partition_factory=DeclarativePartitionFactory(
-                                stream_name=declarative_stream.name,
-                                schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                                retriever=retriever,
-                                message_repository=self._message_repository,
-                                max_records_limit=self._limits.max_records
-                                if self._limits
-                                else None,
-                            ),
-                            stream_slicer=declarative_stream.retriever.stream_slicer,
-                            slice_limit=self._limits.max_slices
-                            if self._limits
-                            else None,  # technically not needed because create_default_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
-                        )
-                    else:
-                        if (
-                            incremental_sync_component_definition
-                            and incremental_sync_component_definition.get("type")
-                            == IncrementingCountCursorModel.__name__
-                        ):
-                            cursor = self._constructor.create_concurrent_cursor_from_incrementing_count_cursor(
-                                model_type=IncrementingCountCursorModel,
-                                component_definition=incremental_sync_component_definition,  # type: ignore # Not None because of the if condition above
-                                stream_name=declarative_stream.name,
-                                stream_namespace=declarative_stream.namespace,
-                                config=config or {},
-                            )
-                        else:
-                            cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
-                                model_type=DatetimeBasedCursorModel,
-                                component_definition=incremental_sync_component_definition,  # type: ignore # Not None because of the if condition above
-                                stream_name=declarative_stream.name,
-                                stream_namespace=declarative_stream.namespace,
-                                config=config or {},
-                                stream_state_migrations=declarative_stream.state_migrations,
-                            )
-                        partition_generator = StreamSlicerPartitionGenerator(
-                            partition_factory=DeclarativePartitionFactory(
-                                stream_name=declarative_stream.name,
-                                schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                                retriever=retriever,
-                                message_repository=self._message_repository,
-                                max_records_limit=self._limits.max_records
-                                if self._limits
-                                else None,
-                            ),
-                            stream_slicer=cursor,
-                            slice_limit=self._limits.max_slices if self._limits else None,
-                        )
-
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=cursor.cursor_field.cursor_field_key
-                            if hasattr(cursor, "cursor_field")
-                            and hasattr(
-                                cursor.cursor_field, "cursor_field_key"
-                            )  # FIXME this will need to be updated once we do the per partition
-                            else None,
-                            logger=self.logger,
-                            cursor=cursor,
-                            supports_file_transfer=supports_file_transfer,
-                        )
-                    )
-                elif (
-                    is_substream_without_incremental or is_without_partition_router_or_cursor
-                ) and hasattr(declarative_stream.retriever, "stream_slicer"):
-                    partition_generator = StreamSlicerPartitionGenerator(
-                        DeclarativePartitionFactory(
-                            stream_name=declarative_stream.name,
-                            schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                            retriever=declarative_stream.retriever,
-                            message_repository=self._message_repository,
-                            max_records_limit=self._limits.max_records if self._limits else None,
-                        ),
-                        declarative_stream.retriever.stream_slicer,
-                        slice_limit=self._limits.max_slices
-                        if self._limits
-                        else None,  # technically not needed because create_default_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
-                    )
-
-                    final_state_cursor = FinalStateCursor(
-                        stream_name=declarative_stream.name,
-                        stream_namespace=declarative_stream.namespace,
-                        message_repository=self._message_repository,
-                    )
-
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=None,
-                            logger=self.logger,
-                            cursor=final_state_cursor,
-                            supports_file_transfer=supports_file_transfer,
-                        )
-                    )
-                elif (
-                    incremental_sync_component_definition
-                    and incremental_sync_component_definition.get("type", "")
-                    == DatetimeBasedCursorModel.__name__
-                    and hasattr(declarative_stream.retriever, "stream_slicer")
-                    and isinstance(
-                        declarative_stream.retriever.stream_slicer,
-                        (GlobalSubstreamCursor, PerPartitionWithGlobalCursor),
-                    )
-                ):
-                    stream_state = self._connector_state_manager.get_stream_state(
-                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                    )
-                    stream_state = self._migrate_state(declarative_stream, stream_state)
-
-                    partition_router = declarative_stream.retriever.stream_slicer._partition_router
-
-                    perpartition_cursor = (
-                        self._constructor.create_concurrent_cursor_from_perpartition_cursor(
-                            state_manager=self._connector_state_manager,
-                            model_type=DatetimeBasedCursorModel,
-                            component_definition=incremental_sync_component_definition,
-                            stream_name=declarative_stream.name,
-                            stream_namespace=declarative_stream.namespace,
-                            config=config or {},
-                            stream_state=stream_state,
-                            partition_router=partition_router,
-                        )
-                    )
-
-                    retriever = self._get_retriever(declarative_stream, stream_state)
-
-                    partition_generator = StreamSlicerPartitionGenerator(
-                        DeclarativePartitionFactory(
-                            stream_name=declarative_stream.name,
-                            schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                            retriever=retriever,
-                            message_repository=self._message_repository,
-                            max_records_limit=self._limits.max_records if self._limits else None,
-                        ),
-                        perpartition_cursor,
-                        slice_limit=self._limits.max_slices if self._limits else None,
-                    )
-
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
-                            logger=self.logger,
-                            cursor=perpartition_cursor,
-                            supports_file_transfer=supports_file_transfer,
-                        )
-                    )
-                else:
-                    synchronous_streams.append(declarative_stream)
-            # TODO: Remove this. This check is necessary to safely migrate Stripe during the transition state.
-            # Condition below needs to ensure that concurrent support is not lost for sources that already support
-            # it before migration, but now are only partially migrated to declarative implementation (e.g., Stripe).
-            elif (
-                isinstance(declarative_stream, AbstractStreamFacade)
-                and self.is_partially_declarative
-            ):
-                concurrent_streams.append(declarative_stream.get_underlying_stream())
-            else:
-                synchronous_streams.append(declarative_stream)
-
-        return concurrent_streams, synchronous_streams
-
-    def _stream_configs(
-        self, manifest: Mapping[str, Any], config: Mapping[str, Any]
-    ) -> List[Dict[str, Any]]:
+    def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]:
         # This has a warning flag for static, but after we finish part 4 we'll replace manifest with self._source_config
         stream_configs = []
         for current_stream_config in manifest.get("streams", []):
@@ -879,7 +525,7 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
                 parameters={},
             )

-            if interpolated_boolean.eval(config=
+            if interpolated_boolean.eval(config=self._config):
                 stream_configs.extend(current_stream_config.get("streams", []))
             else:
                 if "type" not in current_stream_config:
@@ -890,7 +536,6 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
     def _dynamic_stream_configs(
         self,
         manifest: Mapping[str, Any],
-        config: Mapping[str, Any],
         with_dynamic_stream_name: Optional[bool] = None,
     ) -> List[Dict[str, Any]]:
         dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", [])
@@ -925,14 +570,14 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
                 components_resolver = self._constructor.create_component(
                     model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
                     component_definition=components_resolver_config,
-                    config=
+                    config=self._config,
                     stream_name=dynamic_definition.get("name"),
                 )
             else:
                 components_resolver = self._constructor.create_component(
                     model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
                     component_definition=components_resolver_config,
-                    config=
+                    config=self._config,
                 )

             stream_template_config = dynamic_definition["stream_template"]
@@ -985,40 +630,6 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):

         return dynamic_stream_configs

-    def _is_concurrent_cursor_incremental_without_partition_routing(
-        self,
-        declarative_stream: DeclarativeStream,
-        incremental_sync_component_definition: Mapping[str, Any] | None,
-    ) -> bool:
-        return (
-            incremental_sync_component_definition is not None
-            and bool(incremental_sync_component_definition)
-            and (
-                incremental_sync_component_definition.get("type", "")
-                in (DatetimeBasedCursorModel.__name__, IncrementingCountCursorModel.__name__)
-            )
-            and hasattr(declarative_stream.retriever, "stream_slicer")
-            and (
-                isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
-                # IncrementingCountCursorModel is hardcoded to be of type DatetimeBasedCursor
-                # add isintance check here if we want to create a Declarative IncrementingCountCursor
-                # or isinstance(
-                #     declarative_stream.retriever.stream_slicer, IncrementingCountCursor
-                # )
-                or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
-            )
-        )
-
-    @staticmethod
-    def _get_retriever(
-        declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
-    ) -> Retriever:
-        if declarative_stream and isinstance(declarative_stream.retriever, SimpleRetriever):
-            # We zero it out here, but since this is a cursor reference, the state is still properly
-            # instantiated for the other components that reference it
-            declarative_stream.retriever.cursor = None
-        return declarative_stream.retriever
-
     @staticmethod
     def _select_streams(
         streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
@@ -1031,27 +642,3 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
             abstract_streams.append(stream_instance)

         return abstract_streams
-
-    @staticmethod
-    def _remove_concurrent_streams_from_catalog(
-        catalog: ConfiguredAirbyteCatalog,
-        concurrent_stream_names: set[str],
-    ) -> ConfiguredAirbyteCatalog:
-        return ConfiguredAirbyteCatalog(
-            streams=[
-                stream
-                for stream in catalog.streams
-                if stream.stream.name not in concurrent_stream_names
-            ]
-        )
-
-    @staticmethod
-    def _migrate_state(
-        declarative_stream: DeclarativeStream, stream_state: MutableMapping[str, Any]
-    ) -> MutableMapping[str, Any]:
-        for state_migration in declarative_stream.state_migrations:
-            if state_migration.should_migrate(stream_state):
-                # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
-                stream_state = dict(state_migration.migrate(stream_state))
-
-        return stream_state