airbyte-cdk 6.62.0.dev3__py3-none-any.whl → 6.62.0.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. airbyte_cdk/__init__.py +2 -2
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +7 -7
  3. airbyte_cdk/connector_builder/main.py +2 -2
  4. airbyte_cdk/connector_builder/test_reader/reader.py +2 -2
  5. airbyte_cdk/{sources → legacy/sources}/declarative/declarative_stream.py +2 -2
  6. airbyte_cdk/legacy/sources/declarative/incremental/__init__.py +30 -0
  7. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/datetime_based_cursor.py +1 -1
  8. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/global_substream_cursor.py +4 -2
  9. airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +1 -1
  10. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/per_partition_with_global.py +8 -6
  11. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/resumable_full_refresh_cursor.py +1 -1
  12. airbyte_cdk/manifest_server/Dockerfile +2 -2
  13. airbyte_cdk/manifest_server/README.md +22 -0
  14. airbyte_cdk/manifest_server/api_models/__init__.py +2 -0
  15. airbyte_cdk/manifest_server/api_models/manifest.py +12 -0
  16. airbyte_cdk/manifest_server/api_models/stream.py +2 -2
  17. airbyte_cdk/manifest_server/app.py +6 -0
  18. airbyte_cdk/manifest_server/cli/_common.py +1 -0
  19. airbyte_cdk/manifest_server/command_processor/processor.py +2 -5
  20. airbyte_cdk/manifest_server/command_processor/utils.py +1 -1
  21. airbyte_cdk/manifest_server/helpers/__init__.py +0 -0
  22. airbyte_cdk/manifest_server/helpers/tracing.py +36 -0
  23. airbyte_cdk/manifest_server/routers/manifest.py +38 -2
  24. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +6 -3
  25. airbyte_cdk/sources/declarative/checks/check_stream.py +6 -3
  26. airbyte_cdk/sources/declarative/checks/connection_checker.py +5 -2
  27. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +38 -451
  28. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -27
  29. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -24
  30. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +58 -5
  31. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -2
  32. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1 -21
  33. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -5
  34. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +32 -24
  35. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -3
  36. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -2
  37. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +21 -7
  38. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +1 -1
  39. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -1
  40. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +0 -4
  41. airbyte_cdk/sources/utils/schema_helpers.py +29 -9
  42. airbyte_cdk/sources/utils/transform.py +25 -13
  43. airbyte_cdk/utils/spec_schema_transformations.py +7 -5
  44. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/METADATA +4 -2
  45. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/RECORD +51 -49
  46. /airbyte_cdk/{sources → legacy/sources}/declarative/incremental/declarative_cursor.py +0 -0
  47. /airbyte_cdk/manifest_server/{auth.py → helpers/auth.py} +0 -0
  48. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/LICENSE.txt +0 -0
  49. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/LICENSE_SHORT +0 -0
  50. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/WHEEL +0 -0
  51. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/entry_points.txt +0 -0
@@ -11,15 +11,11 @@ from typing import (
     Any,
     ClassVar,
     Dict,
-    Generic,
     Iterator,
     List,
     Mapping,
-    MutableMapping,
     Optional,
     Set,
-    Tuple,
-    Union,
 )
 
 import orjson
@@ -43,37 +39,22 @@ from airbyte_cdk.models import (
     ConfiguredAirbyteCatalog,
     ConnectorSpecification,
     FailureType,
+    Status,
 )
 from airbyte_cdk.models.airbyte_protocol_serializers import AirbyteMessageSerializer
-from airbyte_cdk.sources.abstract_source import AbstractSource
+from airbyte_cdk.sources import Source
 from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
 from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
-from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
-from airbyte_cdk.sources.declarative.incremental import (
-    ConcurrentPerPartitionCursor,
-    GlobalSubstreamCursor,
-)
-from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
-from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
-    PerPartitionWithGlobalCursor,
-)
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedBoolean
-from airbyte_cdk.sources.declarative.models import FileUploader
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     ConcurrencyLevel as ConcurrencyLevelModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    DatetimeBasedCursor as DatetimeBasedCursorModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DeclarativeStream as DeclarativeStreamModel,
 )
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    IncrementingCountCursor as IncrementingCountCursorModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     Spec as SpecModel,
 )
@@ -95,24 +76,12 @@ from airbyte_cdk.sources.declarative.parsers.manifest_reference_resolver import
 from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
     ModelToComponentFactory,
 )
-from airbyte_cdk.sources.declarative.partition_routers import AsyncJobPartitionRouter
 from airbyte_cdk.sources.declarative.resolvers import COMPONENTS_RESOLVER_TYPE_MAPPING
-from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, Retriever, SimpleRetriever
 from airbyte_cdk.sources.declarative.spec.spec import Spec
-from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
-    DeclarativePartitionFactory,
-    StreamSlicerPartitionGenerator,
-)
 from airbyte_cdk.sources.declarative.types import Config, ConnectionDefinition
 from airbyte_cdk.sources.message.concurrent_repository import ConcurrentMessageRepository
-from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository
-from airbyte_cdk.sources.source import TState
-from airbyte_cdk.sources.streams import Stream
+from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
 from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
-from airbyte_cdk.sources.streams.concurrent.abstract_stream_facade import AbstractStreamFacade
-from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, FinalStateCursor
-from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
-from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
 from airbyte_cdk.sources.streams.concurrent.partitions.types import QueueItem
 from airbyte_cdk.sources.utils.slice_logger import (
     AlwaysLogSliceLogger,
@@ -155,23 +124,17 @@ def _get_declarative_component_schema() -> Dict[str, Any]:
     )
 
 
-# todo: AbstractSource can be removed once we've completely moved off all legacy synchronous CDK code paths
-# and replaced with implementing the source.py:Source class
-#
-# todo: The `ConcurrentDeclarativeSource.message_repository()` method can also be removed once AbstractSource
-# is no longer inherited from since the only external dependency is from that class.
-#
-# todo: It is worth investigating removal of the Generic[TState] since it will always be Optional[List[AirbyteStateMessage]]
-class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
+class ConcurrentDeclarativeSource(Source):
     # By default, we defer to a value of 2. A value lower than could cause a PartitionEnqueuer to be stuck in a state of deadlock
     # because it has hit the limit of futures but not partition reader is consuming them.
     _LOWEST_SAFE_CONCURRENCY_LEVEL = 2
 
     def __init__(
         self,
-        catalog: Optional[ConfiguredAirbyteCatalog],
-        config: Optional[Mapping[str, Any]],
-        state: TState,
+        catalog: Optional[ConfiguredAirbyteCatalog] = None,
+        config: Optional[Mapping[str, Any]] = None,
+        state: Optional[List[AirbyteStateMessage]] = None,
+        *,
        source_config: ConnectionDefinition,
        debug: bool = False,
        emit_connector_builder_messages: bool = False,
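The signature change above is the crux of this release: `catalog`, `config`, and `state` gain `None` defaults, `state` is typed concretely as `Optional[List[AirbyteStateMessage]]` instead of the generic `TState`, and the bare `*` makes `source_config` a required keyword argument. A minimal usage sketch follows; the manifest dict is a hypothetical stand-in for a real `ConnectionDefinition`, not taken from this diff.

```python
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
    ConcurrentDeclarativeSource,
)

# Hypothetical minimal manifest; a real one also carries spec, schemas, etc.
manifest = {
    "version": "6.0.0",
    "type": "DeclarativeSource",
    "check": {"type": "CheckStream", "stream_names": ["users"]},
    "streams": [],  # placeholder; real manifests define at least one stream
}

# dev3 forced callers to pass the three leading arguments explicitly:
#   ConcurrentDeclarativeSource(None, None, None, source_config=manifest)
# dev4 lets spec/check-style invocations omit them entirely:
source = ConcurrentDeclarativeSource(source_config=manifest)
```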
@@ -395,17 +358,6 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         """
         return self._source_config
 
-    # TODO: Deprecate this class once ConcurrentDeclarativeSource no longer inherits AbstractSource
-    @property
-    def message_repository(self) -> MessageRepository:
-        return self._message_repository
-
-    # TODO: Remove this. This property is necessary to safely migrate Stripe during the transition state.
-    @property
-    def is_partially_declarative(self) -> bool:
-        """This flag used to avoid unexpected AbstractStreamFacade processing as concurrent streams."""
-        return False
-
     def deprecation_warnings(self) -> List[ConnectorBuilderLogMessage]:
         return self._constructor.get_model_deprecations()
 
@@ -416,48 +368,23 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         catalog: ConfiguredAirbyteCatalog,
         state: Optional[List[AirbyteStateMessage]] = None,
     ) -> Iterator[AirbyteMessage]:
-        concurrent_streams, _ = self._group_streams(config=config)
-
-        # ConcurrentReadProcessor pops streams that are finished being read so before syncing, the names of
-        # the concurrent streams must be saved so that they can be removed from the catalog before starting
-        # synchronous streams
-        if len(concurrent_streams) > 0:
-            concurrent_stream_names = set(
-                [concurrent_stream.name for concurrent_stream in concurrent_streams]
-            )
-
-            selected_concurrent_streams = self._select_streams(
-                streams=concurrent_streams, configured_catalog=catalog
-            )
-            # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
-            # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
-            if selected_concurrent_streams:
-                yield from self._concurrent_source.read(selected_concurrent_streams)
-
-            # Sync all streams that are not concurrent compatible. We filter out concurrent streams because the
-            # existing AbstractSource.read() implementation iterates over the catalog when syncing streams. Many
-            # of which were already synced using the Concurrent CDK
-            filtered_catalog = self._remove_concurrent_streams_from_catalog(
-                catalog=catalog, concurrent_stream_names=concurrent_stream_names
-            )
-        else:
-            filtered_catalog = catalog
-
-        # It is no need run read for synchronous streams if they are not exists.
-        if not filtered_catalog.streams:
-            return
+        selected_concurrent_streams = self._select_streams(
+            streams=self.streams(config=self._config),  # type: ignore # We are migrating away from the DeclarativeStream implementation and streams() only returns the concurrent-compatible AbstractStream. To preserve compatibility, we retain the existing method interface
+            configured_catalog=catalog,
+        )
 
-        yield from super().read(logger, config, filtered_catalog, state)
+        # It would appear that passing in an empty set of streams causes an infinite loop in ConcurrentReadProcessor.
+        # This is also evident in concurrent_source_adapter.py so I'll leave this out of scope to fix for now
+        if len(selected_concurrent_streams) > 0:
+            yield from self._concurrent_source.read(selected_concurrent_streams)
 
     def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
-        concurrent_streams, synchronous_streams = self._group_streams(config=config)
         return AirbyteCatalog(
-            streams=[
-                stream.as_airbyte_stream() for stream in concurrent_streams + synchronous_streams
-            ]
+            streams=[stream.as_airbyte_stream() for stream in self.streams(config=self._config)]
         )
 
-    def streams(self, config: Mapping[str, Any]) -> List[Union[Stream, AbstractStream]]:  # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
+    # todo: add PR comment about whether we can change the signature to List[AbstractStream]
+    def streams(self, config: Mapping[str, Any]) -> List[AbstractStream]:  # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
         """
         The `streams` method is used as part of the AbstractSource in the following cases:
         * ConcurrentDeclarativeSource.check -> ManifestDeclarativeSource.check -> AbstractSource.check -> DeclarativeSource.check_connection -> CheckStream.check_connection -> streams
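With `_group_streams()` gone, `read()` no longer splits streams into concurrent and synchronous groups, filters the catalog, or falls back to `AbstractSource.read()`. Condensed, the new flow is roughly the sketch below (paraphrasing the hunk; names such as `source` are assumed for illustration):

```python
from typing import Iterator

from airbyte_cdk.models import AirbyteMessage, ConfiguredAirbyteCatalog


def read_flow(source, catalog: ConfiguredAirbyteCatalog) -> Iterator[AirbyteMessage]:
    # streams() now returns only concurrent-compatible AbstractStream instances,
    # so a single selection pass replaces the old grouping logic.
    selected = source._select_streams(
        streams=source.streams(config=source._config),
        configured_catalog=catalog,
    )
    # Guard against the empty-selection case that can hang ConcurrentReadProcessor,
    # as noted in the hunk's comments.
    if selected:
        yield from source._concurrent_source.read(selected)
```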
@@ -468,15 +395,13 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         """
 
         if self._spec_component:
-            self._spec_component.validate_config(config)
+            self._spec_component.validate_config(self._config)
 
-        stream_configs = (
-            self._stream_configs(self._source_config, config=config) + self.dynamic_streams
-        )
+        stream_configs = self._stream_configs(self._source_config) + self.dynamic_streams
 
         api_budget_model = self._source_config.get("api_budget")
         if api_budget_model:
-            self._constructor.set_api_budget(api_budget_model, config)
+            self._constructor.set_api_budget(api_budget_model, self._config)
 
         source_streams = [
             self._constructor.create_component(
@@ -486,7 +411,7 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
                     else DeclarativeStreamModel
                 ),
                 stream_config,
-                config,
+                self._config,
                 emit_connector_builder_messages=self._emit_connector_builder_messages,
             )
             for stream_config in self._initialize_cache_for_parent_streams(deepcopy(stream_configs))
@@ -558,315 +483,36 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
         )
 
     def check(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteConnectionStatus:
-        return super().check(logger, config)
+        check = self._source_config.get("check")
+        if not check:
+            raise ValueError(f"Missing 'check' component definition within the manifest.")
 
-    def check_connection(
-        self, logger: logging.Logger, config: Mapping[str, Any]
-    ) -> Tuple[bool, Any]:
-        """
-        :param logger: The source logger
-        :param config: The user-provided configuration as specified by the source's spec.
-          This usually contains information required to check connection e.g. tokens, secrets and keys etc.
-        :return: A tuple of (boolean, error). If boolean is true, then the connection check is successful
-          and we can connect to the underlying data source using the provided configuration.
-          Otherwise, the input config cannot be used to connect to the underlying data source,
-          and the "error" object should describe what went wrong.
-          The error object will be cast to string to display the problem to the user.
-        """
-        return self.connection_checker.check_connection(self, logger, config)
-
-    @property
-    def connection_checker(self) -> ConnectionChecker:
-        check = self._source_config["check"]
         if "type" not in check:
             check["type"] = "CheckStream"
-        check_stream = self._constructor.create_component(
+        connection_checker = self._constructor.create_component(
             COMPONENTS_CHECKER_TYPE_MAPPING[check["type"]],
             check,
             dict(),
             emit_connector_builder_messages=self._emit_connector_builder_messages,
         )
-        if isinstance(check_stream, ConnectionChecker):
-            return check_stream
-        else:
+        if not isinstance(connection_checker, ConnectionChecker):
            raise ValueError(
-                f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}"
+                f"Expected to generate a ConnectionChecker component, but received {connection_checker.__class__}"
            )
 
+        check_succeeded, error = connection_checker.check_connection(self, logger, self._config)
+        if not check_succeeded:
+            return AirbyteConnectionStatus(status=Status.FAILED, message=repr(error))
+        return AirbyteConnectionStatus(status=Status.SUCCEEDED)
+
     @property
     def dynamic_streams(self) -> List[Dict[str, Any]]:
         return self._dynamic_stream_configs(
             manifest=self._source_config,
-            config=self._config,
             with_dynamic_stream_name=True,
         )
 
-    def _group_streams(
-        self, config: Mapping[str, Any]
-    ) -> Tuple[List[AbstractStream], List[Stream]]:
-        concurrent_streams: List[AbstractStream] = []
-        synchronous_streams: List[Stream] = []
-
-        # Combine streams and dynamic_streams. Note: both cannot be empty at the same time,
-        # and this is validated during the initialization of the source.
-        streams = self._stream_configs(self._source_config, config) + self._dynamic_stream_configs(
-            self._source_config, config
-        )
-
-        name_to_stream_mapping = {stream["name"]: stream for stream in streams}
-
-        for declarative_stream in self.streams(config=config):
-            # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
-            # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
-            # so we need to treat them as synchronous
-
-            if isinstance(declarative_stream, AbstractStream):
-                concurrent_streams.append(declarative_stream)
-                continue
-
-            supports_file_transfer = (
-                isinstance(declarative_stream, DeclarativeStream)
-                and "file_uploader" in name_to_stream_mapping[declarative_stream.name]
-            )
-
-            if (
-                isinstance(declarative_stream, DeclarativeStream)
-                and name_to_stream_mapping[declarative_stream.name]["type"]
-                == "StateDelegatingStream"
-            ):
-                stream_state = self._connector_state_manager.get_stream_state(
-                    stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                )
-
-                name_to_stream_mapping[declarative_stream.name] = (
-                    name_to_stream_mapping[declarative_stream.name]["incremental_stream"]
-                    if stream_state
-                    else name_to_stream_mapping[declarative_stream.name]["full_refresh_stream"]
-                )
-
-            if isinstance(declarative_stream, DeclarativeStream) and (
-                name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
-                == "SimpleRetriever"
-                or name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
-                == "AsyncRetriever"
-            ):
-                incremental_sync_component_definition = name_to_stream_mapping[
-                    declarative_stream.name
-                ].get("incremental_sync")
-
-                partition_router_component_definition = (
-                    name_to_stream_mapping[declarative_stream.name]
-                    .get("retriever", {})
-                    .get("partition_router")
-                )
-                is_without_partition_router_or_cursor = not bool(
-                    incremental_sync_component_definition
-                ) and not bool(partition_router_component_definition)
-
-                is_substream_without_incremental = (
-                    partition_router_component_definition
-                    and not incremental_sync_component_definition
-                )
-
-                if self._is_concurrent_cursor_incremental_without_partition_routing(
-                    declarative_stream, incremental_sync_component_definition
-                ):
-                    stream_state = self._connector_state_manager.get_stream_state(
-                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                    )
-                    stream_state = self._migrate_state(declarative_stream, stream_state)
-
-                    retriever = self._get_retriever(declarative_stream, stream_state)
-
-                    if isinstance(declarative_stream.retriever, AsyncRetriever) and isinstance(
-                        declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter
-                    ):
-                        cursor = declarative_stream.retriever.stream_slicer.stream_slicer
-
-                        if not isinstance(cursor, ConcurrentCursor | ConcurrentPerPartitionCursor):
-                            # This should never happen since we instantiate ConcurrentCursor in
-                            # model_to_component_factory.py
-                            raise ValueError(
-                                f"Expected AsyncJobPartitionRouter stream_slicer to be of type ConcurrentCursor, but received{cursor.__class__}"
-                            )
-
-                        partition_generator = StreamSlicerPartitionGenerator(
-                            partition_factory=DeclarativePartitionFactory(
-                                stream_name=declarative_stream.name,
-                                schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                                retriever=retriever,
-                                message_repository=self._message_repository,
-                                max_records_limit=self._limits.max_records
-                                if self._limits
-                                else None,
-                            ),
-                            stream_slicer=declarative_stream.retriever.stream_slicer,
-                            slice_limit=self._limits.max_slices
-                            if self._limits
-                            else None,  # technically not needed because create_default_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
-                        )
-                    else:
-                        if (
-                            incremental_sync_component_definition
-                            and incremental_sync_component_definition.get("type")
-                            == IncrementingCountCursorModel.__name__
-                        ):
-                            cursor = self._constructor.create_concurrent_cursor_from_incrementing_count_cursor(
-                                model_type=IncrementingCountCursorModel,
-                                component_definition=incremental_sync_component_definition,  # type: ignore # Not None because of the if condition above
-                                stream_name=declarative_stream.name,
-                                stream_namespace=declarative_stream.namespace,
-                                config=config or {},
-                            )
-                        else:
-                            cursor = self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
-                                model_type=DatetimeBasedCursorModel,
-                                component_definition=incremental_sync_component_definition,  # type: ignore # Not None because of the if condition above
-                                stream_name=declarative_stream.name,
-                                stream_namespace=declarative_stream.namespace,
-                                config=config or {},
-                                stream_state_migrations=declarative_stream.state_migrations,
-                            )
-                        partition_generator = StreamSlicerPartitionGenerator(
-                            partition_factory=DeclarativePartitionFactory(
-                                stream_name=declarative_stream.name,
-                                schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                                retriever=retriever,
-                                message_repository=self._message_repository,
-                                max_records_limit=self._limits.max_records
-                                if self._limits
-                                else None,
-                            ),
-                            stream_slicer=cursor,
-                            slice_limit=self._limits.max_slices if self._limits else None,
-                        )
-
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=cursor.cursor_field.cursor_field_key
-                            if hasattr(cursor, "cursor_field")
-                            and hasattr(
-                                cursor.cursor_field, "cursor_field_key"
-                            )  # FIXME this will need to be updated once we do the per partition
-                            else None,
-                            logger=self.logger,
-                            cursor=cursor,
-                            supports_file_transfer=supports_file_transfer,
-                        )
-                    )
-                elif (
-                    is_substream_without_incremental or is_without_partition_router_or_cursor
-                ) and hasattr(declarative_stream.retriever, "stream_slicer"):
-                    partition_generator = StreamSlicerPartitionGenerator(
-                        DeclarativePartitionFactory(
-                            stream_name=declarative_stream.name,
-                            schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                            retriever=declarative_stream.retriever,
-                            message_repository=self._message_repository,
-                            max_records_limit=self._limits.max_records if self._limits else None,
-                        ),
-                        declarative_stream.retriever.stream_slicer,
-                        slice_limit=self._limits.max_slices
-                        if self._limits
-                        else None,  # technically not needed because create_default_stream() -> create_simple_retriever() will apply the decorator. But for consistency and depending how we build create_default_stream, this may be needed later
-                    )
-
-                    final_state_cursor = FinalStateCursor(
-                        stream_name=declarative_stream.name,
-                        stream_namespace=declarative_stream.namespace,
-                        message_repository=self._message_repository,
-                    )
-
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=None,
-                            logger=self.logger,
-                            cursor=final_state_cursor,
-                            supports_file_transfer=supports_file_transfer,
-                        )
-                    )
-                elif (
-                    incremental_sync_component_definition
-                    and incremental_sync_component_definition.get("type", "")
-                    == DatetimeBasedCursorModel.__name__
-                    and hasattr(declarative_stream.retriever, "stream_slicer")
-                    and isinstance(
-                        declarative_stream.retriever.stream_slicer,
-                        (GlobalSubstreamCursor, PerPartitionWithGlobalCursor),
-                    )
-                ):
-                    stream_state = self._connector_state_manager.get_stream_state(
-                        stream_name=declarative_stream.name, namespace=declarative_stream.namespace
-                    )
-                    stream_state = self._migrate_state(declarative_stream, stream_state)
-
-                    partition_router = declarative_stream.retriever.stream_slicer._partition_router
-
-                    perpartition_cursor = (
-                        self._constructor.create_concurrent_cursor_from_perpartition_cursor(
-                            state_manager=self._connector_state_manager,
-                            model_type=DatetimeBasedCursorModel,
-                            component_definition=incremental_sync_component_definition,
-                            stream_name=declarative_stream.name,
-                            stream_namespace=declarative_stream.namespace,
-                            config=config or {},
-                            stream_state=stream_state,
-                            partition_router=partition_router,
-                        )
-                    )
-
-                    retriever = self._get_retriever(declarative_stream, stream_state)
-
-                    partition_generator = StreamSlicerPartitionGenerator(
-                        DeclarativePartitionFactory(
-                            stream_name=declarative_stream.name,
-                            schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
-                            retriever=retriever,
-                            message_repository=self._message_repository,
-                            max_records_limit=self._limits.max_records if self._limits else None,
-                        ),
-                        perpartition_cursor,
-                        slice_limit=self._limits.max_slices if self._limits else None,
-                    )
-
-                    concurrent_streams.append(
-                        DefaultStream(
-                            partition_generator=partition_generator,
-                            name=declarative_stream.name,
-                            json_schema=declarative_stream.get_json_schema(),
-                            primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
-                            cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
-                            logger=self.logger,
-                            cursor=perpartition_cursor,
-                            supports_file_transfer=supports_file_transfer,
-                        )
-                    )
-                else:
-                    synchronous_streams.append(declarative_stream)
-            # TODO: Remove this. This check is necessary to safely migrate Stripe during the transition state.
-            # Condition below needs to ensure that concurrent support is not lost for sources that already support
-            # it before migration, but now are only partially migrated to declarative implementation (e.g., Stripe).
-            elif (
-                isinstance(declarative_stream, AbstractStreamFacade)
-                and self.is_partially_declarative
-            ):
-                concurrent_streams.append(declarative_stream.get_underlying_stream())
-            else:
-                synchronous_streams.append(declarative_stream)
-
-        return concurrent_streams, synchronous_streams
-
-    def _stream_configs(
-        self, manifest: Mapping[str, Any], config: Mapping[str, Any]
-    ) -> List[Dict[str, Any]]:
+    def _stream_configs(self, manifest: Mapping[str, Any]) -> List[Dict[str, Any]]:
         # This has a warning flag for static, but after we finish part 4 we'll replace manifest with self._source_config
         stream_configs = []
         for current_stream_config in manifest.get("streams", []):
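The `check()` rewrite above folds the removed `check_connection()` method and `connection_checker` property into one code path: look up the manifest's `check` block, build the checker component, run it against `self._config`, and translate the boolean result into an `AirbyteConnectionStatus` using the newly imported `Status` enum. A sketch of calling it (the logger name and config keys are illustrative):

```python
import logging

from airbyte_cdk.models import Status

logger = logging.getLogger("airbyte")

# `source` as constructed earlier; valid config keys depend on the connector spec.
status = source.check(logger, config={"api_key": "..."})
if status.status == Status.SUCCEEDED:
    print("connection check passed")
else:
    print(f"connection check failed: {status.message}")
```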
@@ -879,7 +525,7 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
                 parameters={},
             )
 
-            if interpolated_boolean.eval(config=config):
+            if interpolated_boolean.eval(config=self._config):
                 stream_configs.extend(current_stream_config.get("streams", []))
             else:
                 if "type" not in current_stream_config:
@@ -890,7 +536,6 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
     def _dynamic_stream_configs(
         self,
         manifest: Mapping[str, Any],
-        config: Mapping[str, Any],
         with_dynamic_stream_name: Optional[bool] = None,
     ) -> List[Dict[str, Any]]:
         dynamic_stream_definitions: List[Dict[str, Any]] = manifest.get("dynamic_streams", [])
@@ -925,14 +570,14 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
                 components_resolver = self._constructor.create_component(
                     model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
                     component_definition=components_resolver_config,
-                    config=config,
+                    config=self._config,
                     stream_name=dynamic_definition.get("name"),
                 )
             else:
                 components_resolver = self._constructor.create_component(
                     model_type=COMPONENTS_RESOLVER_TYPE_MAPPING[resolver_type],
                     component_definition=components_resolver_config,
-                    config=config,
+                    config=self._config,
                 )
 
             stream_template_config = dynamic_definition["stream_template"]
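These two `config=self._config` substitutions repeat the pattern applied throughout this release: per-call `config` parameters are dropped in favor of the config captured once at construction. In miniature (illustrative code, not from the CDK):

```python
from typing import Any, Mapping


class ConfigHolder:
    """Illustrates the dev4 pattern: store config once, stop re-passing it."""

    def __init__(self, config: Mapping[str, Any]) -> None:
        self._config = config

    # dev3 style: every helper re-accepted config as a parameter.
    # def resolve(self, config: Mapping[str, Any]) -> Any: ...

    # dev4 style: helpers read the stored self._config instead.
    def resolve(self) -> Any:
        return self._config.get("base_url")
```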
@@ -985,40 +630,6 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
 
         return dynamic_stream_configs
 
-    def _is_concurrent_cursor_incremental_without_partition_routing(
-        self,
-        declarative_stream: DeclarativeStream,
-        incremental_sync_component_definition: Mapping[str, Any] | None,
-    ) -> bool:
-        return (
-            incremental_sync_component_definition is not None
-            and bool(incremental_sync_component_definition)
-            and (
-                incremental_sync_component_definition.get("type", "")
-                in (DatetimeBasedCursorModel.__name__, IncrementingCountCursorModel.__name__)
-            )
-            and hasattr(declarative_stream.retriever, "stream_slicer")
-            and (
-                isinstance(declarative_stream.retriever.stream_slicer, DatetimeBasedCursor)
-                # IncrementingCountCursorModel is hardcoded to be of type DatetimeBasedCursor
-                # add isintance check here if we want to create a Declarative IncrementingCountCursor
-                # or isinstance(
-                #     declarative_stream.retriever.stream_slicer, IncrementingCountCursor
-                # )
-                or isinstance(declarative_stream.retriever.stream_slicer, AsyncJobPartitionRouter)
-            )
-        )
-
-    @staticmethod
-    def _get_retriever(
-        declarative_stream: DeclarativeStream, stream_state: Mapping[str, Any]
-    ) -> Retriever:
-        if declarative_stream and isinstance(declarative_stream.retriever, SimpleRetriever):
-            # We zero it out here, but since this is a cursor reference, the state is still properly
-            # instantiated for the other components that reference it
-            declarative_stream.retriever.cursor = None
-        return declarative_stream.retriever
-
     @staticmethod
     def _select_streams(
         streams: List[AbstractStream], configured_catalog: ConfiguredAirbyteCatalog
@@ -1031,27 +642,3 @@ class ConcurrentDeclarativeSource(AbstractSource, Generic[TState]):
             abstract_streams.append(stream_instance)
 
         return abstract_streams
-
-    @staticmethod
-    def _remove_concurrent_streams_from_catalog(
-        catalog: ConfiguredAirbyteCatalog,
-        concurrent_stream_names: set[str],
-    ) -> ConfiguredAirbyteCatalog:
-        return ConfiguredAirbyteCatalog(
-            streams=[
-                stream
-                for stream in catalog.streams
-                if stream.stream.name not in concurrent_stream_names
-            ]
-        )
-
-    @staticmethod
-    def _migrate_state(
-        declarative_stream: DeclarativeStream, stream_state: MutableMapping[str, Any]
-    ) -> MutableMapping[str, Any]:
-        for state_migration in declarative_stream.state_migrations:
-            if state_migration.should_migrate(stream_state):
-                # The state variable is expected to be mutable but the migrate method returns an immutable mapping.
-                stream_state = dict(state_migration.migrate(stream_state))
-
-        return stream_state