airbyte-cdk 6.62.0.dev3__py3-none-any.whl → 6.62.0.dev4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. airbyte_cdk/__init__.py +2 -2
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +7 -7
  3. airbyte_cdk/connector_builder/main.py +2 -2
  4. airbyte_cdk/connector_builder/test_reader/reader.py +2 -2
  5. airbyte_cdk/{sources → legacy/sources}/declarative/declarative_stream.py +2 -2
  6. airbyte_cdk/legacy/sources/declarative/incremental/__init__.py +30 -0
  7. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/datetime_based_cursor.py +1 -1
  8. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/global_substream_cursor.py +4 -2
  9. airbyte_cdk/legacy/sources/declarative/incremental/per_partition_cursor.py +1 -1
  10. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/per_partition_with_global.py +8 -6
  11. airbyte_cdk/{sources → legacy/sources}/declarative/incremental/resumable_full_refresh_cursor.py +1 -1
  12. airbyte_cdk/manifest_server/Dockerfile +2 -2
  13. airbyte_cdk/manifest_server/README.md +22 -0
  14. airbyte_cdk/manifest_server/api_models/__init__.py +2 -0
  15. airbyte_cdk/manifest_server/api_models/manifest.py +12 -0
  16. airbyte_cdk/manifest_server/api_models/stream.py +2 -2
  17. airbyte_cdk/manifest_server/app.py +6 -0
  18. airbyte_cdk/manifest_server/cli/_common.py +1 -0
  19. airbyte_cdk/manifest_server/command_processor/processor.py +2 -5
  20. airbyte_cdk/manifest_server/command_processor/utils.py +1 -1
  21. airbyte_cdk/manifest_server/helpers/__init__.py +0 -0
  22. airbyte_cdk/manifest_server/helpers/tracing.py +36 -0
  23. airbyte_cdk/manifest_server/routers/manifest.py +38 -2
  24. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +6 -3
  25. airbyte_cdk/sources/declarative/checks/check_stream.py +6 -3
  26. airbyte_cdk/sources/declarative/checks/connection_checker.py +5 -2
  27. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +38 -451
  28. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -27
  29. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -24
  30. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +58 -5
  31. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -2
  32. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +1 -21
  33. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +0 -5
  34. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +32 -24
  35. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -3
  36. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -2
  37. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +21 -7
  38. airbyte_cdk/sources/declarative/transformations/keys_replace_transformation.py +1 -1
  39. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -1
  40. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +0 -4
  41. airbyte_cdk/sources/utils/schema_helpers.py +29 -9
  42. airbyte_cdk/sources/utils/transform.py +25 -13
  43. airbyte_cdk/utils/spec_schema_transformations.py +7 -5
  44. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/METADATA +4 -2
  45. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/RECORD +51 -49
  46. /airbyte_cdk/{sources → legacy/sources}/declarative/incremental/declarative_cursor.py +0 -0
  47. /airbyte_cdk/manifest_server/{auth.py → helpers/auth.py} +0 -0
  48. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/LICENSE.txt +0 -0
  49. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/LICENSE_SHORT +0 -0
  50. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/WHEEL +0 -0
  51. {airbyte_cdk-6.62.0.dev3.dist-info → airbyte_cdk-6.62.0.dev4.dist-info}/entry_points.txt +0 -0
@@ -593,32 +593,6 @@ definitions:
593
593
  $parameters:
594
594
  type: object
595
595
  additionalProperties: true
596
- CustomIncrementalSync:
597
- title: Custom Incremental Sync
598
- description: Incremental component whose behavior is derived from a custom code implementation of the connector.
599
- type: object
600
- additionalProperties: true
601
- required:
602
- - type
603
- - class_name
604
- - cursor_field
605
- properties:
606
- type:
607
- type: string
608
- enum: [CustomIncrementalSync]
609
- class_name:
610
- title: Class Name
611
- description: Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_<name>.<package>.<class_name>`.
612
- type: string
613
- additionalProperties: true
614
- examples:
615
- - "source_railz.components.MyCustomIncrementalSync"
616
- cursor_field:
617
- description: The location of the value on a record that will be used as a bookmark during sync.
618
- type: string
619
- $parameters:
620
- type: object
621
- additionalProperties: true
622
596
  CustomPaginationStrategy:
623
597
  title: Custom Pagination Strategy
624
598
  description: Pagination strategy component whose behavior is derived from a custom code implementation of the connector.
@@ -1551,7 +1525,6 @@ definitions:
1551
1525
  anyOf:
1552
1526
  - "$ref": "#/definitions/DatetimeBasedCursor"
1553
1527
  - "$ref": "#/definitions/IncrementingCountCursor"
1554
- - "$ref": "#/definitions/CustomIncrementalSync"
1555
1528
  primary_key:
1556
1529
  title: Primary Key
1557
1530
  "$ref": "#/definitions/PrimaryKey"
@@ -2,36 +2,12 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.legacy.sources.declarative.incremental.per_partition_cursor import (
6
- CursorFactory,
7
- PerPartitionCursor,
8
- )
9
5
  from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
10
6
  ConcurrentCursorFactory,
11
7
  ConcurrentPerPartitionCursor,
12
8
  )
13
- from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
14
- from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
15
- from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
16
- GlobalSubstreamCursor,
17
- )
18
- from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
19
- PerPartitionWithGlobalCursor,
20
- )
21
- from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor import (
22
- ChildPartitionResumableFullRefreshCursor,
23
- ResumableFullRefreshCursor,
24
- )
25
9
 
26
10
  __all__ = [
27
- "CursorFactory",
28
11
  "ConcurrentCursorFactory",
29
12
  "ConcurrentPerPartitionCursor",
30
- "DatetimeBasedCursor",
31
- "DeclarativeCursor",
32
- "GlobalSubstreamCursor",
33
- "PerPartitionCursor",
34
- "PerPartitionWithGlobalCursor",
35
- "ResumableFullRefreshCursor",
36
- "ChildPartitionResumableFullRefreshCursor",
37
13
  ]
@@ -9,7 +9,7 @@ import time
9
9
  from collections import OrderedDict
10
10
  from copy import deepcopy
11
11
  from datetime import timedelta
12
- from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional
12
+ from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, TypeVar
13
13
 
14
14
  from airbyte_cdk.models import (
15
15
  AirbyteStateBlob,
@@ -19,10 +19,6 @@ from airbyte_cdk.models import (
19
19
  StreamDescriptor,
20
20
  )
21
21
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
22
- from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
23
- Timer,
24
- iterate_with_last_flag_and_state,
25
- )
26
22
  from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
27
23
  from airbyte_cdk.sources.message import MessageRepository
28
24
  from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
@@ -38,6 +34,63 @@ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
38
34
  logger = logging.getLogger("airbyte")
39
35
 
40
36
 
37
+ T = TypeVar("T")
38
+
39
+
40
+ def iterate_with_last_flag_and_state(
41
+ generator: Iterable[T], get_stream_state_func: Callable[[], Optional[Mapping[str, StreamState]]]
42
+ ) -> Iterable[tuple[T, bool, Any]]:
43
+ """
44
+ Iterates over the given generator, yielding tuples containing the element, a flag
45
+ indicating whether it's the last element in the generator, and the result of
46
+ calling `get_stream_state_func` (with no arguments) right after that element is fetched.
47
+
48
+ Args:
49
+ generator: The iterable to iterate over.
50
+ get_stream_state_func: A zero-argument function, called once after each element is
51
+ fetched, that returns the state to pair with that element.
52
+
53
+ Returns:
54
+ An iterator that yields tuples of the form (element, is_last, state).
55
+ """
56
+
57
+ iterator = iter(generator)
58
+
59
+ try:
60
+ current = next(iterator)
61
+ state = get_stream_state_func()
62
+ except StopIteration:
63
+ return # Return an empty iterator
64
+
65
+ for next_item in iterator:
66
+ yield current, False, state
67
+ current = next_item
68
+ state = get_stream_state_func()
69
+
70
+ yield current, True, state
71
+
72
+
73
+ class Timer:
74
+ """
75
+ A simple timer class that measures elapsed time in seconds using a high-resolution performance counter.
76
+ """
77
+
78
+ def __init__(self) -> None:
79
+ self._start: Optional[int] = None
80
+
81
+ def start(self) -> None:
82
+ self._start = time.perf_counter_ns()
83
+
84
+ def finish(self) -> int:
85
+ if self._start:
86
+ return ((time.perf_counter_ns() - self._start) / 1e9).__ceil__()
87
+ else:
88
+ raise RuntimeError("Global substream cursor timer not started")
89
+
90
+ def is_running(self) -> bool:
91
+ return self._start is not None
92
+
93
+
41
94
  class ConcurrentCursorFactory:
42
95
  def __init__(self, create_function: Callable[..., ConcurrentCursor]):
43
96
  self._create_function = create_function
@@ -5,7 +5,6 @@ from typing import Any, Mapping
5
5
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
6
6
  from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
7
7
  from airbyte_cdk.sources.declarative.models import (
8
- CustomIncrementalSync,
9
8
  DatetimeBasedCursor,
10
9
  SubstreamPartitionRouter,
11
10
  )
@@ -36,7 +35,7 @@ class LegacyToPerPartitionStateMigration(StateMigration):
36
35
  def __init__(
37
36
  self,
38
37
  partition_router: SubstreamPartitionRouter,
39
- cursor: CustomIncrementalSync | DatetimeBasedCursor,
38
+ cursor: DatetimeBasedCursor,
40
39
  config: Mapping[str, Any],
41
40
  parameters: Mapping[str, Any],
42
41
  ):
@@ -174,24 +174,6 @@ class CustomErrorHandler(BaseModel):
174
174
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
175
175
 
176
176
 
177
- class CustomIncrementalSync(BaseModel):
178
- class Config:
179
- extra = Extra.allow
180
-
181
- type: Literal["CustomIncrementalSync"]
182
- class_name: str = Field(
183
- ...,
184
- description="Fully-qualified name of the class that will be implementing the custom incremental sync. The format is `source_<name>.<package>.<class_name>`.",
185
- examples=["source_railz.components.MyCustomIncrementalSync"],
186
- title="Class Name",
187
- )
188
- cursor_field: str = Field(
189
- ...,
190
- description="The location of the value on a record that will be used as a bookmark during sync.",
191
- )
192
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
193
-
194
-
195
177
  class CustomPaginationStrategy(BaseModel):
196
178
  class Config:
197
179
  extra = Extra.allow
@@ -2432,9 +2414,7 @@ class DeclarativeStream(BaseModel):
2432
2414
  description="Component used to coordinate how records are extracted across stream slices and request pages.",
2433
2415
  title="Retriever",
2434
2416
  )
2435
- incremental_sync: Optional[
2436
- Union[DatetimeBasedCursor, IncrementingCountCursor, CustomIncrementalSync]
2437
- ] = Field(
2417
+ incremental_sync: Optional[Union[DatetimeBasedCursor, IncrementingCountCursor]] = Field(
2438
2418
  None,
2439
2419
  description="Component used to fetch data incrementally based on a time field in the data.",
2440
2420
  title="Incremental Sync",
@@ -19,11 +19,6 @@ DEFAULT_MODEL_TYPES: Mapping[str, str] = {
19
19
  "DatetimeBasedCursor.end_time_option": "RequestOption",
20
20
  "DatetimeBasedCursor.start_datetime": "MinMaxDatetime",
21
21
  "DatetimeBasedCursor.start_time_option": "RequestOption",
22
- # CustomIncrementalSync
23
- "CustomIncrementalSync.end_datetime": "MinMaxDatetime",
24
- "CustomIncrementalSync.end_time_option": "RequestOption",
25
- "CustomIncrementalSync.start_datetime": "MinMaxDatetime",
26
- "CustomIncrementalSync.start_time_option": "RequestOption",
27
22
  # DeclarativeSource
28
23
  "DeclarativeSource.check": "CheckStream",
29
24
  "DeclarativeSource.spec": "Spec",
@@ -33,6 +33,10 @@ from requests import Response
33
33
  from airbyte_cdk.connector_builder.models import (
34
34
  LogMessage as ConnectorBuilderLogMessage,
35
35
  )
36
+ from airbyte_cdk.legacy.sources.declarative.declarative_stream import DeclarativeStream
37
+ from airbyte_cdk.legacy.sources.declarative.incremental import (
38
+ DatetimeBasedCursor,
39
+ )
36
40
  from airbyte_cdk.models import (
37
41
  AirbyteStateBlob,
38
42
  AirbyteStateMessage,
@@ -75,7 +79,6 @@ from airbyte_cdk.sources.declarative.checks import (
75
79
  )
76
80
  from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
77
81
  from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
78
- from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
79
82
  from airbyte_cdk.sources.declarative.decoders import (
80
83
  Decoder,
81
84
  IterableDecoder,
@@ -105,10 +108,6 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
105
108
  from airbyte_cdk.sources.declarative.incremental import (
106
109
  ConcurrentCursorFactory,
107
110
  ConcurrentPerPartitionCursor,
108
- CursorFactory,
109
- DatetimeBasedCursor,
110
- GlobalSubstreamCursor,
111
- PerPartitionWithGlobalCursor,
112
111
  )
113
112
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
114
113
  from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
@@ -200,9 +199,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
200
199
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
201
200
  CustomErrorHandler as CustomErrorHandlerModel,
202
201
  )
203
- from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
204
- CustomIncrementalSync as CustomIncrementalSyncModel,
205
- )
206
202
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
207
203
  CustomPaginationStrategy as CustomPaginationStrategyModel,
208
204
  )
@@ -701,7 +697,6 @@ class ModelToComponentFactory:
701
697
  CustomBackoffStrategyModel: self.create_custom_component,
702
698
  CustomDecoderModel: self.create_custom_component,
703
699
  CustomErrorHandlerModel: self.create_custom_component,
704
- CustomIncrementalSyncModel: self.create_custom_component,
705
700
  CustomRecordExtractorModel: self.create_custom_component,
706
701
  CustomRecordFilterModel: self.create_custom_component,
707
702
  CustomRequesterModel: self.create_custom_component,
@@ -752,7 +747,7 @@ class ModelToComponentFactory:
752
747
  OAuthAuthenticatorModel: self.create_oauth_authenticator,
753
748
  OffsetIncrementModel: self.create_offset_increment,
754
749
  PageIncrementModel: self.create_page_increment,
755
- ParentStreamConfigModel: self._create_message_repository_substream_wrapper,
750
+ ParentStreamConfigModel: self.create_parent_stream_config_with_substream_wrapper,
756
751
  PredicateValidatorModel: self.create_predicate_validator,
757
752
  PropertiesFromEndpointModel: self.create_properties_from_endpoint,
758
753
  PropertyChunkingModel: self.create_property_chunking,
@@ -1748,7 +1743,11 @@ class ModelToComponentFactory:
1748
1743
 
1749
1744
  if self._is_component(model_value):
1750
1745
  model_args[model_field] = self._create_nested_component(
1751
- model, model_field, model_value, config, **kwargs,
1746
+ model,
1747
+ model_field,
1748
+ model_value,
1749
+ config,
1750
+ **kwargs,
1752
1751
  )
1753
1752
  elif isinstance(model_value, list):
1754
1753
  vals = []
@@ -1760,7 +1759,15 @@ class ModelToComponentFactory:
1760
1759
  if derived_type:
1761
1760
  v["type"] = derived_type
1762
1761
  if self._is_component(v):
1763
- vals.append(self._create_nested_component(model, model_field, v, config, **kwargs,))
1762
+ vals.append(
1763
+ self._create_nested_component(
1764
+ model,
1765
+ model_field,
1766
+ v,
1767
+ config,
1768
+ **kwargs,
1769
+ )
1770
+ )
1764
1771
  else:
1765
1772
  vals.append(v)
1766
1773
  model_args[model_field] = vals
@@ -1965,7 +1972,7 @@ class ModelToComponentFactory:
1965
1972
 
1966
1973
  def create_default_stream(
1967
1974
  self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
1968
- ) -> Union[DeclarativeStream, AbstractStream]:
1975
+ ) -> AbstractStream:
1969
1976
  primary_key = model.primary_key.__root__ if model.primary_key else None
1970
1977
 
1971
1978
  partition_router = self._build_stream_slicer_from_partition_router(
@@ -2525,7 +2532,9 @@ class ModelToComponentFactory:
2525
2532
  config=config,
2526
2533
  name=name,
2527
2534
  primary_key=None,
2528
- partition_router=self._build_stream_slicer_from_partition_router(model.retriever, config),
2535
+ partition_router=self._build_stream_slicer_from_partition_router(
2536
+ model.retriever, config
2537
+ ),
2529
2538
  transformations=[],
2530
2539
  use_cache=True,
2531
2540
  log_formatter=(
@@ -2604,6 +2613,8 @@ class ModelToComponentFactory:
2604
2613
  fallback_parser=gzip_parser.inner_parser,
2605
2614
  )
2606
2615
 
2616
+ # todo: This method should be removed once we deprecate the SimpleRetriever.cursor field and the various
2617
+ # state methods
2607
2618
  @staticmethod
2608
2619
  def create_incrementing_count_cursor(
2609
2620
  model: IncrementingCountCursorModel, config: Config, **kwargs: Any
@@ -3134,9 +3145,7 @@ class ModelToComponentFactory:
3134
3145
  transformations: List[RecordTransformation],
3135
3146
  file_uploader: Optional[DefaultFileUploader] = None,
3136
3147
  incremental_sync: Optional[
3137
- Union[
3138
- IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
3139
- ]
3148
+ Union[IncrementingCountCursorModel, DatetimeBasedCursorModel]
3140
3149
  ] = None,
3141
3150
  use_cache: Optional[bool] = None,
3142
3151
  log_formatter: Optional[Callable[[Response], Any]] = None,
@@ -3238,7 +3247,9 @@ class ModelToComponentFactory:
3238
3247
 
3239
3248
  if not request_options_provider:
3240
3249
  request_options_provider = DefaultRequestOptionsProvider(parameters={})
3241
- if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance(partition_router, PartitionRouter):
3250
+ if isinstance(request_options_provider, DefaultRequestOptionsProvider) and isinstance(
3251
+ partition_router, PartitionRouter
3252
+ ):
3242
3253
  request_options_provider = partition_router
3243
3254
 
3244
3255
  paginator = (
@@ -3688,7 +3699,7 @@ class ModelToComponentFactory:
3688
3699
  if model.parent_stream_configs:
3689
3700
  parent_stream_configs.extend(
3690
3701
  [
3691
- self._create_message_repository_substream_wrapper(
3702
+ self.create_parent_stream_config_with_substream_wrapper(
3692
3703
  model=parent_stream_config, config=config, **kwargs
3693
3704
  )
3694
3705
  for parent_stream_config in model.parent_stream_configs
@@ -3701,13 +3712,11 @@ class ModelToComponentFactory:
3701
3712
  config=config,
3702
3713
  )
3703
3714
 
3704
- def _create_message_repository_substream_wrapper(
3715
+ def create_parent_stream_config_with_substream_wrapper(
3705
3716
  self, model: ParentStreamConfigModel, config: Config, *, stream_name: str, **kwargs: Any
3706
3717
  ) -> Any:
3707
3718
  # getting the parent state
3708
- child_state = self._connector_state_manager.get_stream_state(
3709
- stream_name, None
3710
- )
3719
+ child_state = self._connector_state_manager.get_stream_state(stream_name, None)
3711
3720
 
3712
3721
  # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
3713
3722
  has_parent_state = bool(
@@ -3775,7 +3784,6 @@ class ModelToComponentFactory:
3775
3784
  incremental_sync_model: Union[
3776
3785
  DatetimeBasedCursorModel,
3777
3786
  IncrementingCountCursorModel,
3778
- CustomIncrementalSyncModel,
3779
3787
  ] = (
3780
3788
  model.stream.incremental_sync # type: ignore # if we are there, it is because there is incremental_dependency and therefore there is an incremental_sync on the parent stream
3781
3789
  if isinstance(model.stream, DeclarativeStreamModel)
@@ -1,4 +1,5 @@
1
- # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
1
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
2
+
2
3
  import logging
3
4
  import uuid
4
5
  from dataclasses import dataclass, field
@@ -8,9 +9,8 @@ from typing import Any, Dict, Iterable, Mapping, Optional
8
9
  import requests
9
10
  from requests import Response
10
11
 
11
- from airbyte_cdk import AirbyteMessage
12
12
  from airbyte_cdk.logger import lazy_log
13
- from airbyte_cdk.models import FailureType, Type
13
+ from airbyte_cdk.models import AirbyteMessage, FailureType, Type
14
14
  from airbyte_cdk.sources.declarative.async_job.job import AsyncJob
15
15
  from airbyte_cdk.sources.declarative.async_job.repository import AsyncJobRepository
16
16
  from airbyte_cdk.sources.declarative.async_job.status import AsyncJobStatus
@@ -23,10 +23,10 @@ from typing import (
23
23
  import requests
24
24
  from typing_extensions import deprecated
25
25
 
26
+ from airbyte_cdk.legacy.sources.declarative.incremental import ResumableFullRefreshCursor
27
+ from airbyte_cdk.legacy.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
26
28
  from airbyte_cdk.models import AirbyteMessage
27
29
  from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
28
- from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor
29
- from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
30
30
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
31
31
  from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import (
32
32
  SinglePartitionRouter,
@@ -14,10 +14,21 @@ from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import Stre
14
14
  from airbyte_cdk.sources.types import Record, StreamSlice
15
15
  from airbyte_cdk.utils.slice_hasher import SliceHasher
16
16
 
17
+
17
18
  # For Connector Builder test read operations, we track the total number of records
18
- # read for the stream at the global level so that we can stop reading early if we
19
- # exceed the record limit
20
- total_record_counter = 0
19
+ # read for the stream so that we can stop reading early if we exceed the record limit.
20
+ class RecordCounter:
21
+ def __init__(self) -> None:
22
+ self.total_record_counter = 0
23
+
24
+ def increment(self) -> None:
25
+ self.total_record_counter += 1
26
+
27
+ def reset(self) -> None:
28
+ self.total_record_counter = 0
29
+
30
+ def get_total_records(self) -> int:
31
+ return self.total_record_counter
21
32
 
22
33
 
23
34
  class SchemaLoaderCachingDecorator(SchemaLoader):
@@ -51,6 +62,7 @@ class DeclarativePartitionFactory:
51
62
  self._retriever = retriever
52
63
  self._message_repository = message_repository
53
64
  self._max_records_limit = max_records_limit
65
+ self._record_counter = RecordCounter()
54
66
 
55
67
  def create(self, stream_slice: StreamSlice) -> Partition:
56
68
  return DeclarativePartition(
@@ -60,6 +72,7 @@ class DeclarativePartitionFactory:
60
72
  message_repository=self._message_repository,
61
73
  max_records_limit=self._max_records_limit,
62
74
  stream_slice=stream_slice,
75
+ record_counter=self._record_counter,
63
76
  )
64
77
 
65
78
 
@@ -72,6 +85,7 @@ class DeclarativePartition(Partition):
72
85
  message_repository: MessageRepository,
73
86
  max_records_limit: Optional[int],
74
87
  stream_slice: StreamSlice,
88
+ record_counter: RecordCounter,
75
89
  ):
76
90
  self._stream_name = stream_name
77
91
  self._schema_loader = schema_loader
@@ -80,17 +94,17 @@ class DeclarativePartition(Partition):
80
94
  self._max_records_limit = max_records_limit
81
95
  self._stream_slice = stream_slice
82
96
  self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
97
+ self._record_counter = record_counter
83
98
 
84
99
  def read(self) -> Iterable[Record]:
85
100
  if self._max_records_limit is not None:
86
- global total_record_counter
87
- if total_record_counter >= self._max_records_limit:
101
+ if self._record_counter.get_total_records() >= self._max_records_limit:
88
102
  return
89
103
  for stream_data in self._retriever.read_records(
90
104
  self._schema_loader.get_json_schema(), self._stream_slice
91
105
  ):
92
106
  if self._max_records_limit is not None:
93
- if total_record_counter >= self._max_records_limit:
107
+ if self._record_counter.get_total_records() >= self._max_records_limit:
94
108
  break
95
109
 
96
110
  if isinstance(stream_data, Mapping):
@@ -108,7 +122,7 @@ class DeclarativePartition(Partition):
108
122
  self._message_repository.emit_message(stream_data)
109
123
 
110
124
  if self._max_records_limit is not None:
111
- total_record_counter += 1
125
+ self._record_counter.increment()
112
126
 
113
127
  def to_slice(self) -> Optional[Mapping[str, Any]]:
114
128
  return self._stream_slice
@@ -5,7 +5,7 @@
5
5
  from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Dict, Mapping, Optional
7
7
 
8
- from airbyte_cdk import InterpolatedString
8
+ from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
9
9
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
10
10
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
11
11
 
@@ -14,7 +14,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
14
14
  from airbyte_cdk.sources.types import ConnectionDefinition
15
15
 
16
16
 
17
- class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage]]):
17
+ class YamlDeclarativeSource(ConcurrentDeclarativeSource):
18
18
  """Declarative source defined by a yaml file"""
19
19
 
20
20
  def __init__(
@@ -14,10 +14,6 @@ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
14
14
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
15
15
 
16
16
 
17
- @deprecated(
18
- "This class is experimental. Use at your own risk.",
19
- category=ExperimentalClassWarning,
20
- )
21
17
  class AbstractStream(ABC):
22
18
  """
23
19
  AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
@@ -7,12 +7,16 @@ import importlib
7
7
  import json
8
8
  import os
9
9
  import pkgutil
10
- from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple
10
+ from copy import deepcopy
11
+ from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Mapping, MutableMapping, Tuple, cast
11
12
 
12
13
  import jsonref
13
- from jsonschema import RefResolver, validate
14
+ from jsonschema import validate
14
15
  from jsonschema.exceptions import ValidationError
15
16
  from pydantic.v1 import BaseModel, Field
17
+ from referencing import Registry, Resource
18
+ from referencing._core import Resolver # used for type hints
19
+ from referencing.jsonschema import DRAFT7
16
20
 
17
21
  from airbyte_cdk.models import ConnectorSpecification, FailureType
18
22
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
@@ -63,18 +67,30 @@ def resolve_ref_links(obj: Any) -> Any:
63
67
  return obj
64
68
 
65
69
 
66
- def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> None:
70
+ def get_ref_resolver_registry(schema: dict[str, Any]) -> Registry:
71
+ """Get a reference resolver registry for the given schema."""
72
+ resource: Resource = Resource.from_contents(
73
+ contents=schema,
74
+ default_specification=DRAFT7,
75
+ )
76
+ return cast( # Mypy has a hard time detecting this return type.
77
+ "Registry",
78
+ Registry().with_resource(
79
+ uri="",
80
+ resource=resource,
81
+ ),
82
+ )
83
+
84
+
85
+ def _expand_refs(schema: Any, ref_resolver: Resolver) -> None:
67
86
  """Internal function to iterate over schema and replace all occurrences of $ref with their definitions. Recursive.
68
87
 
69
88
  :param schema: schema that will be patched
70
- :param ref_resolver: resolver to get definition from $ref, if None pass it will be instantiated
71
89
  """
72
- ref_resolver = ref_resolver or RefResolver.from_schema(schema)
73
-
74
90
  if isinstance(schema, MutableMapping):
75
91
  if "$ref" in schema:
76
92
  ref_url = schema.pop("$ref")
77
- _, definition = ref_resolver.resolve(ref_url)
93
+ definition = ref_resolver.lookup(ref_url).contents
78
94
  _expand_refs(
79
95
  definition, ref_resolver=ref_resolver
80
96
  ) # expand refs in definitions as well
@@ -90,10 +106,14 @@ def _expand_refs(schema: Any, ref_resolver: Optional[RefResolver] = None) -> Non
90
106
  def expand_refs(schema: Any) -> None:
91
107
  """Iterate over schema and replace all occurrences of $ref with their definitions.
92
108
 
109
+ If a "definitions" section is present at the root of the schema, it will be removed
110
+ after $ref resolution is complete.
111
+
93
112
  :param schema: schema that will be patched
94
113
  """
95
- _expand_refs(schema)
96
- schema.pop("definitions", None) # remove definitions created by $ref
114
+ ref_resolver = get_ref_resolver_registry(schema).resolver()
115
+ _expand_refs(schema, ref_resolver)
116
+ schema.pop("definitions", None)
97
117
 
98
118
 
99
119
  def rename_key(schema: Any, old_key: str, new_key: str) -> None: