airbyte-cdk 6.60.13__py3-none-any.whl → 6.60.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -120,7 +120,11 @@ class TestReader:
120
120
  deprecation_warnings: List[LogMessage] = source.deprecation_warnings()
121
121
 
122
122
  schema_inferrer = SchemaInferrer(
123
- self._pk_to_nested_and_composite_field(stream.primary_key) if stream else None,
123
+ self._pk_to_nested_and_composite_field(
124
+ stream.primary_key if hasattr(stream, "primary_key") else stream._primary_key # type: ignore # We are accessing the private property here as the primary key is not exposed. We should either expose it or use `as_airbyte_stream` to retrieve it as this is the "official" way where it is exposed in the Airbyte protocol
125
+ )
126
+ if stream
127
+ else None,
124
128
  self._cursor_field_to_nested_and_composite_field(stream.cursor_field)
125
129
  if stream
126
130
  else None,
@@ -3,7 +3,7 @@
3
3
  #
4
4
 
5
5
  import logging
6
- from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple
6
+ from typing import Any, Generic, Iterator, List, Mapping, MutableMapping, Optional, Tuple, Union
7
7
 
8
8
  from airbyte_cdk.models import (
9
9
  AirbyteCatalog,
@@ -15,10 +15,6 @@ from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSo
15
15
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
16
16
  from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
17
17
  from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
18
- from airbyte_cdk.sources.declarative.extractors import RecordSelector
19
- from airbyte_cdk.sources.declarative.extractors.record_filter import (
20
- ClientSideIncrementalRecordFilterDecorator,
21
- )
22
18
  from airbyte_cdk.sources.declarative.incremental import (
23
19
  ConcurrentPerPartitionCursor,
24
20
  GlobalSubstreamCursor,
@@ -28,7 +24,6 @@ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global impor
28
24
  PerPartitionWithGlobalCursor,
29
25
  )
30
26
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
31
- from airbyte_cdk.sources.declarative.models import FileUploader
32
27
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
33
28
  ConcurrencyLevel as ConcurrencyLevelModel,
34
29
  )
@@ -84,7 +79,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
84
79
  # incremental streams running in full refresh.
85
80
  component_factory = component_factory or ModelToComponentFactory(
86
81
  emit_connector_builder_messages=emit_connector_builder_messages,
87
- disable_resumable_full_refresh=True,
88
82
  connector_state_manager=self._connector_state_manager,
89
83
  max_concurrent_async_job_count=source_config.get("max_concurrent_async_job_count"),
90
84
  )
@@ -180,7 +174,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
180
174
  ]
181
175
  )
182
176
 
183
- def streams(self, config: Mapping[str, Any]) -> List[Stream]:
177
+ def streams(self, config: Mapping[str, Any]) -> List[Union[Stream, AbstractStream]]: # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
184
178
  """
185
179
  The `streams` method is used as part of the AbstractSource in the following cases:
186
180
  * ConcurrentDeclarativeSource.check -> ManifestDeclarativeSource.check -> AbstractSource.check -> DeclarativeSource.check_connection -> CheckStream.check_connection -> streams
@@ -210,6 +204,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
210
204
  # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
211
205
  # so we need to treat them as synchronous
212
206
 
207
+ if isinstance(declarative_stream, AbstractStream):
208
+ concurrent_streams.append(declarative_stream)
209
+ continue
210
+
213
211
  supports_file_transfer = (
214
212
  isinstance(declarative_stream, DeclarativeStream)
215
213
  and "file_uploader" in name_to_stream_mapping[declarative_stream.name]
@@ -278,10 +276,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
278
276
 
279
277
  partition_generator = StreamSlicerPartitionGenerator(
280
278
  partition_factory=DeclarativePartitionFactory(
281
- declarative_stream.name,
282
- declarative_stream.get_json_schema(),
283
- retriever,
284
- self.message_repository,
279
+ stream_name=declarative_stream.name,
280
+ schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
281
+ retriever=retriever,
282
+ message_repository=self.message_repository,
285
283
  ),
286
284
  stream_slicer=declarative_stream.retriever.stream_slicer,
287
285
  )
@@ -309,10 +307,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
309
307
  )
310
308
  partition_generator = StreamSlicerPartitionGenerator(
311
309
  partition_factory=DeclarativePartitionFactory(
312
- declarative_stream.name,
313
- declarative_stream.get_json_schema(),
314
- retriever,
315
- self.message_repository,
310
+ stream_name=declarative_stream.name,
311
+ schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
312
+ retriever=retriever,
313
+ message_repository=self.message_repository,
316
314
  ),
317
315
  stream_slicer=cursor,
318
316
  )
@@ -339,10 +337,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
339
337
  ) and hasattr(declarative_stream.retriever, "stream_slicer"):
340
338
  partition_generator = StreamSlicerPartitionGenerator(
341
339
  DeclarativePartitionFactory(
342
- declarative_stream.name,
343
- declarative_stream.get_json_schema(),
344
- declarative_stream.retriever,
345
- self.message_repository,
340
+ stream_name=declarative_stream.name,
341
+ schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
342
+ retriever=declarative_stream.retriever,
343
+ message_repository=self.message_repository,
346
344
  ),
347
345
  declarative_stream.retriever.stream_slicer,
348
346
  )
@@ -399,10 +397,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
399
397
 
400
398
  partition_generator = StreamSlicerPartitionGenerator(
401
399
  DeclarativePartitionFactory(
402
- declarative_stream.name,
403
- declarative_stream.get_json_schema(),
404
- retriever,
405
- self.message_repository,
400
+ stream_name=declarative_stream.name,
401
+ schema_loader=declarative_stream._schema_loader, # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
402
+ retriever=retriever,
403
+ message_repository=self.message_repository,
406
404
  ),
407
405
  perpartition_cursor,
408
406
  )
@@ -8,7 +8,7 @@ import pkgutil
8
8
  from copy import deepcopy
9
9
  from importlib import metadata
10
10
  from types import ModuleType
11
- from typing import Any, Dict, Iterator, List, Mapping, Optional, Set
11
+ from typing import Any, Dict, Iterator, List, Mapping, Optional, Set, Union
12
12
 
13
13
  import orjson
14
14
  import yaml
@@ -66,6 +66,7 @@ from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
66
66
  from airbyte_cdk.sources.declarative.resolvers import COMPONENTS_RESOLVER_TYPE_MAPPING
67
67
  from airbyte_cdk.sources.declarative.spec.spec import Spec
68
68
  from airbyte_cdk.sources.message import MessageRepository
69
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
69
70
  from airbyte_cdk.sources.streams.core import Stream
70
71
  from airbyte_cdk.sources.types import Config, ConnectionDefinition
71
72
  from airbyte_cdk.sources.utils.slice_logger import (
@@ -297,7 +298,12 @@ class ManifestDeclarativeSource(DeclarativeSource):
297
298
  f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}"
298
299
  )
299
300
 
300
- def streams(self, config: Mapping[str, Any]) -> List[Stream]:
301
+ def streams(self, config: Mapping[str, Any]) -> List[Union[Stream, AbstractStream]]: # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
302
+ """
303
+ As a migration step, this method will return both legacy stream (Stream) and concurrent stream (AbstractStream).
304
+ Once the migration is done, we can probably have this method throw "not implemented" as we figure out how to
305
+ fully decouple this from the AbstractSource.
306
+ """
301
307
  if self._spec_component:
302
308
  self._spec_component.validate_config(config)
303
309
 
@@ -7,6 +7,7 @@ from __future__ import annotations
7
7
  import datetime
8
8
  import importlib
9
9
  import inspect
10
+ import logging
10
11
  import re
11
12
  from functools import partial
12
13
  from typing import (
@@ -543,6 +544,10 @@ from airbyte_cdk.sources.declarative.stream_slicers import (
543
544
  StreamSlicer,
544
545
  StreamSlicerTestReadDecorator,
545
546
  )
547
+ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
548
+ DeclarativePartitionFactory,
549
+ StreamSlicerPartitionGenerator,
550
+ )
546
551
  from airbyte_cdk.sources.declarative.transformations import (
547
552
  AddFields,
548
553
  RecordTransformation,
@@ -594,6 +599,7 @@ from airbyte_cdk.sources.streams.call_rate import (
594
599
  Rate,
595
600
  UnlimitedCallRatePolicy,
596
601
  )
602
+ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
597
603
  from airbyte_cdk.sources.streams.concurrent.clamping import (
598
604
  ClampingEndProvider,
599
605
  ClampingStrategy,
@@ -603,7 +609,14 @@ from airbyte_cdk.sources.streams.concurrent.clamping import (
603
609
  WeekClampingStrategy,
604
610
  Weekday,
605
611
  )
606
- from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
612
+ from airbyte_cdk.sources.streams.concurrent.cursor import (
613
+ ConcurrentCursor,
614
+ Cursor,
615
+ CursorField,
616
+ FinalStateCursor,
617
+ )
618
+ from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
619
+ from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
607
620
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
608
621
  CustomFormatConcurrentStreamStateConverter,
609
622
  DateTimeStreamStateConverter,
@@ -633,7 +646,6 @@ class ModelToComponentFactory:
633
646
  emit_connector_builder_messages: bool = False,
634
647
  disable_retries: bool = False,
635
648
  disable_cache: bool = False,
636
- disable_resumable_full_refresh: bool = False,
637
649
  message_repository: Optional[MessageRepository] = None,
638
650
  connector_state_manager: Optional[ConnectorStateManager] = None,
639
651
  max_concurrent_async_job_count: Optional[int] = None,
@@ -644,7 +656,6 @@ class ModelToComponentFactory:
644
656
  self._emit_connector_builder_messages = emit_connector_builder_messages
645
657
  self._disable_retries = disable_retries
646
658
  self._disable_cache = disable_cache
647
- self._disable_resumable_full_refresh = disable_resumable_full_refresh
648
659
  self._message_repository = message_repository or InMemoryMessageRepository(
649
660
  self._evaluate_log_level(emit_connector_builder_messages)
650
661
  )
@@ -1920,8 +1931,8 @@ class ModelToComponentFactory:
1920
1931
  )
1921
1932
 
1922
1933
  def create_declarative_stream(
1923
- self, model: DeclarativeStreamModel, config: Config, **kwargs: Any
1924
- ) -> DeclarativeStream:
1934
+ self, model: DeclarativeStreamModel, config: Config, is_parent: bool = False, **kwargs: Any
1935
+ ) -> Union[DeclarativeStream, AbstractStream]:
1925
1936
  # When constructing a declarative stream, we assemble the incremental_sync component and retriever's partition_router field
1926
1937
  # components if they exist into a single CartesianProductStreamSlicer. This is then passed back as an argument when constructing the
1927
1938
  # Retriever. This is done in the declarative stream not the retriever to support custom retrievers. The custom create methods in
@@ -2027,15 +2038,6 @@ class ModelToComponentFactory:
2027
2038
  file_uploader=file_uploader,
2028
2039
  incremental_sync=model.incremental_sync,
2029
2040
  )
2030
- cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
2031
-
2032
- if model.state_migrations:
2033
- state_transformations = [
2034
- self._create_component_from_model(state_migration, config, declarative_stream=model)
2035
- for state_migration in model.state_migrations
2036
- ]
2037
- else:
2038
- state_transformations = []
2039
2041
 
2040
2042
  schema_loader: Union[
2041
2043
  CompositeSchemaLoader,
@@ -2063,6 +2065,56 @@ class ModelToComponentFactory:
2063
2065
  options["name"] = model.name
2064
2066
  schema_loader = DefaultSchemaLoader(config=config, parameters=options)
2065
2067
 
2068
+ if (
2069
+ isinstance(combined_slicers, PartitionRouter)
2070
+ and not self._emit_connector_builder_messages
2071
+ and not is_parent
2072
+ ):
2073
+ # We are starting to migrate streams to instantiate directly the DefaultStream instead of instantiating the
2074
+ # DeclarativeStream and assembling the DefaultStream from that. The plan is the following:
2075
+ # * Streams without partition router nor cursors and streams with only partition router. This is the `isinstance(combined_slicers, PartitionRouter)` condition as the first kind will have a SinglePartitionRouter
2076
+ # * Streams without partition router but with cursor
2077
+ # * Streams with both partition router and cursor
2078
+ # We specifically exclude parent streams here because SubstreamPartitionRouter has not been updated yet
2079
+ # We specifically exclude Connector Builder stuff for now as Brian is working on this anyway
2080
+ stream_name = model.name or ""
2081
+ partition_generator = StreamSlicerPartitionGenerator(
2082
+ DeclarativePartitionFactory(
2083
+ stream_name,
2084
+ schema_loader,
2085
+ retriever,
2086
+ self._message_repository,
2087
+ ),
2088
+ stream_slicer=cast(
2089
+ StreamSlicer,
2090
+ StreamSlicerTestReadDecorator(
2091
+ wrapped_slicer=combined_slicers,
2092
+ maximum_number_of_slices=self._limit_slices_fetched or 5,
2093
+ ),
2094
+ ),
2095
+ )
2096
+ return DefaultStream(
2097
+ partition_generator=partition_generator,
2098
+ name=stream_name,
2099
+ json_schema=schema_loader.get_json_schema,
2100
+ primary_key=get_primary_key_from_stream(primary_key),
2101
+ cursor_field=None,
2102
+ # FIXME we should have the cursor field as part of the interface of cursor
2103
+ logger=logging.getLogger(f"airbyte.{stream_name}"),
2104
+ # FIXME this is a breaking change compared to the old implementation,
2105
+ cursor=FinalStateCursor(stream_name, None, self._message_repository),
2106
+ supports_file_transfer=hasattr(model, "file_uploader")
2107
+ and bool(model.file_uploader),
2108
+ )
2109
+
2110
+ cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
2111
+ if model.state_migrations:
2112
+ state_transformations = [
2113
+ self._create_component_from_model(state_migration, config, declarative_stream=model)
2114
+ for state_migration in model.state_migrations
2115
+ ]
2116
+ else:
2117
+ state_transformations = []
2066
2118
  return DeclarativeStream(
2067
2119
  name=model.name or "",
2068
2120
  primary_key=primary_key,
@@ -2083,7 +2135,7 @@ class ModelToComponentFactory:
2083
2135
  ],
2084
2136
  config: Config,
2085
2137
  stream_name: Optional[str] = None,
2086
- ) -> Optional[PartitionRouter]:
2138
+ ) -> PartitionRouter:
2087
2139
  if (
2088
2140
  hasattr(model, "partition_router")
2089
2141
  and isinstance(model, SimpleRetrieverModel | AsyncRetrieverModel)
@@ -2104,7 +2156,7 @@ class ModelToComponentFactory:
2104
2156
  return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
2105
2157
  model=stream_slicer_model, config=config, stream_name=stream_name or ""
2106
2158
  )
2107
- return None
2159
+ return SinglePartitionRouter(parameters={})
2108
2160
 
2109
2161
  def _build_incremental_cursor(
2110
2162
  self,
@@ -2121,7 +2173,9 @@ class ModelToComponentFactory:
2121
2173
  else []
2122
2174
  )
2123
2175
 
2124
- if model.incremental_sync and stream_slicer:
2176
+ if model.incremental_sync and (
2177
+ stream_slicer and not isinstance(stream_slicer, SinglePartitionRouter)
2178
+ ):
2125
2179
  if model.retriever.type == "AsyncRetriever":
2126
2180
  stream_name = model.name or ""
2127
2181
  stream_namespace = None
@@ -2194,7 +2248,11 @@ class ModelToComponentFactory:
2194
2248
  else:
2195
2249
  state_transformations = []
2196
2250
 
2197
- if model.incremental_sync and stream_slicer:
2251
+ if (
2252
+ model.incremental_sync
2253
+ and stream_slicer
2254
+ and not isinstance(stream_slicer, SinglePartitionRouter)
2255
+ ):
2198
2256
  return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2199
2257
  state_manager=self._connector_state_manager,
2200
2258
  model_type=DatetimeBasedCursorModel,
@@ -2233,28 +2291,6 @@ class ModelToComponentFactory:
2233
2291
  )
2234
2292
  return None
2235
2293
 
2236
- def _build_resumable_cursor(
2237
- self,
2238
- model: Union[
2239
- AsyncRetrieverModel,
2240
- CustomRetrieverModel,
2241
- SimpleRetrieverModel,
2242
- ],
2243
- stream_slicer: Optional[PartitionRouter],
2244
- ) -> Optional[StreamSlicer]:
2245
- if hasattr(model, "paginator") and model.paginator and not stream_slicer:
2246
- # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
2247
- return ResumableFullRefreshCursor(parameters={})
2248
- elif stream_slicer:
2249
- # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
2250
- return PerPartitionCursor(
2251
- cursor_factory=CursorFactory(
2252
- create_function=partial(ChildPartitionResumableFullRefreshCursor, {})
2253
- ),
2254
- partition_router=stream_slicer,
2255
- )
2256
- return None
2257
-
2258
2294
  def _merge_stream_slicers(
2259
2295
  self, model: DeclarativeStreamModel, config: Config
2260
2296
  ) -> Optional[StreamSlicer]:
@@ -2291,11 +2327,7 @@ class ModelToComponentFactory:
2291
2327
  if model.incremental_sync:
2292
2328
  return self._build_incremental_cursor(model, stream_slicer, config)
2293
2329
 
2294
- return (
2295
- stream_slicer
2296
- if self._disable_resumable_full_refresh
2297
- else self._build_resumable_cursor(retriever_model, stream_slicer)
2298
- )
2330
+ return stream_slicer
2299
2331
 
2300
2332
  def create_default_error_handler(
2301
2333
  self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
@@ -2577,9 +2609,6 @@ class ModelToComponentFactory:
2577
2609
  def create_dynamic_schema_loader(
2578
2610
  self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2579
2611
  ) -> DynamicSchemaLoader:
2580
- stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2581
- combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2582
-
2583
2612
  schema_transformations = []
2584
2613
  if model.schema_transformations:
2585
2614
  for transformation_model in model.schema_transformations:
@@ -2592,7 +2621,7 @@ class ModelToComponentFactory:
2592
2621
  config=config,
2593
2622
  name=name,
2594
2623
  primary_key=None,
2595
- stream_slicer=combined_slicers,
2624
+ stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
2596
2625
  transformations=[],
2597
2626
  use_cache=True,
2598
2627
  log_formatter=(
@@ -2945,7 +2974,10 @@ class ModelToComponentFactory:
2945
2974
  self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2946
2975
  ) -> ParentStreamConfig:
2947
2976
  declarative_stream = self._create_component_from_model(
2948
- model.stream, config=config, **kwargs
2977
+ model.stream,
2978
+ config=config,
2979
+ is_parent=True,
2980
+ **kwargs,
2949
2981
  )
2950
2982
  request_option = (
2951
2983
  self._create_component_from_model(model.request_option, config=config)
@@ -3855,15 +3887,12 @@ class ModelToComponentFactory:
3855
3887
  def create_http_components_resolver(
3856
3888
  self, model: HttpComponentsResolverModel, config: Config, stream_name: Optional[str] = None
3857
3889
  ) -> Any:
3858
- stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3859
- combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3860
-
3861
3890
  retriever = self._create_component_from_model(
3862
3891
  model=model.retriever,
3863
3892
  config=config,
3864
3893
  name=f"{stream_name if stream_name else '__http_components_resolver'}",
3865
3894
  primary_key=None,
3866
- stream_slicer=stream_slicer if stream_slicer else combined_slicers,
3895
+ stream_slicer=self._build_stream_slicer_from_partition_router(model.retriever, config),
3867
3896
  transformations=[],
3868
3897
  )
3869
3898
 
@@ -3,6 +3,7 @@
3
3
  from typing import Any, Iterable, Mapping, Optional
4
4
 
5
5
  from airbyte_cdk.sources.declarative.retrievers import Retriever
6
+ from airbyte_cdk.sources.declarative.schema import SchemaLoader
6
7
  from airbyte_cdk.sources.message import MessageRepository
7
8
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
8
9
  from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
@@ -11,11 +12,23 @@ from airbyte_cdk.sources.types import Record, StreamSlice
11
12
  from airbyte_cdk.utils.slice_hasher import SliceHasher
12
13
 
13
14
 
15
+ class SchemaLoaderCachingDecorator(SchemaLoader):
16
+ def __init__(self, schema_loader: SchemaLoader):
17
+ self._decorated = schema_loader
18
+ self._loaded_schema: Optional[Mapping[str, Any]] = None
19
+
20
+ def get_json_schema(self) -> Mapping[str, Any]:
21
+ if self._loaded_schema is None:
22
+ self._loaded_schema = self._decorated.get_json_schema()
23
+
24
+ return self._loaded_schema # type: ignore # at that point, we assume the schema will be populated
25
+
26
+
14
27
  class DeclarativePartitionFactory:
15
28
  def __init__(
16
29
  self,
17
30
  stream_name: str,
18
- json_schema: Mapping[str, Any],
31
+ schema_loader: SchemaLoader,
19
32
  retriever: Retriever,
20
33
  message_repository: MessageRepository,
21
34
  ) -> None:
@@ -25,17 +38,17 @@ class DeclarativePartitionFactory:
25
38
  In order to avoid these problems, we will create one retriever per thread which should make the processing thread-safe.
26
39
  """
27
40
  self._stream_name = stream_name
28
- self._json_schema = json_schema
41
+ self._schema_loader = SchemaLoaderCachingDecorator(schema_loader)
29
42
  self._retriever = retriever
30
43
  self._message_repository = message_repository
31
44
 
32
45
  def create(self, stream_slice: StreamSlice) -> Partition:
33
46
  return DeclarativePartition(
34
- self._stream_name,
35
- self._json_schema,
36
- self._retriever,
37
- self._message_repository,
38
- stream_slice,
47
+ stream_name=self._stream_name,
48
+ schema_loader=self._schema_loader,
49
+ retriever=self._retriever,
50
+ message_repository=self._message_repository,
51
+ stream_slice=stream_slice,
39
52
  )
40
53
 
41
54
 
@@ -43,20 +56,22 @@ class DeclarativePartition(Partition):
43
56
  def __init__(
44
57
  self,
45
58
  stream_name: str,
46
- json_schema: Mapping[str, Any],
59
+ schema_loader: SchemaLoader,
47
60
  retriever: Retriever,
48
61
  message_repository: MessageRepository,
49
62
  stream_slice: StreamSlice,
50
63
  ):
51
64
  self._stream_name = stream_name
52
- self._json_schema = json_schema
65
+ self._schema_loader = schema_loader
53
66
  self._retriever = retriever
54
67
  self._message_repository = message_repository
55
68
  self._stream_slice = stream_slice
56
69
  self._hash = SliceHasher.hash(self._stream_name, self._stream_slice)
57
70
 
58
71
  def read(self) -> Iterable[Record]:
59
- for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
72
+ for stream_data in self._retriever.read_records(
73
+ self._schema_loader.get_json_schema(), self._stream_slice
74
+ ):
60
75
  if isinstance(stream_data, Mapping):
61
76
  record = (
62
77
  stream_data
@@ -6,7 +6,7 @@ import copy
6
6
  import json
7
7
  import logging
8
8
  from functools import lru_cache
9
- from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
9
+ from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Union
10
10
 
11
11
  from typing_extensions import deprecated
12
12
 
@@ -196,6 +196,7 @@ class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
196
196
  def cursor(self) -> Optional[Cursor]: # type: ignore[override] # StreamFaced expects to use only airbyte_cdk.sources.streams.concurrent.cursor.Cursor
197
197
  return self._cursor
198
198
 
199
+ # FIXME the lru_cache seems to be mostly there because of a typing issue
199
200
  @lru_cache(maxsize=None)
200
201
  def get_json_schema(self) -> Mapping[str, Any]:
201
202
  return self._abstract_stream.get_json_schema()
@@ -2,9 +2,8 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from functools import lru_cache
6
5
  from logging import Logger
7
- from typing import Any, Iterable, List, Mapping, Optional
6
+ from typing import Any, Callable, Iterable, List, Mapping, Optional, Union
8
7
 
9
8
  from airbyte_cdk.models import AirbyteStream, SyncMode
10
9
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
@@ -20,7 +19,7 @@ class DefaultStream(AbstractStream):
20
19
  self,
21
20
  partition_generator: PartitionGenerator,
22
21
  name: str,
23
- json_schema: Mapping[str, Any],
22
+ json_schema: Union[Mapping[str, Any], Callable[[], Mapping[str, Any]]],
24
23
  primary_key: List[str],
25
24
  cursor_field: Optional[str],
26
25
  logger: Logger,
@@ -53,14 +52,13 @@ class DefaultStream(AbstractStream):
53
52
  def cursor_field(self) -> Optional[str]:
54
53
  return self._cursor_field
55
54
 
56
- @lru_cache(maxsize=None)
57
55
  def get_json_schema(self) -> Mapping[str, Any]:
58
- return self._json_schema
56
+ return self._json_schema() if callable(self._json_schema) else self._json_schema
59
57
 
60
58
  def as_airbyte_stream(self) -> AirbyteStream:
61
59
  stream = AirbyteStream(
62
60
  name=self.name,
63
- json_schema=dict(self._json_schema),
61
+ json_schema=dict(self.get_json_schema()),
64
62
  supported_sync_modes=[SyncMode.full_refresh],
65
63
  is_resumable=False,
66
64
  is_file_based=self._supports_file_transfer,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.60.13
3
+ Version: 6.60.14
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -21,7 +21,7 @@ airbyte_cdk/connector_builder/models.py,sha256=9pIZ98LW_d6fRS39VdnUOf3cxGt4TkC5M
21
21
  airbyte_cdk/connector_builder/test_reader/__init__.py,sha256=iTwBMoI9vaJotEgpqZbFjlxRcbxXYypSVJ9YxeHk7wc,120
22
22
  airbyte_cdk/connector_builder/test_reader/helpers.py,sha256=vqoHpZeQ0BLIw2NiTNGXr0euA8gI_X0pcNRcHOv8sHM,27942
23
23
  airbyte_cdk/connector_builder/test_reader/message_grouper.py,sha256=LDNl-xFQwA4RsUpn7684KbWaVH-SWWBIwhHvIgduLTE,7090
24
- airbyte_cdk/connector_builder/test_reader/reader.py,sha256=3jLy3tUUHkG1rmGWrZuo4SmPYNVD9oiAqy8mdaUwzvo,21301
24
+ airbyte_cdk/connector_builder/test_reader/reader.py,sha256=DugoqS6SMrtOJ--2Y0F0h_9x8m632i7fSOPMAA0JHnc,21654
25
25
  airbyte_cdk/connector_builder/test_reader/types.py,sha256=hPZG3jO03kBaPyW94NI3JHRS1jxXGSNBcN1HFzOxo5Y,2528
26
26
  airbyte_cdk/destinations/__init__.py,sha256=FyDp28PT_YceJD5HDFhA-mrGfX9AONIyMQ4d68CHNxQ,213
27
27
  airbyte_cdk/destinations/destination.py,sha256=CIq-yb8C_0QvcKCtmStaHfiqn53GEfRAIGGCkJhKP1Q,5880
@@ -86,7 +86,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=sV-ZY7dZ03V8GdAxPY
86
86
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
87
87
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
88
88
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
89
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=_C0IKcPlumc3HAGxccmeyS8lSQo7HxPr4jcNFok_9oU,26637
89
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=HQxvDEoMWtVdVRZgJylrT0YLx-R8sOgICjY3HnifvWs,27391
90
90
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
91
91
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
92
92
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
@@ -128,7 +128,7 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkH
128
128
  airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
129
129
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=oFGKs3oX0xO6DOL4E9x8rhxwbEoRcgx4HJVIL1RQ9c4,7269
130
130
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=RpsAYG75bW0js2fQCzAN1nf3oeGyXwyt0LhJCHnlaUA,6031
131
- airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=NNywOq7i0N0oEBvZWbKmo09jHEZVsrY_TAiYR-P4m5k,26558
131
+ airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=VqR3lti_RLRRe0_1EwUn8_OsJTxQrGqU3n-T9GowAKk,27154
132
132
  airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
133
133
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=V2lpYE9LJKvz6BUViHk4vaRGndxNABmPbDCtyYdkqaE,4013
134
134
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
@@ -141,7 +141,7 @@ airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9R
141
141
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=2UdpCz3yi7ISZTyqkQXSSy3dMxeyOWqV7OlAS5b9GVg,11568
142
142
  airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
143
143
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
144
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=zr4oBy1MCbsJtOf4gffEQOFo5wX5oq9lhIpVUmyNJoE,181452
144
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=3oJ7JdtlpkzeTB5PAu0FRH0iVfBlk2wcFGB1U_0owdI,183073
145
145
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
146
146
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
147
147
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -220,7 +220,7 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
220
220
  airbyte_cdk/sources/declarative/spec/__init__.py,sha256=9FYO-fVOclrwjAW4qwRTbZRVopTc9rOaauAJfThdNCQ,177
221
221
  airbyte_cdk/sources/declarative/spec/spec.py,sha256=SwL_pfXZgcLYLJY-MAeFMHug9oYh2tOWjgG0C3DoLOY,3602
222
222
  airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=UX-cP_C-9FIFFPL9z8nuxu_rglssRsMOqQmQHN8FLB8,341
223
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=cjKGm4r438dd1GxrFHJ4aYrdzG2bkncnwaWxAwlXR3M,3585
223
+ airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=Wk7P-Jpy3f3a59mwnc9ycJbpA3zVcgykNt2grBSXhBA,4272
224
224
  airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
225
225
  airbyte_cdk/sources/declarative/stream_slicers/stream_slicer_test_read_decorator.py,sha256=aUSleOw9elq3-5TaDUvp7H8W-2qUKqpr__kaJd8-ZFA,983
226
226
  airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
@@ -316,12 +316,12 @@ airbyte_cdk/sources/streams/concurrent/README.md,sha256=0nvgnlCBfZJiPDAofT8yFmUh
316
316
  airbyte_cdk/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
317
317
  airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=aalik3FvyEjoeA1S3wUYEV3bgQLGrTnhYKPvT-rgy9E,3919
318
318
  airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
319
- airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=R9EZza1jF0e_HaLgN9Q_VREjvmlk0p9UfBLsnHB2y48,13936
319
+ airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=h4ZewhWn2PzPTt0lZZjcUL4rrpW9E_of7prnI3bm-c4,14004
320
320
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=M0XmvF3vjlr4GbCM0XH1hAj7udiAONM9SnmXjqufzLM,1035
321
321
  airbyte_cdk/sources/streams/concurrent/clamping.py,sha256=i26GVyui2ScEXSP-IP_61K2HaTp1-6lTlYHsZVYpuZA,3240
322
322
  airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=xFFB8eEbtjGUdb42vkyWT5JB-WTUsaJlZ0gjKoVEycc,22307
323
323
  airbyte_cdk/sources/streams/concurrent/cursor_types.py,sha256=ZyWLPpeLX1qXcP5MwS-wxK11IBMsnVPCw9zx8gA2_Ro,843
324
- airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=cVBMwCM8OVlsLA39e1YN_eGb7qApqH-GQtnWMs19sGU,4687
324
+ airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=SSufbo5f7OOYS8DZaABXeJVvodcfp9wb8J9lT5Xik3s,4744
325
325
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
326
326
  airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
327
327
  airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=2t64b_z9cEPmlHZnjSiMTO8PEtEdiAJDG0JcYOtUqAE,3363
@@ -424,9 +424,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
424
424
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
425
425
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
426
426
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
427
- airbyte_cdk-6.60.13.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
428
- airbyte_cdk-6.60.13.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
429
- airbyte_cdk-6.60.13.dist-info/METADATA,sha256=Zy-y3rjiuGIGzc3BLW-7F8zJJAxZMsX85KCPZHIxUXc,6478
430
- airbyte_cdk-6.60.13.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
431
- airbyte_cdk-6.60.13.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
432
- airbyte_cdk-6.60.13.dist-info/RECORD,,
427
+ airbyte_cdk-6.60.14.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
428
+ airbyte_cdk-6.60.14.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
429
+ airbyte_cdk-6.60.14.dist-info/METADATA,sha256=m-YkMQwaHLjes8d92IMFWcX4rh0zNCQcOkMK0VkCWzI,6478
430
+ airbyte_cdk-6.60.14.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
431
+ airbyte_cdk-6.60.14.dist-info/entry_points.txt,sha256=AKWbEkHfpzzk9nF9tqBUaw1MbvTM4mGtEzmZQm0ZWvM,139
432
+ airbyte_cdk-6.60.14.dist-info/RECORD,,