airbyte-cdk 6.39.1__py3-none-any.whl → 6.39.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -162,6 +162,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
162
162
  else:
163
163
  filtered_catalog = catalog
164
164
 
165
+ # There is no need to run the synchronous read when the filtered catalog has no streams.
166
+ if not filtered_catalog.streams:
167
+ return
168
+
165
169
  yield from super().read(logger, config, filtered_catalog, state)
166
170
 
167
171
  def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> AirbyteCatalog:
@@ -201,6 +205,18 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
201
205
  # Some low-code sources use a combination of DeclarativeStream and regular Python streams. We can't inspect
202
206
  # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
203
207
  # so we need to treat them as synchronous
208
+
209
+ if name_to_stream_mapping[declarative_stream.name]["type"] == "StateDelegatingStream":
210
+ stream_state = self._connector_state_manager.get_stream_state(
211
+ stream_name=declarative_stream.name, namespace=declarative_stream.namespace
212
+ )
213
+
214
+ name_to_stream_mapping[declarative_stream.name] = (
215
+ name_to_stream_mapping[declarative_stream.name]["incremental_stream"]
216
+ if stream_state
217
+ else name_to_stream_mapping[declarative_stream.name]["full_refresh_stream"]
218
+ )
219
+
204
220
  if isinstance(declarative_stream, DeclarativeStream) and (
205
221
  name_to_stream_mapping[declarative_stream.name]["retriever"]["type"]
206
222
  == "SimpleRetriever"
@@ -24,7 +24,9 @@ properties:
24
24
  streams:
25
25
  type: array
26
26
  items:
27
- "$ref": "#/definitions/DeclarativeStream"
27
+ anyOf:
28
+ - "$ref": "#/definitions/DeclarativeStream"
29
+ - "$ref": "#/definitions/StateDelegatingStream"
28
30
  dynamic_streams:
29
31
  type: array
30
32
  items:
@@ -2881,7 +2883,9 @@ definitions:
2881
2883
  stream:
2882
2884
  title: Parent Stream
2883
2885
  description: Reference to the parent stream.
2884
- "$ref": "#/definitions/DeclarativeStream"
2886
+ anyOf:
2887
+ - "$ref": "#/definitions/DeclarativeStream"
2888
+ - "$ref": "#/definitions/StateDelegatingStream"
2885
2889
  partition_field:
2886
2890
  title: Current Parent Key Value Identifier
2887
2891
  description: While iterating over parent records during a sync, the parent_key value can be referenced by using this field.
@@ -3154,6 +3158,36 @@ definitions:
3154
3158
  $parameters:
3155
3159
  type: object
3156
3160
  additionalProperties: true
3161
+ StateDelegatingStream:
3162
+ description: (This component is experimental. Use at your own risk.) Orchestrate the retriever's usage based on the state value.
3163
+ type: object
3164
+ required:
3165
+ - type
3166
+ - name
3167
+ - full_refresh_stream
3168
+ - incremental_stream
3169
+ properties:
3170
+ type:
3171
+ type: string
3172
+ enum: [ StateDelegatingStream ]
3173
+ name:
3174
+ title: Name
3175
+ description: The stream name.
3176
+ type: string
3177
+ default: ""
3178
+ example:
3179
+ - "Users"
3180
+ full_refresh_stream:
3181
+ title: Retriever
3182
+ description: Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.
3183
+ "$ref": "#/definitions/DeclarativeStream"
3184
+ incremental_stream:
3185
+ title: Retriever
3186
+ description: Component used to coordinate how records are extracted across stream slices and request pages when the state is provided.
3187
+ "$ref": "#/definitions/DeclarativeStream"
3188
+ $parameters:
3189
+ type: object
3190
+ additionalProperties: true
3157
3191
  SimpleRetriever:
3158
3192
  description: Retrieves records by synchronously sending requests to fetch records. The retriever acts as an orchestrator between the requester, the record selector, the paginator, and the partition router.
3159
3193
  type: object
@@ -14,6 +14,7 @@ from airbyte_cdk.sources.declarative.incremental import (
14
14
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
15
15
  from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
16
16
  from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever
17
+ from airbyte_cdk.sources.declarative.retrievers.async_retriever import AsyncRetriever
17
18
  from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
18
19
  from airbyte_cdk.sources.declarative.schema import DefaultSchemaLoader
19
20
  from airbyte_cdk.sources.declarative.schema.schema_loader import SchemaLoader
@@ -76,11 +77,17 @@ class DeclarativeStream(Stream):
76
77
 
77
78
  @property
78
79
  def exit_on_rate_limit(self) -> bool:
80
+ if isinstance(self.retriever, AsyncRetriever):
81
+ return self.retriever.exit_on_rate_limit
82
+
79
83
  return self.retriever.requester.exit_on_rate_limit # type: ignore # abstract Retriever class has not requester attribute
80
84
 
81
85
  @exit_on_rate_limit.setter
82
86
  def exit_on_rate_limit(self, value: bool) -> None:
83
- self.retriever.requester.exit_on_rate_limit = value # type: ignore[attr-defined]
87
+ if isinstance(self.retriever, AsyncRetriever):
88
+ self.retriever.exit_on_rate_limit = value
89
+ else:
90
+ self.retriever.requester.exit_on_rate_limit = value # type: ignore[attr-defined]
84
91
 
85
92
  @property # type: ignore
86
93
  def name(self) -> str:
@@ -30,6 +30,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
30
30
  DeclarativeStream as DeclarativeStreamModel,
31
31
  )
32
32
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
33
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
34
+ StateDelegatingStream as StateDelegatingStreamModel,
35
+ )
33
36
  from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
34
37
  get_registered_components_module,
35
38
  )
@@ -146,7 +149,9 @@ class ManifestDeclarativeSource(DeclarativeSource):
146
149
 
147
150
  source_streams = [
148
151
  self._constructor.create_component(
149
- DeclarativeStreamModel,
152
+ StateDelegatingStreamModel
153
+ if stream_config.get("type") == StateDelegatingStreamModel.__name__
154
+ else DeclarativeStreamModel,
150
155
  stream_config,
151
156
  config,
152
157
  emit_connector_builder_messages=self._emit_connector_builder_messages,
@@ -165,7 +170,15 @@ class ManifestDeclarativeSource(DeclarativeSource):
165
170
  def update_with_cache_parent_configs(parent_configs: list[dict[str, Any]]) -> None:
166
171
  for parent_config in parent_configs:
167
172
  parent_streams.add(parent_config["stream"]["name"])
168
- parent_config["stream"]["retriever"]["requester"]["use_cache"] = True
173
+ if parent_config["stream"]["type"] == "StateDelegatingStream":
174
+ parent_config["stream"]["full_refresh_stream"]["retriever"]["requester"][
175
+ "use_cache"
176
+ ] = True
177
+ parent_config["stream"]["incremental_stream"]["retriever"]["requester"][
178
+ "use_cache"
179
+ ] = True
180
+ else:
181
+ parent_config["stream"]["retriever"]["requester"]["use_cache"] = True
169
182
 
170
183
  for stream_config in stream_configs:
171
184
  if stream_config.get("incremental_sync", {}).get("parent_stream"):
@@ -188,7 +201,15 @@ class ManifestDeclarativeSource(DeclarativeSource):
188
201
 
189
202
  for stream_config in stream_configs:
190
203
  if stream_config["name"] in parent_streams:
191
- stream_config["retriever"]["requester"]["use_cache"] = True
204
+ if stream_config["type"] == "StateDelegatingStream":
205
+ stream_config["full_refresh_stream"]["retriever"]["requester"]["use_cache"] = (
206
+ True
207
+ )
208
+ stream_config["incremental_stream"]["retriever"]["requester"]["use_cache"] = (
209
+ True
210
+ )
211
+ else:
212
+ stream_config["retriever"]["requester"]["use_cache"] = True
192
213
 
193
214
  return stream_configs
194
215
 
@@ -1860,7 +1860,7 @@ class DeclarativeSource1(BaseModel):
1860
1860
 
1861
1861
  type: Literal["DeclarativeSource"]
1862
1862
  check: Union[CheckStream, CheckDynamicStream]
1863
- streams: List[DeclarativeStream]
1863
+ streams: List[Union[DeclarativeStream, StateDelegatingStream]]
1864
1864
  dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
1865
1865
  version: str = Field(
1866
1866
  ...,
@@ -1892,7 +1892,7 @@ class DeclarativeSource2(BaseModel):
1892
1892
 
1893
1893
  type: Literal["DeclarativeSource"]
1894
1894
  check: Union[CheckStream, CheckDynamicStream]
1895
- streams: Optional[List[DeclarativeStream]] = None
1895
+ streams: Optional[List[Union[DeclarativeStream, StateDelegatingStream]]] = None
1896
1896
  dynamic_streams: List[DynamicDeclarativeStream]
1897
1897
  version: str = Field(
1898
1898
  ...,
@@ -2211,7 +2211,7 @@ class ParentStreamConfig(BaseModel):
2211
2211
  examples=["id", "{{ config['parent_record_id'] }}"],
2212
2212
  title="Parent Key",
2213
2213
  )
2214
- stream: DeclarativeStream = Field(
2214
+ stream: Union[DeclarativeStream, StateDelegatingStream] = Field(
2215
2215
  ..., description="Reference to the parent stream.", title="Parent Stream"
2216
2216
  )
2217
2217
  partition_field: str = Field(
@@ -2238,6 +2238,22 @@ class ParentStreamConfig(BaseModel):
2238
2238
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2239
2239
 
2240
2240
 
2241
+ class StateDelegatingStream(BaseModel):
2242
+ type: Literal["StateDelegatingStream"]
2243
+ name: str = Field(..., description="The stream name.", example=["Users"], title="Name")
2244
+ full_refresh_stream: DeclarativeStream = Field(
2245
+ ...,
2246
+ description="Component used to coordinate how records are extracted across stream slices and request pages when the state is empty or not provided.",
2247
+ title="Retriever",
2248
+ )
2249
+ incremental_stream: DeclarativeStream = Field(
2250
+ ...,
2251
+ description="Component used to coordinate how records are extracted across stream slices and request pages when the state provided.",
2252
+ title="Retriever",
2253
+ )
2254
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2255
+
2256
+
2241
2257
  class SimpleRetriever(BaseModel):
2242
2258
  type: Literal["SimpleRetriever"]
2243
2259
  record_selector: RecordSelector = Field(
@@ -2423,5 +2439,6 @@ SelectiveAuthenticator.update_forward_refs()
2423
2439
  DeclarativeStream.update_forward_refs()
2424
2440
  SessionTokenAuthenticator.update_forward_refs()
2425
2441
  DynamicSchemaLoader.update_forward_refs()
2442
+ ParentStreamConfig.update_forward_refs()
2426
2443
  SimpleRetriever.update_forward_refs()
2427
2444
  AsyncRetriever.update_forward_refs()
@@ -351,6 +351,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
351
351
  SimpleRetriever as SimpleRetrieverModel,
352
352
  )
353
353
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
354
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
355
+ StateDelegatingStream as StateDelegatingStreamModel,
356
+ )
354
357
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
355
358
  StreamConfig as StreamConfigModel,
356
359
  )
@@ -617,6 +620,7 @@ class ModelToComponentFactory:
617
620
  LegacySessionTokenAuthenticatorModel: self.create_legacy_session_token_authenticator,
618
621
  SelectiveAuthenticatorModel: self.create_selective_authenticator,
619
622
  SimpleRetrieverModel: self.create_simple_retriever,
623
+ StateDelegatingStreamModel: self.create_state_delegating_stream,
620
624
  SpecModel: self.create_spec,
621
625
  SubstreamPartitionRouterModel: self.create_substream_partition_router,
622
626
  WaitTimeFromHeaderModel: self.create_wait_time_from_header,
@@ -1785,8 +1789,13 @@ class ModelToComponentFactory:
1785
1789
 
1786
1790
  def _build_stream_slicer_from_partition_router(
1787
1791
  self,
1788
- model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
1792
+ model: Union[
1793
+ AsyncRetrieverModel,
1794
+ CustomRetrieverModel,
1795
+ SimpleRetrieverModel,
1796
+ ],
1789
1797
  config: Config,
1798
+ stream_name: Optional[str] = None,
1790
1799
  ) -> Optional[PartitionRouter]:
1791
1800
  if (
1792
1801
  hasattr(model, "partition_router")
@@ -1794,95 +1803,65 @@ class ModelToComponentFactory:
1794
1803
  and model.partition_router
1795
1804
  ):
1796
1805
  stream_slicer_model = model.partition_router
1797
-
1798
1806
  if isinstance(stream_slicer_model, list):
1799
1807
  return CartesianProductStreamSlicer(
1800
1808
  [
1801
- self._create_component_from_model(model=slicer, config=config)
1809
+ self._create_component_from_model(
1810
+ model=slicer, config=config, stream_name=stream_name or ""
1811
+ )
1802
1812
  for slicer in stream_slicer_model
1803
1813
  ],
1804
1814
  parameters={},
1805
1815
  )
1806
1816
  else:
1807
- return self._create_component_from_model(model=stream_slicer_model, config=config) # type: ignore[no-any-return]
1808
- # Will be created PartitionRouter as stream_slicer_model is model.partition_router
1817
+ return self._create_component_from_model( # type: ignore[no-any-return] # Will be created PartitionRouter as stream_slicer_model is model.partition_router
1818
+ model=stream_slicer_model, config=config, stream_name=stream_name or ""
1819
+ )
1809
1820
  return None
1810
1821
 
1811
- def _build_resumable_cursor_from_paginator(
1822
+ def _build_incremental_cursor(
1812
1823
  self,
1813
- model: Union[AsyncRetrieverModel, CustomRetrieverModel, SimpleRetrieverModel],
1814
- stream_slicer: Optional[StreamSlicer],
1815
- ) -> Optional[StreamSlicer]:
1816
- if hasattr(model, "paginator") and model.paginator and not stream_slicer:
1817
- # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
1818
- return ResumableFullRefreshCursor(parameters={})
1819
- return None
1820
-
1821
- def _merge_stream_slicers(
1822
- self, model: DeclarativeStreamModel, config: Config
1824
+ model: DeclarativeStreamModel,
1825
+ stream_slicer: Optional[PartitionRouter],
1826
+ config: Config,
1823
1827
  ) -> Optional[StreamSlicer]:
1824
- stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
1825
-
1826
1828
  if model.incremental_sync and stream_slicer:
1827
1829
  if model.retriever.type == "AsyncRetriever":
1828
- if model.incremental_sync.type != "DatetimeBasedCursor":
1829
- # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
1830
- raise ValueError(
1831
- "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1832
- )
1833
- if stream_slicer:
1834
- return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1835
- state_manager=self._connector_state_manager,
1836
- model_type=DatetimeBasedCursorModel,
1837
- component_definition=model.incremental_sync.__dict__,
1838
- stream_name=model.name or "",
1839
- stream_namespace=None,
1840
- config=config or {},
1841
- stream_state={},
1842
- partition_router=stream_slicer,
1843
- )
1844
- return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1830
+ return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1831
+ state_manager=self._connector_state_manager,
1845
1832
  model_type=DatetimeBasedCursorModel,
1846
1833
  component_definition=model.incremental_sync.__dict__,
1847
1834
  stream_name=model.name or "",
1848
1835
  stream_namespace=None,
1849
1836
  config=config or {},
1837
+ stream_state={},
1838
+ partition_router=stream_slicer,
1850
1839
  )
1851
1840
 
1852
1841
  incremental_sync_model = model.incremental_sync
1853
- if (
1842
+ cursor_component = self._create_component_from_model(
1843
+ model=incremental_sync_model, config=config
1844
+ )
1845
+ is_global_cursor = (
1854
1846
  hasattr(incremental_sync_model, "global_substream_cursor")
1855
1847
  and incremental_sync_model.global_substream_cursor
1856
- ):
1857
- cursor_component = self._create_component_from_model(
1858
- model=incremental_sync_model, config=config
1859
- )
1848
+ )
1849
+
1850
+ if is_global_cursor:
1860
1851
  return GlobalSubstreamCursor(
1861
1852
  stream_cursor=cursor_component, partition_router=stream_slicer
1862
1853
  )
1863
- else:
1864
- cursor_component = self._create_component_from_model(
1865
- model=incremental_sync_model, config=config
1866
- )
1867
- return PerPartitionWithGlobalCursor(
1868
- cursor_factory=CursorFactory(
1869
- lambda: self._create_component_from_model(
1870
- model=incremental_sync_model, config=config
1871
- ),
1854
+ return PerPartitionWithGlobalCursor(
1855
+ cursor_factory=CursorFactory(
1856
+ lambda: self._create_component_from_model(
1857
+ model=incremental_sync_model, config=config
1872
1858
  ),
1873
- partition_router=stream_slicer,
1874
- stream_cursor=cursor_component,
1875
- )
1859
+ ),
1860
+ partition_router=stream_slicer,
1861
+ stream_cursor=cursor_component,
1862
+ )
1876
1863
  elif model.incremental_sync:
1877
1864
  if model.retriever.type == "AsyncRetriever":
1878
- if model.incremental_sync.type != "DatetimeBasedCursor":
1879
- # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the support or unordered slices (for example, when we trigger reports for January and February, the report in February can be completed first). Once we have support for custom concurrent cursor or have a new implementation available in the CDK, we can enable more cursors here.
1880
- raise ValueError(
1881
- "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet"
1882
- )
1883
- if model.retriever.partition_router:
1884
- # Note that this development is also done in parallel to the per partition development which once merged we could support here by calling `create_concurrent_cursor_from_perpartition_cursor`
1885
- raise ValueError("Per partition state is not supported yet for AsyncRetriever")
1886
1865
  return self.create_concurrent_cursor_from_datetime_based_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
1887
1866
  model_type=DatetimeBasedCursorModel,
1888
1867
  component_definition=model.incremental_sync.__dict__,
@@ -1891,13 +1870,21 @@ class ModelToComponentFactory:
1891
1870
  config=config or {},
1892
1871
  stream_state_migrations=model.state_migrations,
1893
1872
  )
1894
- return (
1895
- self._create_component_from_model(model=model.incremental_sync, config=config)
1896
- if model.incremental_sync
1897
- else None
1898
- )
1899
- elif self._disable_resumable_full_refresh:
1900
- return stream_slicer
1873
+ return self._create_component_from_model(model=model.incremental_sync, config=config) # type: ignore[no-any-return] # Will be created Cursor as stream_slicer_model is model.incremental_sync
1874
+ return None
1875
+
1876
+ def _build_resumable_cursor(
1877
+ self,
1878
+ model: Union[
1879
+ AsyncRetrieverModel,
1880
+ CustomRetrieverModel,
1881
+ SimpleRetrieverModel,
1882
+ ],
1883
+ stream_slicer: Optional[PartitionRouter],
1884
+ ) -> Optional[StreamSlicer]:
1885
+ if hasattr(model, "paginator") and model.paginator and not stream_slicer:
1886
+ # For the regular Full-Refresh streams, we use the high level `ResumableFullRefreshCursor`
1887
+ return ResumableFullRefreshCursor(parameters={})
1901
1888
  elif stream_slicer:
1902
1889
  # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
1903
1890
  return PerPartitionCursor(
@@ -1906,7 +1893,47 @@ class ModelToComponentFactory:
1906
1893
  ),
1907
1894
  partition_router=stream_slicer,
1908
1895
  )
1909
- return self._build_resumable_cursor_from_paginator(model.retriever, stream_slicer)
1896
+ return None
1897
+
1898
+ def _merge_stream_slicers(
1899
+ self, model: DeclarativeStreamModel, config: Config
1900
+ ) -> Optional[StreamSlicer]:
1901
+ retriever_model = model.retriever
1902
+
1903
+ if retriever_model.type == "AsyncRetriever":
1904
+ is_not_datetime_cursor = (
1905
+ model.incremental_sync.type != "DatetimeBasedCursor"
1906
+ if model.incremental_sync
1907
+ else None
1908
+ )
1909
+ is_partition_router = (
1910
+ bool(retriever_model.partition_router) if model.incremental_sync else None
1911
+ )
1912
+
1913
+ if is_not_datetime_cursor:
1914
+ # We are currently in a transition to the Concurrent CDK and AsyncRetriever can only work with the
1915
+ # support of unordered slices (for example, when we trigger reports for January and February, the report
1916
+ # in February can be completed first). Once we have support for custom concurrent cursor or have a new
1917
+ # implementation available in the CDK, we can enable more cursors here.
1918
+ raise ValueError(
1919
+ "AsyncRetriever with cursor other than DatetimeBasedCursor is not supported yet."
1920
+ )
1921
+
1922
+ if is_partition_router:
1923
+ # Note that this development is also done in parallel to the per partition development which once merged
1924
+ # we could support here by calling create_concurrent_cursor_from_perpartition_cursor
1925
+ raise ValueError("Per partition state is not supported yet for AsyncRetriever.")
1926
+
1927
+ stream_slicer = self._build_stream_slicer_from_partition_router(retriever_model, config)
1928
+
1929
+ if model.incremental_sync:
1930
+ return self._build_incremental_cursor(model, stream_slicer, config)
1931
+
1932
+ return (
1933
+ stream_slicer
1934
+ if self._disable_resumable_full_refresh
1935
+ else self._build_resumable_cursor(retriever_model, stream_slicer)
1936
+ )
1910
1937
 
1911
1938
  def create_default_error_handler(
1912
1939
  self, model: DefaultErrorHandlerModel, config: Config, **kwargs: Any
@@ -2167,9 +2194,7 @@ class ModelToComponentFactory:
2167
2194
  self, model: DynamicSchemaLoaderModel, config: Config, **kwargs: Any
2168
2195
  ) -> DynamicSchemaLoader:
2169
2196
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
2170
- combined_slicers = self._build_resumable_cursor_from_paginator(
2171
- model.retriever, stream_slicer
2172
- )
2197
+ combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
2173
2198
 
2174
2199
  schema_transformations = []
2175
2200
  if model.schema_transformations:
@@ -2492,7 +2517,9 @@ class ModelToComponentFactory:
2492
2517
  def create_parent_stream_config(
2493
2518
  self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2494
2519
  ) -> ParentStreamConfig:
2495
- declarative_stream = self._create_component_from_model(model.stream, config=config)
2520
+ declarative_stream = self._create_component_from_model(
2521
+ model.stream, config=config, **kwargs
2522
+ )
2496
2523
  request_option = (
2497
2524
  self._create_component_from_model(model.request_option, config=config)
2498
2525
  if model.request_option
@@ -2733,6 +2760,29 @@ class ModelToComponentFactory:
2733
2760
  parameters=model.parameters or {},
2734
2761
  )
2735
2762
 
2763
+ def create_state_delegating_stream(
2764
+ self,
2765
+ model: StateDelegatingStreamModel,
2766
+ config: Config,
2767
+ has_parent_state: Optional[bool] = None,
2768
+ **kwargs: Any,
2769
+ ) -> DeclarativeStream:
2770
+ if (
2771
+ model.full_refresh_stream.name != model.name
2772
+ or model.name != model.incremental_stream.name
2773
+ ):
2774
+ raise ValueError(
2775
+ f"state_delegating_stream, full_refresh_stream name and incremental_stream must have equal names. Instead has {model.name}, {model.full_refresh_stream.name} and {model.incremental_stream.name}."
2776
+ )
2777
+
2778
+ stream_model = (
2779
+ model.incremental_stream
2780
+ if self._connector_state_manager.get_stream_state(model.name, None) or has_parent_state
2781
+ else model.full_refresh_stream
2782
+ )
2783
+
2784
+ return self._create_component_from_model(stream_model, config=config, **kwargs) # type: ignore[no-any-return] # Will be created DeclarativeStream as stream_model is stream description
2785
+
2736
2786
  def _create_async_job_status_mapping(
2737
2787
  self, model: AsyncJobStatusMapModel, config: Config, **kwargs: Any
2738
2788
  ) -> Mapping[str, AsyncJobStatus]:
@@ -2964,7 +3014,7 @@ class ModelToComponentFactory:
2964
3014
  parent_stream_configs.extend(
2965
3015
  [
2966
3016
  self._create_message_repository_substream_wrapper(
2967
- model=parent_stream_config, config=config
3017
+ model=parent_stream_config, config=config, **kwargs
2968
3018
  )
2969
3019
  for parent_stream_config in model.parent_stream_configs
2970
3020
  ]
@@ -2977,7 +3027,7 @@ class ModelToComponentFactory:
2977
3027
  )
2978
3028
 
2979
3029
  def _create_message_repository_substream_wrapper(
2980
- self, model: ParentStreamConfigModel, config: Config
3030
+ self, model: ParentStreamConfigModel, config: Config, **kwargs: Any
2981
3031
  ) -> Any:
2982
3032
  substream_factory = ModelToComponentFactory(
2983
3033
  limit_pages_fetched_per_slice=self._limit_pages_fetched_per_slice,
@@ -2991,7 +3041,16 @@ class ModelToComponentFactory:
2991
3041
  self._evaluate_log_level(self._emit_connector_builder_messages),
2992
3042
  ),
2993
3043
  )
2994
- return substream_factory._create_component_from_model(model=model, config=config)
3044
+
3045
+ # This flag will be used exclusively for StateDelegatingStream when a parent stream is created
3046
+ has_parent_state = bool(
3047
+ self._connector_state_manager.get_stream_state(kwargs.get("stream_name", ""), None)
3048
+ if model.incremental_dependency
3049
+ else False
3050
+ )
3051
+ return substream_factory._create_component_from_model(
3052
+ model=model, config=config, has_parent_state=has_parent_state, **kwargs
3053
+ )
2995
3054
 
2996
3055
  @staticmethod
2997
3056
  def create_wait_time_from_header(
@@ -3047,9 +3106,7 @@ class ModelToComponentFactory:
3047
3106
  self, model: HttpComponentsResolverModel, config: Config
3048
3107
  ) -> Any:
3049
3108
  stream_slicer = self._build_stream_slicer_from_partition_router(model.retriever, config)
3050
- combined_slicers = self._build_resumable_cursor_from_paginator(
3051
- model.retriever, stream_slicer
3052
- )
3109
+ combined_slicers = self._build_resumable_cursor(model.retriever, stream_slicer)
3053
3110
 
3054
3111
  retriever = self._create_component_from_model(
3055
3112
  model=model.retriever,
@@ -9,4 +9,9 @@ from airbyte_cdk.sources.declarative.retrievers.simple_retriever import (
9
9
  SimpleRetrieverTestReadDecorator,
10
10
  )
11
11
 
12
- __all__ = ["Retriever", "SimpleRetriever", "SimpleRetrieverTestReadDecorator", "AsyncRetriever"]
12
+ __all__ = [
13
+ "Retriever",
14
+ "SimpleRetriever",
15
+ "SimpleRetrieverTestReadDecorator",
16
+ "AsyncRetriever",
17
+ ]
@@ -36,6 +36,36 @@ class AsyncRetriever(Retriever):
36
36
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
37
37
  self._parameters = parameters
38
38
 
39
+ @property
40
+ def exit_on_rate_limit(self) -> bool:
41
+ """
42
+ Whether to exit on rate limit. This is a property of the job repository
43
+ and not the stream slicer. The stream slicer is responsible for creating
44
+ the jobs, but the job repository is responsible for managing the rate
45
+ limits and other job-related properties.
46
+
47
+ Note:
48
+ - If the `creation_requester` cannot place / create the job - rate limiting might be the cause
49
+ - If the `creation_requester` can place / create the job - it means all other requesters should successfully manage
50
+ to complete the results.
51
+ """
52
+ job_orchestrator = self.stream_slicer._job_orchestrator
53
+ if job_orchestrator is None:
54
+ # Default value when orchestrator is not available
55
+ return False
56
+ return job_orchestrator._job_repository.creation_requester.exit_on_rate_limit # type: ignore
57
+
58
+ @exit_on_rate_limit.setter
59
+ def exit_on_rate_limit(self, value: bool) -> None:
60
+ """
61
+ Sets the `exit_on_rate_limit` property of the job repository > creation_requester,
62
+ meaning that the Job cannot be placed / created if the rate limit is reached.
63
+ Thus no further work on managing jobs is expected to be done.
64
+ """
65
+ job_orchestrator = self.stream_slicer._job_orchestrator
66
+ if job_orchestrator is not None:
67
+ job_orchestrator._job_repository.creation_requester.exit_on_rate_limit = value # type: ignore[attr-defined, assignment]
68
+
39
69
  @property
40
70
  def state(self) -> StreamState:
41
71
  """
@@ -356,7 +356,10 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
356
356
  if "null" not in v:
357
357
  schema[k] = ["null"] + v
358
358
  elif v != "null":
359
- schema[k] = ["null", v]
359
+ if isinstance(v, (str, list)):
360
+ schema[k] = ["null", v]
361
+ else:
362
+ DefaultFileBasedStream._fill_nulls(v)
360
363
  else:
361
364
  DefaultFileBasedStream._fill_nulls(v)
362
365
  elif isinstance(schema, list):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.39.1
3
+ Version: 6.39.3
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -67,13 +67,13 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
67
67
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
68
68
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
69
69
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
70
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=rAp-sgld4n8Tmybz-51m7VcYXqKwzKDpCJVr1elmkRc,26824
70
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=0I1lOxV7oQEsUxyg7q9EgcW2zvhai4_7-IIDF79WiOU,27569
71
71
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
72
72
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
73
73
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
74
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=2kFMQC2TpM_dmNJe0vYtez5XzdFy4nnWo1WowqKG1pA,148008
74
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=TW-7fw3OXmp8hZmbvGJiW_5SCU4f6bzJbTIveQkAPZE,149239
75
75
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
76
- airbyte_cdk/sources/declarative/declarative_stream.py,sha256=venZjfpvtqr3oFSuvMBWtn4h9ayLhD4L65ACuXCDZ64,10445
76
+ airbyte_cdk/sources/declarative/declarative_stream.py,sha256=dCRlddBUSaJmBNBz1pSO1r2rTw8AP5d2_vlmIeGs2gg,10767
77
77
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
78
78
  airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=Jd7URkDQBoHSDQHQuYUqzeex1HYfLRtGcY_-dVW33pA,7884
79
79
  airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=1PeKwuMK8x9dsA2zqUjSVinEWVSEgYcUS6npiW3aC2c,855
@@ -109,18 +109,18 @@ airbyte_cdk/sources/declarative/interpolation/interpolated_string.py,sha256=CQkH
109
109
  airbyte_cdk/sources/declarative/interpolation/interpolation.py,sha256=9IoeuWam3L6GyN10L6U8xNWXmkt9cnahSDNkez1OmFY,982
110
110
  airbyte_cdk/sources/declarative/interpolation/jinja.py,sha256=UQeuS4Vpyp4hlOn-R3tRyeBX0e9IoV6jQ6gH-Jz8lY0,7182
111
111
  airbyte_cdk/sources/declarative/interpolation/macros.py,sha256=HQKHKnjE17zKoPn27ZpTpugRZZQSaof4GVzUUZaV2eE,5081
112
- airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=DJuLm_5iG66cyD16mRWG_rwmgNIORWSkFLxstC93Tc8,17209
112
+ airbyte_cdk/sources/declarative/manifest_declarative_source.py,sha256=bgrVP227hsiPJO6QeZy0v1kmdjrjQM63dlDTaI0pAC8,18300
113
113
  airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
114
114
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
115
115
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
116
116
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
117
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=IbSrniMEvDhmiXtArtBpC2ie5pIC0tHh1JKnBSe3EcM,104712
117
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=FIq1hfhFjMmGB2gnTo7mohxgpwmH-8C4wuiEPKxH_hQ,105591
118
118
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
119
119
  airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=jDw_TttD3_hpfevXOH-0Ws0eRuqt6wvED0BqosGPRjI,5938
120
120
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
121
121
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
122
122
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
123
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=2VkO2gqw4j8sqmuIB-4JKhle4NJrFJy586M9YPatDc0,142496
123
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=Qe28QMKiAWEWeeybPqseSL5xm-_qrgjMY-lFn4VDtJM,143588
124
124
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
125
125
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
126
126
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -169,8 +169,8 @@ airbyte_cdk/sources/declarative/resolvers/__init__.py,sha256=NiDcz5qi8HPsfX94MUm
169
169
  airbyte_cdk/sources/declarative/resolvers/components_resolver.py,sha256=KPjKc0yb9artL4ZkeqN8RmEykHH6FJgqXD7fCEnh1X0,1936
170
170
  airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=dz4iJV9liD_LzY_Mn4XmAStoUll60R3MIGWV4aN3pgg,5223
171
171
  airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=AiojNs8wItJFrENZBFUaDvau3sgwudO6Wkra36upSPo,4639
172
- airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC5RK_ZZM7ojTLBQ4IkWQTfmfCk,456
173
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=dwYZ70eg9DKHEqZydHhMFPkEILbNcXu7E-djOCikNgI,3530
172
+ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=U9Hf9OK1bWdVa3cgs2cJm_-O-wOKuvhmRzP3SckL3rg,475
173
+ airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=Fxwg53i_9R3kMNFtD3gEwZbdW8xlcXYXA5evEhrKunM,5072
174
174
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
175
175
  airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=fDhc6dMx75UImh1_TfLm4Le59tsHpqIUZnau7uIJyYw,25043
176
176
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
@@ -242,7 +242,7 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
242
242
  airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
243
243
  airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
244
244
  airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
245
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
245
+ airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=jyJLu2BUCYWKqrqD0ZUFxnrD0qybny7KbzKznxjIIpM,18199
246
246
  airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=FZH83Geoy3K3nwUk2VVNJERFcXUTnl-4XljjucUM23s,1893
247
247
  airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=ke82qgm7snOlQTDx94Lqsc0cDkHWi3OJDTrPxffpFqc,3914
248
248
  airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
@@ -358,9 +358,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
358
358
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
359
359
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
360
360
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
361
- airbyte_cdk-6.39.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
362
- airbyte_cdk-6.39.1.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
363
- airbyte_cdk-6.39.1.dist-info/METADATA,sha256=TYoI67spX0vCgCzJmQnZxzArXsiU-FFX6tMJNOCq914,6071
364
- airbyte_cdk-6.39.1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
365
- airbyte_cdk-6.39.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
366
- airbyte_cdk-6.39.1.dist-info/RECORD,,
361
+ airbyte_cdk-6.39.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
362
+ airbyte_cdk-6.39.3.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
363
+ airbyte_cdk-6.39.3.dist-info/METADATA,sha256=oToy3NTEXtxefbvn_Nu0CuDIH_EjOpwwZ8hmUrmsZ14,6071
364
+ airbyte_cdk-6.39.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
365
+ airbyte_cdk-6.39.3.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
366
+ airbyte_cdk-6.39.3.dist-info/RECORD,,