airbyte-cdk 6.7.2__py3-none-any.whl → 6.7.2.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,6 +49,7 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
49
49
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
50
50
  AlwaysAvailableAvailabilityStrategy,
51
51
  )
52
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
52
53
  from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
53
54
  from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
54
55
  from airbyte_cdk.sources.types import Config, StreamState
@@ -69,6 +70,15 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
69
70
  component_factory: Optional[ModelToComponentFactory] = None,
70
71
  **kwargs: Any,
71
72
  ) -> None:
73
+ # To reduce the complexity of the concurrent framework, we are not enabling RFR with synthetic
74
+ # cursors. We do this by no longer automatically instantiating RFR cursors when converting
75
+ # the declarative models into runtime components. Concurrent sources will continue to checkpoint
76
+ # incremental streams running in full refresh.
77
+ component_factory = component_factory or ModelToComponentFactory(
78
+ emit_connector_builder_messages=emit_connector_builder_messages,
79
+ disable_resumable_full_refresh=True,
80
+ )
81
+
72
82
  super().__init__(
73
83
  source_config=source_config,
74
84
  debug=debug,
@@ -191,13 +201,24 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
191
201
  # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
192
202
  # so we need to treat them as synchronous
193
203
  if isinstance(declarative_stream, DeclarativeStream):
194
- datetime_based_cursor_component_definition = name_to_stream_mapping[
204
+ incremental_sync_component_definition = name_to_stream_mapping[
195
205
  declarative_stream.name
196
206
  ].get("incremental_sync")
197
207
 
208
+ partition_router_component_definition = (
209
+ name_to_stream_mapping[declarative_stream.name]
210
+ .get("retriever")
211
+ .get("partition_router")
212
+ )
213
+
214
+ is_substream_without_incremental = (
215
+ partition_router_component_definition
216
+ and not incremental_sync_component_definition
217
+ )
218
+
198
219
  if (
199
- datetime_based_cursor_component_definition
200
- and datetime_based_cursor_component_definition.get("type", "")
220
+ incremental_sync_component_definition
221
+ and incremental_sync_component_definition.get("type", "")
201
222
  == DatetimeBasedCursorModel.__name__
202
223
  and self._stream_supports_concurrent_partition_processing(
203
224
  declarative_stream=declarative_stream
@@ -213,7 +234,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
213
234
  self._constructor.create_concurrent_cursor_from_datetime_based_cursor(
214
235
  state_manager=state_manager,
215
236
  model_type=DatetimeBasedCursorModel,
216
- component_definition=datetime_based_cursor_component_definition,
237
+ component_definition=incremental_sync_component_definition,
217
238
  stream_name=declarative_stream.name,
218
239
  stream_namespace=declarative_stream.namespace,
219
240
  config=config or {},
@@ -247,6 +268,41 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
247
268
  cursor=cursor,
248
269
  )
249
270
  )
271
+ elif is_substream_without_incremental and hasattr(
272
+ declarative_stream.retriever, "stream_slicer"
273
+ ):
274
+ partition_generator = StreamSlicerPartitionGenerator(
275
+ DeclarativePartitionFactory(
276
+ declarative_stream.name,
277
+ declarative_stream.get_json_schema(),
278
+ self._retriever_factory(
279
+ name_to_stream_mapping[declarative_stream.name],
280
+ config,
281
+ {},
282
+ ),
283
+ self.message_repository,
284
+ ),
285
+ declarative_stream.retriever.stream_slicer,
286
+ )
287
+
288
+ final_state_cursor = FinalStateCursor(
289
+ stream_name=declarative_stream.name,
290
+ stream_namespace=declarative_stream.namespace,
291
+ message_repository=self.message_repository,
292
+ )
293
+
294
+ concurrent_streams.append(
295
+ DefaultStream(
296
+ partition_generator=partition_generator,
297
+ name=declarative_stream.name,
298
+ json_schema=declarative_stream.get_json_schema(),
299
+ availability_strategy=AlwaysAvailableAvailabilityStrategy(),
300
+ primary_key=get_primary_key_from_stream(declarative_stream.primary_key),
301
+ cursor_field=None,
302
+ logger=self.logger,
303
+ cursor=final_state_cursor,
304
+ )
305
+ )
250
306
  else:
251
307
  synchronous_streams.append(declarative_stream)
252
308
  else:
@@ -387,6 +387,7 @@ class ModelToComponentFactory:
387
387
  emit_connector_builder_messages: bool = False,
388
388
  disable_retries: bool = False,
389
389
  disable_cache: bool = False,
390
+ disable_resumable_full_refresh: bool = False,
390
391
  message_repository: Optional[MessageRepository] = None,
391
392
  ):
392
393
  self._init_mappings()
@@ -395,6 +396,7 @@ class ModelToComponentFactory:
395
396
  self._emit_connector_builder_messages = emit_connector_builder_messages
396
397
  self._disable_retries = disable_retries
397
398
  self._disable_cache = disable_cache
399
+ self._disable_resumable_full_refresh = disable_resumable_full_refresh
398
400
  self._message_repository = message_repository or InMemoryMessageRepository( # type: ignore
399
401
  self._evaluate_log_level(emit_connector_builder_messages)
400
402
  )
@@ -1339,6 +1341,8 @@ class ModelToComponentFactory:
1339
1341
  if model.incremental_sync
1340
1342
  else None
1341
1343
  )
1344
+ elif self._disable_resumable_full_refresh:
1345
+ return stream_slicer
1342
1346
  elif stream_slicer:
1343
1347
  # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
1344
1348
  return PerPartitionCursor(
@@ -5,7 +5,18 @@
5
5
  import functools
6
6
  import logging
7
7
  from abc import ABC, abstractmethod
8
- from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Protocol, Tuple
8
+ from typing import (
9
+ Any,
10
+ Callable,
11
+ Iterable,
12
+ List,
13
+ Mapping,
14
+ MutableMapping,
15
+ Optional,
16
+ Protocol,
17
+ Tuple,
18
+ Union,
19
+ )
9
20
 
10
21
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
11
22
  from airbyte_cdk.sources.message import MessageRepository
@@ -175,7 +186,9 @@ class ConcurrentCursor(Cursor):
175
186
  self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
176
187
  self._lookback_window = lookback_window
177
188
  self._slice_range = slice_range
178
- self._most_recent_cursor_value_per_partition: MutableMapping[StreamSlice, Any] = {}
189
+ self._most_recent_cursor_value_per_partition: MutableMapping[
190
+ Union[StreamSlice, Mapping[str, Any], None], Any
191
+ ] = {}
179
192
  self._has_closed_at_least_one_slice = False
180
193
  self._cursor_granularity = cursor_granularity
181
194
  # Flag to track if the logger has been triggered (per stream)
@@ -216,10 +229,13 @@ class ConcurrentCursor(Cursor):
216
229
  most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
217
230
  record.associated_slice
218
231
  )
219
- cursor_value = self._extract_cursor_value(record)
232
+ try:
233
+ cursor_value = self._extract_cursor_value(record)
220
234
 
221
- if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
222
- self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value
235
+ if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
236
+ self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value
237
+ except ValueError:
238
+ self._log_for_record_without_cursor_value()
223
239
 
224
240
  def _extract_cursor_value(self, record: Record) -> Any:
225
241
  return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
@@ -459,10 +475,13 @@ class ConcurrentCursor(Cursor):
459
475
  try:
460
476
  record_cursor_value: CursorValueType = self._extract_cursor_value(record) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
461
477
  except ValueError:
462
- if not self._should_be_synced_logger_triggered:
463
- LOGGER.warning(
464
- f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record. The incremental sync will assume it needs to be synced"
465
- )
466
- self._should_be_synced_logger_triggered = True
478
+ self._log_for_record_without_cursor_value()
467
479
  return True
468
480
  return self.start <= record_cursor_value <= self._end_provider()
481
+
482
+ def _log_for_record_without_cursor_value(self) -> None:
483
+ if not self._should_be_synced_logger_triggered:
484
+ LOGGER.warning(
485
+ f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record for stream {self._stream_name}. The incremental sync will assume it needs to be synced"
486
+ )
487
+ self._should_be_synced_logger_triggered = True
@@ -67,6 +67,7 @@ class DefaultStream(AbstractStream):
67
67
  name=self.name,
68
68
  json_schema=dict(self._json_schema),
69
69
  supported_sync_modes=[SyncMode.full_refresh],
70
+ is_resumable=False,
70
71
  )
71
72
 
72
73
  if self._namespace:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.7.2
3
+ Version: 6.7.2.dev1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -62,7 +62,7 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=dAA-UhmMj0WLXCkRQr
62
62
  airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
63
63
  airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
64
64
  airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
65
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=XQVc7xMRffm9v4qwDMTGIfWzNB2pf9GvILB4MvB0icU,19818
65
+ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=k2rHM3FIyoRUCZ7egwO36gd9fyX_J1ZokfAtpVzNvac,22641
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=8VZJP18eJLabSPP1XBSPDaagUBG6q1ynIiPJy3rE2mc,5344
@@ -109,7 +109,7 @@ airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQ
109
109
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
110
110
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=jVZ3ZV5YZrmDNIX5cM2mugXmnbH27zHRcD22_3oatpo,8454
111
111
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
112
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=4WksEBtXaUqVtCO1xMCvcwxo72lOAfflCpY9hWqXcLg,96693
112
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=-1J_fjk3XQB42ApcpJEZmu5EqHiGmw5e1i42iGVmCWI,96909
113
113
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=8uGos2u7TFTx_EJBdcjdUGn3Eyx6jUuEa1_VB8UP_dI,631
114
114
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
115
115
  airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
@@ -245,8 +245,8 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=sVFzrJq9YVuMalf
245
245
  airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
246
246
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=f2TmcQaDRN9ttiaD_wsgDCCXUG4C_UtIQy19yd49tp0,15176
247
247
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=xqErZU9v9QTe9Fv-MSJAICABs3Ke27mdA7QpgyFFj8g,2877
248
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=D_kQxKAmIwgs3eoJeVZPTjMToRT1N2FGd2RR8RnpX90,20555
249
- airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=WdZYzION3q6nIhIIcpFqlovDcouOHdbnB0U1YIDP2Jk,3175
248
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=JX1MEtQZzRhYlr8wghBd5uC5geO7_Xh2aA4wIFDLE8s,20897
249
+ airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=K3rLMpYhS7nnmvwQ52lqBy7DQdFMJpvvT7sgBg_ckA8,3207
250
250
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
251
251
  airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=gtj9p0clZwgnClrIRH6V2Wl0Jwu11Plq-9FP4FU2VQA,1327
252
252
  airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=2t64b_z9cEPmlHZnjSiMTO8PEtEdiAJDG0JcYOtUqAE,3363
@@ -330,8 +330,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
330
330
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=LVc9KbtMeV_z99jWo0Ou8u4l6eBJ0BWNhxj4zrrGKRs,763
331
331
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
332
332
  airbyte_cdk/utils/traced_exception.py,sha256=a6q51tBS3IdtefuOiL1eBwSmnNAXfjFMlMjSIQ_Tl-o,6165
333
- airbyte_cdk-6.7.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
334
- airbyte_cdk-6.7.2.dist-info/METADATA,sha256=Vwh7KkG8etaFx_bfo5a_sJD4mjVJxA6oT_r8Czej8Tk,13519
335
- airbyte_cdk-6.7.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
336
- airbyte_cdk-6.7.2.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
337
- airbyte_cdk-6.7.2.dist-info/RECORD,,
333
+ airbyte_cdk-6.7.2.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
334
+ airbyte_cdk-6.7.2.dev1.dist-info/METADATA,sha256=ma4LYuHRWl3TGQ5cg8No2xG-XO96j3SL29pdnV2YLdw,13524
335
+ airbyte_cdk-6.7.2.dev1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
336
+ airbyte_cdk-6.7.2.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
337
+ airbyte_cdk-6.7.2.dev1.dist-info/RECORD,,