airbyte-cdk 6.23.0__py3-none-any.whl → 6.23.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,6 @@ import datetime
8
8
  import importlib
9
9
  import inspect
10
10
  import re
11
- import sys
12
11
  from functools import partial
13
12
  from typing import (
14
13
  Any,
@@ -88,6 +87,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
88
87
  )
89
88
  from airbyte_cdk.sources.declarative.incremental import (
90
89
  ChildPartitionResumableFullRefreshCursor,
90
+ ConcurrentCursorFactory,
91
+ ConcurrentPerPartitionCursor,
91
92
  CursorFactory,
92
93
  DatetimeBasedCursor,
93
94
  DeclarativeCursor,
@@ -102,6 +103,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
102
103
  LegacyToPerPartitionStateMigration,
103
104
  )
104
105
  from airbyte_cdk.sources.declarative.models import (
106
+ Clamping,
105
107
  CustomStateMigration,
106
108
  )
107
109
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -461,6 +463,16 @@ from airbyte_cdk.sources.message import (
461
463
  InMemoryMessageRepository,
462
464
  LogAppenderMessageRepositoryDecorator,
463
465
  MessageRepository,
466
+ NoopMessageRepository,
467
+ )
468
+ from airbyte_cdk.sources.streams.concurrent.clamping import (
469
+ ClampingEndProvider,
470
+ ClampingStrategy,
471
+ DayClampingStrategy,
472
+ MonthClampingStrategy,
473
+ NoClamping,
474
+ WeekClampingStrategy,
475
+ Weekday,
464
476
  )
465
477
  from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
466
478
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -917,6 +929,8 @@ class ModelToComponentFactory:
917
929
  stream_namespace: Optional[str],
918
930
  config: Config,
919
931
  stream_state: MutableMapping[str, Any],
932
+ message_repository: Optional[MessageRepository] = None,
933
+ runtime_lookback_window: Optional[datetime.timedelta] = None,
920
934
  **kwargs: Any,
921
935
  ) -> ConcurrentCursor:
922
936
  component_type = component_definition.get("type")
@@ -978,10 +992,22 @@ class ModelToComponentFactory:
978
992
  connector_state_converter = CustomFormatConcurrentStreamStateConverter(
979
993
  datetime_format=datetime_format,
980
994
  input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
981
- is_sequential_state=True,
995
+ is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
982
996
  cursor_granularity=cursor_granularity,
983
997
  )
984
998
 
999
+ # Adjusts the stream state by applying the runtime lookback window.
1000
+ # This is used to ensure correct state handling in case of failed partitions.
1001
+ stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1002
+ if runtime_lookback_window and stream_state_value:
1003
+ new_stream_state = (
1004
+ connector_state_converter.parse_timestamp(stream_state_value)
1005
+ - runtime_lookback_window
1006
+ )
1007
+ stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1008
+ new_stream_state
1009
+ )
1010
+
985
1011
  start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
986
1012
  if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
987
1013
  start_date_runtime_value = self.create_min_max_datetime(
@@ -1048,11 +1074,58 @@ class ModelToComponentFactory:
1048
1074
  if evaluated_step:
1049
1075
  step_length = parse_duration(evaluated_step)
1050
1076
 
1077
+ clamping_strategy: ClampingStrategy = NoClamping()
1078
+ if datetime_based_cursor_model.clamping:
1079
+ # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1080
+ # it is still better than shifting the low-code concept of interpolation into the ConcurrentCursor runtime
1081
+ # object which we want to keep agnostic of being low-code
1082
+ target = InterpolatedString(
1083
+ string=datetime_based_cursor_model.clamping.target,
1084
+ parameters=datetime_based_cursor_model.parameters or {},
1085
+ )
1086
+ evaluated_target = target.eval(config=config)
1087
+ match evaluated_target:
1088
+ case "DAY":
1089
+ clamping_strategy = DayClampingStrategy()
1090
+ end_date_provider = ClampingEndProvider(
1091
+ DayClampingStrategy(is_ceiling=False),
1092
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1093
+ granularity=cursor_granularity or datetime.timedelta(seconds=1),
1094
+ )
1095
+ case "WEEK":
1096
+ if (
1097
+ not datetime_based_cursor_model.clamping.target_details
1098
+ or "weekday" not in datetime_based_cursor_model.clamping.target_details
1099
+ ):
1100
+ raise ValueError(
1101
+ "Given WEEK clamping, weekday needs to be provided as target_details"
1102
+ )
1103
+ weekday = self._assemble_weekday(
1104
+ datetime_based_cursor_model.clamping.target_details["weekday"]
1105
+ )
1106
+ clamping_strategy = WeekClampingStrategy(weekday)
1107
+ end_date_provider = ClampingEndProvider(
1108
+ WeekClampingStrategy(weekday, is_ceiling=False),
1109
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1110
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1111
+ )
1112
+ case "MONTH":
1113
+ clamping_strategy = MonthClampingStrategy()
1114
+ end_date_provider = ClampingEndProvider(
1115
+ MonthClampingStrategy(is_ceiling=False),
1116
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1117
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1118
+ )
1119
+ case _:
1120
+ raise ValueError(
1121
+ f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1122
+ )
1123
+
1051
1124
  return ConcurrentCursor(
1052
1125
  stream_name=stream_name,
1053
1126
  stream_namespace=stream_namespace,
1054
1127
  stream_state=stream_state,
1055
- message_repository=self._message_repository,
1128
+ message_repository=message_repository or self._message_repository,
1056
1129
  connector_state_manager=state_manager,
1057
1130
  connector_state_converter=connector_state_converter,
1058
1131
  cursor_field=cursor_field,
@@ -1062,6 +1135,83 @@ class ModelToComponentFactory:
1062
1135
  lookback_window=lookback_window,
1063
1136
  slice_range=step_length,
1064
1137
  cursor_granularity=cursor_granularity,
1138
+ clamping_strategy=clamping_strategy,
1139
+ )
1140
+
1141
def _assemble_weekday(self, weekday: str) -> Weekday:
    """Translate an upper-case weekday name into the matching ``Weekday`` member.

    :param weekday: one of "MONDAY" through "SUNDAY" (exact, case-sensitive match)
    :return: the corresponding ``Weekday`` enum member
    :raises ValueError: if ``weekday`` is not one of the seven known names
    """
    # Explicit lookup table: the accepted spellings are listed here rather than
    # derived from the enum so the accepted inputs stay obvious at a glance.
    known_days = (
        ("MONDAY", Weekday.MONDAY),
        ("TUESDAY", Weekday.TUESDAY),
        ("WEDNESDAY", Weekday.WEDNESDAY),
        ("THURSDAY", Weekday.THURSDAY),
        ("FRIDAY", Weekday.FRIDAY),
        ("SATURDAY", Weekday.SATURDAY),
        ("SUNDAY", Weekday.SUNDAY),
    )
    for label, member in known_days:
        if weekday == label:
            return member
    raise ValueError(f"Unknown weekday {weekday}")
1159
+
1160
def create_concurrent_cursor_from_perpartition_cursor(
    self,
    state_manager: ConnectorStateManager,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    stream_state: MutableMapping[str, Any],
    partition_router: PartitionRouter,
    **kwargs: Any,
) -> ConcurrentPerPartitionCursor:
    """Build a ``ConcurrentPerPartitionCursor`` from a datetime-based cursor definition.

    The manifest component is validated and parsed with ``model_type``, its cursor
    field is interpolated against ``config``, and a ``ConcurrentCursorFactory`` is
    prepared that defers to ``create_concurrent_cursor_from_datetime_based_cursor``
    with a ``NoopMessageRepository``. ``stream_state`` is handed to the per-partition
    cursor itself and is deliberately not bound into the factory.

    :raises ValueError: if the component's declared ``type`` does not match
        ``model_type`` or the parsed model is not a ``DatetimeBasedCursorModel``
    """
    component_type = component_definition.get("type")
    if component_type != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    datetime_based_cursor_model = model_type.parse_obj(component_definition)
    if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
        )

    # Resolve the (possibly interpolated) cursor field name once, up front.
    cursor_field_template = InterpolatedString.create(
        datetime_based_cursor_model.cursor_field,
        parameters=datetime_based_cursor_model.parameters or {},
    )
    cursor_field = CursorField(cursor_field_template.eval(config=config))

    # Everything except the remaining call-time arguments is bound here; the
    # cursors produced by this factory are given a no-op message repository.
    build_partition_cursor = partial(
        self.create_concurrent_cursor_from_datetime_based_cursor,
        state_manager=state_manager,
        model_type=model_type,
        component_definition=component_definition,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        config=config,
        message_repository=NoopMessageRepository(),
    )
    cursor_factory = ConcurrentCursorFactory(build_partition_cursor)

    # Return the per-partition concurrent cursor
    return ConcurrentPerPartitionCursor(
        cursor_factory=cursor_factory,
        partition_router=partition_router,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=self._message_repository,  # type: ignore
        connector_state_manager=state_manager,
        cursor_field=cursor_field,
    )
1066
1216
 
1067
1217
  @staticmethod
@@ -1369,18 +1519,15 @@ class ModelToComponentFactory:
1369
1519
  raise ValueError(
1370
1520
  "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1371
1521
  )
1372
- client_side_incremental_sync = {
1373
- "date_time_based_cursor": self._create_component_from_model(
1374
- model=model.incremental_sync, config=config
1375
- ),
1376
- "substream_cursor": (
1377
- combined_slicers
1378
- if isinstance(
1379
- combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1380
- )
1381
- else None
1382
- ),
1383
- }
1522
+ cursor = (
1523
+ combined_slicers
1524
+ if isinstance(
1525
+ combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1526
+ )
1527
+ else self._create_component_from_model(model=model.incremental_sync, config=config)
1528
+ )
1529
+
1530
+ client_side_incremental_sync = {"cursor": cursor}
1384
1531
 
1385
1532
  if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1386
1533
  cursor_model = model.incremental_sync
@@ -2227,7 +2374,7 @@ class ModelToComponentFactory:
2227
2374
  if (
2228
2375
  not isinstance(stream_slicer, DatetimeBasedCursor)
2229
2376
  or type(stream_slicer) is not DatetimeBasedCursor
2230
- ):
2377
+ ) and not isinstance(stream_slicer, PerPartitionWithGlobalCursor):
2231
2378
  # Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
2232
2379
  # Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
2233
2380
  # their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor still can act as the SimpleRetriever's
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
160
160
  stream_slice,
161
161
  next_page_token,
162
162
  self._paginator.get_request_headers,
163
- self.stream_slicer.get_request_headers,
163
+ self.request_option_provider.get_request_headers,
164
164
  )
165
165
  if isinstance(headers, str):
166
166
  raise ValueError("Request headers cannot be a string")
@@ -0,0 +1,99 @@
1
+ from abc import ABC
2
+ from datetime import datetime, timedelta
3
+ from enum import Enum
4
+ from typing import Callable
5
+
6
+ from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType
7
+
8
+
9
class ClampingStrategy(ABC):
    """Base class for strategies that snap a cursor value onto a boundary."""

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Return the clamped counterpart of ``value``.

        The base implementation always raises ``NotImplementedError``;
        concrete strategies are expected to override it.
        """
        raise NotImplementedError()
12
+
13
+
14
class NoClamping(ClampingStrategy):
    """Identity strategy: cursor values pass through untouched."""

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Return ``value`` unchanged."""
        return value
17
+
18
+
19
class ClampingEndProvider:
    """Zero-argument callable that wraps another end provider.

    Each call asks the wrapped provider for a value, clamps it with the given
    strategy and then subtracts ``granularity`` from the clamped result.
    """

    def __init__(
        self,
        clamping_strategy: ClampingStrategy,
        end_provider: Callable[[], CursorValueType],
        granularity: timedelta,
    ) -> None:
        """Store the strategy, the wrapped provider and the step to subtract."""
        self._clamping_strategy = clamping_strategy
        self._end_provider = end_provider
        self._granularity = granularity

    def __call__(self) -> CursorValueType:
        """Return the wrapped provider's value, clamped, minus one granularity."""
        raw_end = self._end_provider()
        clamped_end = self._clamping_strategy.clamp(raw_end)
        return clamped_end - self._granularity
32
+
33
+
34
class DayClampingStrategy(ClampingStrategy):
    """Clamp datetimes to day boundaries.

    With ``is_ceiling=True`` (the default) the result is midnight of the day
    *after* ``value`` -- including when ``value`` is already exactly midnight.
    With ``is_ceiling=False`` the result is midnight of ``value``'s own day.
    """

    def __init__(self, is_ceiling: bool = True) -> None:
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return the day boundary for ``value`` according to the configured direction."""
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        return midnight + timedelta(days=1) if self._is_ceiling else midnight
43
+
44
+
45
class MonthClampingStrategy(ClampingStrategy):
    """Clamp datetimes to the first day of a month.

    A value that already falls on day 1 is returned at that day's midnight
    regardless of ``is_ceiling`` (its time-of-day is simply dropped). Any other
    value goes to the first of the next month when ``is_ceiling`` is true, or
    to the first of its own month otherwise.
    """

    def __init__(self, is_ceiling: bool = True) -> None:
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return the month boundary for ``value`` according to the configured direction."""
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        if value.day == 1:
            # Already on a month boundary day: nothing to round.
            return midnight
        if self._is_ceiling:
            return self._ceil(midnight)
        return midnight.replace(day=1)

    def _ceil(self, value: datetime) -> datetime:
        """Return midnight on the first day of the month following ``value``."""
        if value.month == 12:
            # December rolls over into January of the next year.
            next_year, next_month = value.year + 1, 1
        else:
            next_year, next_month = value.year, value.month + 1
        return value.replace(
            year=next_year,
            month=next_month,
            day=1,
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        )
67
+
68
+
69
class Weekday(Enum):
    """
    Days of the week, numbered exactly like datetime.date.weekday() (Monday is 0, Sunday is 6)
    """

    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4
    SATURDAY = 5
    SUNDAY = 6
81
+
82
+
83
class WeekClampingStrategy(ClampingStrategy):
    """Clamp datetimes to midnight of a chosen weekday.

    With ``is_ceiling=True`` (the default) the result is the next occurrence of
    the target weekday, or the value's own day when it already is that weekday.
    With ``is_ceiling=False`` the result is exactly seven days before the
    ceiling; note that for a value that already falls on the target weekday
    this yields midnight one full week earlier.
    """

    def __init__(self, day_of_week: Weekday, is_ceiling: bool = True) -> None:
        # Keep the plain integer so it can be compared with datetime.weekday().
        self._day_of_week = day_of_week.value
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        """Return the week boundary for ``value`` according to the configured direction."""
        # Number of days (0..6) forward from ``value`` to the target weekday.
        days_until_target = (self._day_of_week - value.weekday()) % 7
        if self._is_ceiling:
            shift = timedelta(days_until_target)
        else:
            shift = timedelta(days_until_target - 7)
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        return midnight + shift
@@ -13,7 +13,6 @@ from typing import (
13
13
  Mapping,
14
14
  MutableMapping,
15
15
  Optional,
16
- Protocol,
17
16
  Tuple,
18
17
  Union,
19
18
  )
@@ -21,6 +20,8 @@ from typing import (
21
20
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
22
21
  from airbyte_cdk.sources.message import MessageRepository
23
22
  from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
23
+ from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
24
+ from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
24
25
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
25
26
  from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
26
27
  from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
@@ -35,36 +36,6 @@ def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
35
36
  return functools.reduce(lambda a, b: a[b], path, mapping)
36
37
 
37
38
 
38
- class GapType(Protocol):
39
- """
40
- This is the representation of gaps between two cursor values. Examples:
41
- * if cursor values are datetimes, GapType is timedelta
42
- * if cursor values are integer, GapType will also be integer
43
- """
44
-
45
- pass
46
-
47
-
48
- class CursorValueType(Protocol):
49
- """Protocol for annotating comparable types."""
50
-
51
- @abstractmethod
52
- def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
53
- pass
54
-
55
- @abstractmethod
56
- def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
57
- pass
58
-
59
- @abstractmethod
60
- def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
61
- pass
62
-
63
- @abstractmethod
64
- def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
65
- pass
66
-
67
-
68
39
  class CursorField:
69
40
  def __init__(self, cursor_field_key: str) -> None:
70
41
  self.cursor_field_key = cursor_field_key
@@ -172,6 +143,7 @@ class ConcurrentCursor(Cursor):
172
143
  lookback_window: Optional[GapType] = None,
173
144
  slice_range: Optional[GapType] = None,
174
145
  cursor_granularity: Optional[GapType] = None,
146
+ clamping_strategy: ClampingStrategy = NoClamping(),
175
147
  ) -> None:
176
148
  self._stream_name = stream_name
177
149
  self._stream_namespace = stream_namespace
@@ -193,10 +165,13 @@ class ConcurrentCursor(Cursor):
193
165
  self._cursor_granularity = cursor_granularity
194
166
  # Flag to track if the logger has been triggered (per stream)
195
167
  self._should_be_synced_logger_triggered = False
168
+ self._clamping_strategy = clamping_strategy
196
169
 
197
170
  @property
198
171
  def state(self) -> MutableMapping[str, Any]:
199
- return self._concurrent_state
172
+ return self._connector_state_converter.convert_to_state_message(
173
+ self.cursor_field, self._concurrent_state
174
+ )
200
175
 
201
176
  @property
202
177
  def cursor_field(self) -> CursorField:
@@ -241,10 +216,10 @@ class ConcurrentCursor(Cursor):
241
216
  return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
242
217
 
243
218
  def close_partition(self, partition: Partition) -> None:
244
- slice_count_before = len(self.state.get("slices", []))
219
+ slice_count_before = len(self._concurrent_state.get("slices", []))
245
220
  self._add_slice_to_state(partition)
246
221
  if slice_count_before < len(
247
- self.state["slices"]
222
+ self._concurrent_state["slices"]
248
223
  ): # only emit if at least one slice has been processed
249
224
  self._merge_partitions()
250
225
  self._emit_state_message()
@@ -256,11 +231,11 @@ class ConcurrentCursor(Cursor):
256
231
  )
257
232
 
258
233
  if self._slice_boundary_fields:
259
- if "slices" not in self.state:
234
+ if "slices" not in self._concurrent_state:
260
235
  raise RuntimeError(
261
236
  f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
262
237
  )
263
- self.state["slices"].append(
238
+ self._concurrent_state["slices"].append(
264
239
  {
265
240
  self._connector_state_converter.START_KEY: self._extract_from_slice(
266
241
  partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -288,7 +263,7 @@ class ConcurrentCursor(Cursor):
288
263
  "expected. Please contact the Airbyte team."
289
264
  )
290
265
 
291
- self.state["slices"].append(
266
+ self._concurrent_state["slices"].append(
292
267
  {
293
268
  self._connector_state_converter.START_KEY: self.start,
294
269
  self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -300,9 +275,7 @@ class ConcurrentCursor(Cursor):
300
275
  self._connector_state_manager.update_state_for_stream(
301
276
  self._stream_name,
302
277
  self._stream_namespace,
303
- self._connector_state_converter.convert_to_state_message(
304
- self._cursor_field, self.state
305
- ),
278
+ self.state,
306
279
  )
307
280
  state_message = self._connector_state_manager.create_state_message(
308
281
  self._stream_name, self._stream_namespace
@@ -310,7 +283,9 @@ class ConcurrentCursor(Cursor):
310
283
  self._message_repository.emit_message(state_message)
311
284
 
312
285
  def _merge_partitions(self) -> None:
313
- self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
286
+ self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
287
+ self._concurrent_state["slices"]
288
+ )
314
289
 
315
290
  def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
316
291
  try:
@@ -347,36 +322,42 @@ class ConcurrentCursor(Cursor):
347
322
  if self._start is not None and self._is_start_before_first_slice():
348
323
  yield from self._split_per_slice_range(
349
324
  self._start,
350
- self.state["slices"][0][self._connector_state_converter.START_KEY],
325
+ self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
351
326
  False,
352
327
  )
353
328
 
354
- if len(self.state["slices"]) == 1:
329
+ if len(self._concurrent_state["slices"]) == 1:
355
330
  yield from self._split_per_slice_range(
356
331
  self._calculate_lower_boundary_of_last_slice(
357
- self.state["slices"][0][self._connector_state_converter.END_KEY]
332
+ self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
358
333
  ),
359
334
  self._end_provider(),
360
335
  True,
361
336
  )
362
- elif len(self.state["slices"]) > 1:
363
- for i in range(len(self.state["slices"]) - 1):
337
+ elif len(self._concurrent_state["slices"]) > 1:
338
+ for i in range(len(self._concurrent_state["slices"]) - 1):
364
339
  if self._cursor_granularity:
365
340
  yield from self._split_per_slice_range(
366
- self.state["slices"][i][self._connector_state_converter.END_KEY]
341
+ self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
367
342
  + self._cursor_granularity,
368
- self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
343
+ self._concurrent_state["slices"][i + 1][
344
+ self._connector_state_converter.START_KEY
345
+ ],
369
346
  False,
370
347
  )
371
348
  else:
372
349
  yield from self._split_per_slice_range(
373
- self.state["slices"][i][self._connector_state_converter.END_KEY],
374
- self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
350
+ self._concurrent_state["slices"][i][
351
+ self._connector_state_converter.END_KEY
352
+ ],
353
+ self._concurrent_state["slices"][i + 1][
354
+ self._connector_state_converter.START_KEY
355
+ ],
375
356
  False,
376
357
  )
377
358
  yield from self._split_per_slice_range(
378
359
  self._calculate_lower_boundary_of_last_slice(
379
- self.state["slices"][-1][self._connector_state_converter.END_KEY]
360
+ self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
380
361
  ),
381
362
  self._end_provider(),
382
363
  True,
@@ -387,7 +368,8 @@ class ConcurrentCursor(Cursor):
387
368
  def _is_start_before_first_slice(self) -> bool:
388
369
  return (
389
370
  self._start is not None
390
- and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
371
+ and self._start
372
+ < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
391
373
  )
392
374
 
393
375
  def _calculate_lower_boundary_of_last_slice(
@@ -408,10 +390,12 @@ class ConcurrentCursor(Cursor):
408
390
 
409
391
  lower = max(lower, self._start) if self._start else lower
410
392
  if not self._slice_range or self._evaluate_upper_safely(lower, self._slice_range) >= upper:
393
+ clamped_lower = self._clamping_strategy.clamp(lower)
394
+ clamped_upper = self._clamping_strategy.clamp(upper)
411
395
  start_value, end_value = (
412
- (lower, upper - self._cursor_granularity)
396
+ (clamped_lower, clamped_upper - self._cursor_granularity)
413
397
  if self._cursor_granularity and not upper_is_end
414
- else (lower, upper)
398
+ else (clamped_lower, clamped_upper)
415
399
  )
416
400
  yield StreamSlice(
417
401
  partition={},
@@ -433,11 +417,21 @@ class ConcurrentCursor(Cursor):
433
417
  )
434
418
  has_reached_upper_boundary = current_upper_boundary >= upper
435
419
 
420
+ clamped_upper = (
421
+ self._clamping_strategy.clamp(current_upper_boundary)
422
+ if current_upper_boundary != upper
423
+ else current_upper_boundary
424
+ )
425
+ clamped_lower = self._clamping_strategy.clamp(current_lower_boundary)
426
+ if clamped_lower >= clamped_upper:
427
+ # clamping collapsed both values which means that it is time to stop processing
428
+ # FIXME: should this be replaced by a proper end_provider?
429
+ break
436
430
  start_value, end_value = (
437
- (current_lower_boundary, current_upper_boundary - self._cursor_granularity)
431
+ (clamped_lower, clamped_upper - self._cursor_granularity)
438
432
  if self._cursor_granularity
439
433
  and (not upper_is_end or not has_reached_upper_boundary)
440
- else (current_lower_boundary, current_upper_boundary)
434
+ else (clamped_lower, clamped_upper)
441
435
  )
442
436
  yield StreamSlice(
443
437
  partition={},
@@ -450,7 +444,7 @@ class ConcurrentCursor(Cursor):
450
444
  ]: self._connector_state_converter.output_format(end_value),
451
445
  },
452
446
  )
453
- current_lower_boundary = current_upper_boundary
447
+ current_lower_boundary = clamped_upper
454
448
  if current_upper_boundary >= upper:
455
449
  stop_processing = True
456
450
 
@@ -0,0 +1,32 @@
1
+ from abc import abstractmethod
2
+ from typing import Protocol
3
+
4
+
5
class GapType(Protocol):
    """
    Represents the gap between two cursor values. Examples:
    * when cursor values are datetimes, GapType is timedelta
    * when cursor values are integers, GapType is an integer as well
    """
13
+
14
+
15
class CursorValueType(Protocol):
    """Structural type for cursor values: orderable, and shiftable by a GapType."""

    @abstractmethod
    def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
        """Support ``self < other``."""

    @abstractmethod
    def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
        """Support ``self >= other``."""

    @abstractmethod
    def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        """Support ``self + gap``, producing another cursor value."""

    @abstractmethod
    def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        """Support ``self - gap``, producing another cursor value."""
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.23.0
3
+ Version: 6.23.0.dev1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk