airbyte-cdk 0.58.9__py3-none-any.whl → 0.59.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,7 +3,8 @@
3
3
  #
4
4
  import functools
5
5
  from abc import ABC, abstractmethod
6
- from typing import Any, List, Mapping, Optional, Protocol, Tuple
6
+ from datetime import datetime
7
+ from typing import Any, List, Mapping, MutableMapping, Optional, Protocol, Tuple
7
8
 
8
9
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
9
10
  from airbyte_cdk.sources.message import MessageRepository
@@ -36,6 +37,11 @@ class CursorField:
36
37
 
37
38
 
38
39
  class Cursor(ABC):
40
+ @property
41
+ @abstractmethod
42
+ def state(self) -> MutableMapping[str, Any]:
43
+ ...
44
+
39
45
  @abstractmethod
40
46
  def observe(self, record: Record) -> None:
41
47
  """
@@ -52,6 +58,10 @@ class Cursor(ABC):
52
58
 
53
59
 
54
60
  class NoopCursor(Cursor):
61
+ @property
62
+ def state(self) -> MutableMapping[str, Any]:
63
+ return {}
64
+
55
65
  def observe(self, record: Record) -> None:
56
66
  pass
57
67
 
@@ -73,6 +83,7 @@ class ConcurrentCursor(Cursor):
73
83
  connector_state_converter: AbstractStreamStateConverter,
74
84
  cursor_field: CursorField,
75
85
  slice_boundary_fields: Optional[Tuple[str, str]],
86
+ start: Optional[Any],
76
87
  ) -> None:
77
88
  self._stream_name = stream_name
78
89
  self._stream_namespace = stream_namespace
@@ -82,9 +93,19 @@ class ConcurrentCursor(Cursor):
82
93
  self._cursor_field = cursor_field
83
94
  # To see some example where the slice boundaries might not be defined, check https://github.com/airbytehq/airbyte/blob/1ce84d6396e446e1ac2377362446e3fb94509461/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py#L363-L379
84
95
  self._slice_boundary_fields = slice_boundary_fields if slice_boundary_fields else tuple()
96
+ self._start = start
85
97
  self._most_recent_record: Optional[Record] = None
86
98
  self._has_closed_at_least_one_slice = False
87
- self.state = stream_state
99
+ self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
100
+
101
+ @property
102
+ def state(self) -> MutableMapping[str, Any]:
103
+ return self._concurrent_state
104
+
105
+ def _get_concurrent_state(self, state: MutableMapping[str, Any]) -> Tuple[datetime, MutableMapping[str, Any]]:
106
+ if self._connector_state_converter.is_state_message_compatible(state):
107
+ return self._start or self._connector_state_converter.zero_value, self._connector_state_converter.deserialize(state)
108
+ return self._connector_state_converter.convert_from_sequential_state(self._cursor_field, state, self._start)
88
109
 
89
110
  def observe(self, record: Record) -> None:
90
111
  if self._slice_boundary_fields:
@@ -102,7 +123,7 @@ class ConcurrentCursor(Cursor):
102
123
  def close_partition(self, partition: Partition) -> None:
103
124
  slice_count_before = len(self.state.get("slices", []))
104
125
  self._add_slice_to_state(partition)
105
- if slice_count_before < len(self.state["slices"]):
126
+ if slice_count_before < len(self.state["slices"]): # only emit if at least one slice has been processed
106
127
  self._merge_partitions()
107
128
  self._emit_state_message()
108
129
  self._has_closed_at_least_one_slice = True
@@ -110,7 +131,9 @@ class ConcurrentCursor(Cursor):
110
131
  def _add_slice_to_state(self, partition: Partition) -> None:
111
132
  if self._slice_boundary_fields:
112
133
  if "slices" not in self.state:
113
- self.state["slices"] = []
134
+ raise RuntimeError(
135
+ f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
136
+ )
114
137
  self.state["slices"].append(
115
138
  {
116
139
  "start": self._extract_from_slice(partition, self._slice_boundary_fields[self._START_BOUNDARY]),
@@ -126,10 +149,8 @@ class ConcurrentCursor(Cursor):
126
149
 
127
150
  self.state["slices"].append(
128
151
  {
129
- # TODO: if we migrate stored state to the concurrent state format, we may want this to be the config start date
130
- # instead of zero_value.
131
- "start": self._connector_state_converter.zero_value,
132
- "end": self._extract_cursor_value(self._most_recent_record),
152
+ self._connector_state_converter.START_KEY: self.start,
153
+ self._connector_state_converter.END_KEY: self._extract_cursor_value(self._most_recent_record),
133
154
  }
134
155
  )
135
156
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import ABC, abstractmethod
6
6
  from enum import Enum
7
- from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional
7
+ from typing import TYPE_CHECKING, Any, List, MutableMapping, Tuple
8
8
 
9
9
  if TYPE_CHECKING:
10
10
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
@@ -18,15 +18,6 @@ class AbstractStreamStateConverter(ABC):
18
18
  START_KEY = "start"
19
19
  END_KEY = "end"
20
20
 
21
- def get_concurrent_stream_state(
22
- self, cursor_field: Optional["CursorField"], state: MutableMapping[str, Any]
23
- ) -> Optional[MutableMapping[str, Any]]:
24
- if not cursor_field:
25
- return None
26
- if self.is_state_message_compatible(state):
27
- return self.deserialize(state)
28
- return self.convert_from_sequential_state(cursor_field, state)
29
-
30
21
  @abstractmethod
31
22
  def deserialize(self, state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
32
23
  """
@@ -40,8 +31,11 @@ class AbstractStreamStateConverter(ABC):
40
31
 
41
32
  @abstractmethod
42
33
  def convert_from_sequential_state(
43
- self, cursor_field: "CursorField", stream_state: MutableMapping[str, Any]
44
- ) -> MutableMapping[str, Any]:
34
+ self,
35
+ cursor_field: "CursorField",
36
+ stream_state: MutableMapping[str, Any],
37
+ start: Any,
38
+ ) -> Tuple[Any, MutableMapping[str, Any]]:
45
39
  """
46
40
  Convert the state message to the format required by the ConcurrentCursor.
47
41
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import abstractmethod
6
6
  from datetime import datetime, timedelta
7
- from typing import Any, List, MutableMapping, Optional
7
+ from typing import Any, List, MutableMapping, Optional, Tuple
8
8
 
9
9
  import pendulum
10
10
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
@@ -16,9 +16,6 @@ from pendulum.datetime import DateTime
16
16
 
17
17
 
18
18
  class DateTimeStreamStateConverter(AbstractStreamStateConverter):
19
- START_KEY = "start"
20
- END_KEY = "end"
21
-
22
19
  @property
23
20
  @abstractmethod
24
21
  def _zero_value(self) -> Any:
@@ -62,7 +59,7 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
62
59
  for interval in sorted_intervals[1:]:
63
60
  last_end_time = merged_intervals[-1][self.END_KEY]
64
61
  current_start_time = interval[self.START_KEY]
65
- if self.compare_intervals(last_end_time, current_start_time):
62
+ if self._compare_intervals(last_end_time, current_start_time):
66
63
  merged_end_time = max(last_end_time, interval[self.END_KEY])
67
64
  merged_intervals[-1][self.END_KEY] = merged_end_time
68
65
  else:
@@ -70,10 +67,12 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
70
67
 
71
68
  return merged_intervals
72
69
 
73
- def compare_intervals(self, end_time: Any, start_time: Any) -> bool:
70
+ def _compare_intervals(self, end_time: Any, start_time: Any) -> bool:
74
71
  return bool(self.increment(end_time) >= start_time)
75
72
 
76
- def convert_from_sequential_state(self, cursor_field: CursorField, stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
73
+ def convert_from_sequential_state(
74
+ self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], start: datetime
75
+ ) -> Tuple[datetime, MutableMapping[str, Any]]:
77
76
  """
78
77
  Convert the state message to the format required by the ConcurrentCursor.
79
78
 
@@ -82,28 +81,35 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
82
81
  "state_type": ConcurrencyCompatibleStateType.date_range.value,
83
82
  "metadata": { … },
84
83
  "slices": [
85
- {starts: 0, end: "2021-01-18T21:18:20.000+00:00", finished_processing: true}]
84
+ {"start": "2021-01-18T21:18:20.000+00:00", "end": "2021-01-18T21:18:20.000+00:00"},
85
+ ]
86
86
  }
87
87
  """
88
+ sync_start = self._get_sync_start(cursor_field, stream_state, start)
88
89
  if self.is_state_message_compatible(stream_state):
89
- return stream_state
90
- if cursor_field.cursor_field_key in stream_state:
91
- slices = [
92
- {
93
- # TODO: if we migrate stored state to the concurrent state format, we may want this to be the config start date
94
- # instead of `zero_value`
95
- self.START_KEY: self.zero_value,
96
- self.END_KEY: self.parse_timestamp(stream_state[cursor_field.cursor_field_key]),
97
- },
98
- ]
99
- else:
100
- slices = []
101
- return {
90
+ return sync_start, stream_state
91
+
92
+ # Create a slice to represent the records synced during prior syncs.
93
+ # The start and end are the same to avoid confusion as to whether the records for this slice
94
+ # were actually synced
95
+ slices = [{self.START_KEY: sync_start, self.END_KEY: sync_start}]
96
+
97
+ return sync_start, {
102
98
  "state_type": ConcurrencyCompatibleStateType.date_range.value,
103
99
  "slices": slices,
104
100
  "legacy": stream_state,
105
101
  }
106
102
 
103
+ def _get_sync_start(self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], start: Optional[Any]) -> datetime:
104
+ sync_start = self.parse_timestamp(start) if start is not None else self.zero_value
105
+ prev_sync_low_water_mark = (
106
+ self.parse_timestamp(stream_state[cursor_field.cursor_field_key]) if cursor_field.cursor_field_key in stream_state else None
107
+ )
108
+ if prev_sync_low_water_mark and prev_sync_low_water_mark >= sync_start:
109
+ return prev_sync_low_water_mark
110
+ else:
111
+ return sync_start
112
+
107
113
  def convert_to_sequential_state(self, cursor_field: CursorField, stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
108
114
  """
109
115
  Convert the state message from the concurrency-compatible format to the stream's original format.
@@ -113,10 +119,9 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
113
119
  """
114
120
  if self.is_state_message_compatible(stream_state):
115
121
  legacy_state = stream_state.get("legacy", {})
116
- if slices := stream_state.pop("slices", None):
117
- latest_complete_time = self._get_latest_complete_time(slices)
118
- if latest_complete_time:
119
- legacy_state.update({cursor_field.cursor_field_key: self.output_format(latest_complete_time)})
122
+ latest_complete_time = self._get_latest_complete_time(stream_state.get("slices", []))
123
+ if latest_complete_time is not None:
124
+ legacy_state.update({cursor_field.cursor_field_key: self.output_format(latest_complete_time)})
120
125
  return legacy_state or {}
121
126
  else:
122
127
  return stream_state
@@ -125,11 +130,12 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
125
130
  """
126
131
  Get the latest time before which all records have been processed.
127
132
  """
128
- if slices:
129
- first_interval = self.merge_intervals(slices)[0][self.END_KEY]
130
- return first_interval
131
- else:
132
- return None
133
+ if not slices:
134
+ raise RuntimeError("Expected at least one slice but there were none. This is unexpected; please contact Support.")
135
+
136
+ merged_intervals = self.merge_intervals(slices)
137
+ first_interval = merged_intervals[0]
138
+ return first_interval[self.END_KEY]
133
139
 
134
140
 
135
141
  class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.58.9
3
+ Version: 0.59.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -202,7 +202,7 @@ airbyte_cdk/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuY
202
202
  airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=W7WEz6FrfAjb0o_msnMBIESSVO1qJC2_A8ocYg55Rw4,3579
203
203
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=f48kLzOHYNeD7Tfsdy7WaZ__hB24SfCTcW5WpQedqTc,18648
204
204
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=8xDRpfktnARBbRi_RwznvKuoGrpPF2b6tQyloMwogkM,2013
205
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=GnSRDkEEvg2GNy_fEc9cWFoYI1oEfvzwg1vhRrusqWg,7105
205
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=_mAbnJILeiGOGBNXeeXrSSoz7rveEBMoL97569EPEBY,8106
206
206
  airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=w83pvFbw9vjfhbovw-LrCFiwQMO8hfo1Vm-1CB1SeXQ,2777
207
207
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=-WETGIY5_QFmVeDFiqm4WhRJ_nNCkfcDwOQqx6cSqrI,365
208
208
  airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=TicEVRyLt5Y85xa8bXsrGzjmNMJAulBLqe1LfBLnHmk,1540
@@ -214,8 +214,8 @@ airbyte_cdk/sources/streams/concurrent/partitions/record.py,sha256=-Q3zLex3CHOXi
214
214
  airbyte_cdk/sources/streams/concurrent/partitions/throttled_queue.py,sha256=P6KrMb4GtcDUbMcx7pVb7dfF_igeW9Utn2MFoVHkH6o,1589
215
215
  airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=iVARnsGOSdvlSCqAf-yxc4_PUT3oOR9B6cyVNcLTjY8,932
216
216
  airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
217
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=8_97TvqOSn5jxmHsNMmnFi_A4XxGtMrX5GG6eNOyLtA,2954
218
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=4K_Fa0nviG3L3Mu4YX3oEs1cAfE0QqzrKkkdJsL21M4,7002
217
+ airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=jeVP3V9uDM_aCMh1G3kZNKafjopy1rZzIAJ8sU-69KU,2613
218
+ airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=BvVwPiVHE-R7mWDjcqhsKnZyD6OcpmAYaih7pcmOUvo,7589
219
219
  airbyte_cdk/sources/streams/http/__init__.py,sha256=cTP2d7Wf0hYXaN20U0dtxKa1pFZ9rI-lBbkQ0UM1apQ,261
220
220
  airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=MHgW42gwaevaCVnNLrUSE6WJHT4reeZ417nMWrmbC7o,6884
221
221
  airbyte_cdk/sources/streams/http/exceptions.py,sha256=OokLDI7W8hZvq9e15sL3em2AdwmzmcAl72Ms-i5l0Nw,1334
@@ -411,8 +411,8 @@ unit_tests/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYC
411
411
  unit_tests/sources/streams/concurrent/test_adapters.py,sha256=Y_c1vKCtGKEzrUSncmpgp0lgFnArmBrIrmLFaOIAxRg,15439
412
412
  unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py,sha256=a5JWWWc20zeEGmMzYzzk-_6XwfDEZOGgW287dVgft_8,1339
413
413
  unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py,sha256=Dc7PGfQge1ymxnaTlPKGMVOLCV81JCXoK1ciJPwHIhg,26347
414
- unit_tests/sources/streams/concurrent/test_cursor.py,sha256=ere6x4nhhtSmEdvDoksLvLJTFkcl29N7VI7Mj6jin-I,5385
415
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py,sha256=mzlm8JL5AuYmNX4r9VViLKNaU1zUyBvBQ3lIdChW9pU,19153
414
+ unit_tests/sources/streams/concurrent/test_cursor.py,sha256=0nFp9xauWxDVxJmFQLedvnxWKDoAlk9ChDkd0Mmrhm4,5951
415
+ unit_tests/sources/streams/concurrent/test_datetime_state_converter.py,sha256=vx-oPmGSzSfBM37ZXN_wXLeTOlozErqQoa5sc5zP42o,13287
416
416
  unit_tests/sources/streams/concurrent/test_default_stream.py,sha256=VLF46ESoRqcoALYCdrdZ2NDl5s2T1fRRWsYAy2-IwYw,6502
417
417
  unit_tests/sources/streams/concurrent/test_partition_reader.py,sha256=2uj7uV3ie0BMb--aa3MUru-f4jLiYUR-Nl0r3EhwxLQ,951
418
418
  unit_tests/sources/streams/concurrent/test_thread_pool_manager.py,sha256=UzlMhXTgXAuqqPrESGjkDG9JLj4UdPo2bx3T9oLCFpA,3140
@@ -420,7 +420,7 @@ unit_tests/sources/streams/concurrent/test_throttled_queue.py,sha256=05NgNkx5c5r
420
420
  unit_tests/sources/streams/concurrent/test_throttler.py,sha256=y1cWUdKZP5iy4FKWmGdgEk4e_0WY0VNgRvzmlU114NY,448
421
421
  unit_tests/sources/streams/concurrent/scenarios/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
422
422
  unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py,sha256=TH4vzdHNWvw4JsF0v4n6wrR1Rnr-WfU3R6nnOwGLNwg,9751
423
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=cFqcfrk-P4Gz-5yUFjTXIZ5U0zhzg3jayZiF6W79oPc,5880
423
+ unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=BGqaYgU_ow4PsuhDjFwAFkU1VCkUuromvTAUV5tOaJ8,5816
424
424
  unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=kDKKV0ApASyS5c2HYkKvYohSkT--46TqALirqU8POjg,13804
425
425
  unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=Z_4-ClsxBupmN7Pbl8lF9bkSA9wnjLtrgA9WR_8VRi8,3757
426
426
  unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py,sha256=KqCLsXB_9rV4hNdSPrNynK3G-UIsipqsZT6X0Z-iM5E,13175
@@ -448,8 +448,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
448
448
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
449
449
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
450
450
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
451
- airbyte_cdk-0.58.9.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
452
- airbyte_cdk-0.58.9.dist-info/METADATA,sha256=c-ydl9BXY_-L2loNF2hjCcrt3PukOwp228pA6DRSk84,11073
453
- airbyte_cdk-0.58.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
454
- airbyte_cdk-0.58.9.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
455
- airbyte_cdk-0.58.9.dist-info/RECORD,,
451
+ airbyte_cdk-0.59.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
452
+ airbyte_cdk-0.59.0.dist-info/METADATA,sha256=5wiAdalwK0NUdE--8UKtiGbRy9ccsaY1T255M_Ei850,11073
453
+ airbyte_cdk-0.59.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
454
+ airbyte_cdk-0.59.0.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
455
+ airbyte_cdk-0.59.0.dist-info/RECORD,,
@@ -52,8 +52,7 @@ class StreamFacadeSource(ConcurrentSourceAdapter):
52
52
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
53
53
  state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=self._state)
54
54
  state_converter = StreamFacadeConcurrentConnectorStateConverter()
55
- stream_states = [state_converter.get_concurrent_stream_state(self._cursor_field, state_manager.get_stream_state(stream.name, stream.namespace))
56
- for stream in self._streams]
55
+ stream_states = [state_manager.get_stream_state(stream.name, stream.namespace) for stream in self._streams]
57
56
  return [
58
57
  StreamFacade.create_from_stream(
59
58
  stream,
@@ -69,6 +68,7 @@ class StreamFacadeSource(ConcurrentSourceAdapter):
69
68
  state_converter,
70
69
  self._cursor_field,
71
70
  self._cursor_boundaries,
71
+ None,
72
72
  )
73
73
  if self._cursor_field
74
74
  else NoopCursor(),
@@ -45,33 +45,50 @@ class ConcurrentCursorTest(TestCase):
45
45
  return ConcurrentCursor(
46
46
  _A_STREAM_NAME,
47
47
  _A_STREAM_NAMESPACE,
48
- self._state_converter.get_concurrent_stream_state(CursorField(_A_CURSOR_FIELD_KEY), {}),
48
+ {},
49
49
  self._message_repository,
50
50
  self._state_manager,
51
51
  self._state_converter,
52
52
  CursorField(_A_CURSOR_FIELD_KEY),
53
53
  _SLICE_BOUNDARY_FIELDS,
54
+ None,
54
55
  )
55
56
 
56
57
  def _cursor_without_slice_boundary_fields(self) -> ConcurrentCursor:
57
58
  return ConcurrentCursor(
58
59
  _A_STREAM_NAME,
59
60
  _A_STREAM_NAMESPACE,
60
- self._state_converter.get_concurrent_stream_state(CursorField(_A_CURSOR_FIELD_KEY), {}),
61
+ {},
61
62
  self._message_repository,
62
63
  self._state_manager,
63
64
  self._state_converter,
64
65
  CursorField(_A_CURSOR_FIELD_KEY),
65
66
  None,
67
+ None,
66
68
  )
67
69
 
68
70
  def test_given_boundary_fields_when_close_partition_then_emit_state(self) -> None:
69
- self._cursor_with_slice_boundary_fields().close_partition(
71
+ cursor = self._cursor_with_slice_boundary_fields()
72
+ cursor.close_partition(
70
73
  _partition(
71
74
  {_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30},
72
75
  )
73
76
  )
74
77
 
78
+ self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
79
+ self._state_manager.update_state_for_stream.assert_called_once_with(
80
+ _A_STREAM_NAME,
81
+ _A_STREAM_NAMESPACE,
82
+ {_A_CURSOR_FIELD_KEY: 0}, # State message is updated to the legacy format before being emitted
83
+ )
84
+
85
+ def test_given_boundary_fields_when_close_partition_then_emit_updated_state(self) -> None:
86
+ self._cursor_with_slice_boundary_fields().close_partition(
87
+ _partition(
88
+ {_LOWER_SLICE_BOUNDARY_FIELD: 0, _UPPER_SLICE_BOUNDARY_FIELD: 30},
89
+ )
90
+ )
91
+
75
92
  self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
76
93
  self._state_manager.update_state_for_stream.assert_called_once_with(
77
94
  _A_STREAM_NAME,
@@ -5,16 +5,6 @@
5
5
  from datetime import datetime, timezone
6
6
 
7
7
  import pytest
8
- from airbyte_cdk.models import (
9
- AirbyteStateBlob,
10
- AirbyteStateMessage,
11
- AirbyteStateType,
12
- AirbyteStream,
13
- AirbyteStreamState,
14
- StreamDescriptor,
15
- SyncMode,
16
- )
17
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
18
8
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
19
9
  from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ConcurrencyCompatibleStateType
20
10
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -23,115 +13,12 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
23
13
  )
24
14
 
25
15
 
26
- @pytest.mark.parametrize(
27
- "converter, stream, input_state, expected_output_state",
28
- [
29
- pytest.param(
30
- EpochValueConcurrentStreamStateConverter(),
31
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
32
- [],
33
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
34
- id="no-input-state-epoch",
35
- ),
36
- pytest.param(
37
- EpochValueConcurrentStreamStateConverter(),
38
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
39
- [
40
- AirbyteStateMessage(
41
- type=AirbyteStateType.STREAM,
42
- stream=AirbyteStreamState(
43
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
44
- stream_state=AirbyteStateBlob.parse_obj({"created_at": 1703020837}),
45
- ),
46
- ),
47
- ],
48
- {
49
- "legacy": {"created_at": 1703020837},
50
- "slices": [{"end": datetime(2023, 12, 19, 21, 20, 37, tzinfo=timezone.utc),
51
- "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
52
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
53
- },
54
- id="incompatible-input-state-epoch",
55
- ),
56
- pytest.param(
57
- EpochValueConcurrentStreamStateConverter(),
58
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
59
- [
60
- AirbyteStateMessage(
61
- type=AirbyteStateType.STREAM,
62
- stream=AirbyteStreamState(
63
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
64
- stream_state=AirbyteStateBlob.parse_obj(
65
- {
66
- "created_at": 1703020837,
67
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
68
- },
69
- ),
70
- ),
71
- ),
72
- ],
73
- {"created_at": 1703020837, "state_type": ConcurrencyCompatibleStateType.date_range.value},
74
- id="compatible-input-state-epoch",
75
- ),
76
- pytest.param(
77
- IsoMillisConcurrentStreamStateConverter(),
78
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
79
- [],
80
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
81
- id="no-input-state-isomillis",
82
- ),
83
- pytest.param(
84
- IsoMillisConcurrentStreamStateConverter(),
85
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
86
- [
87
- AirbyteStateMessage(
88
- type=AirbyteStateType.STREAM,
89
- stream=AirbyteStreamState(
90
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
91
- stream_state=AirbyteStateBlob.parse_obj({"created_at": "2021-01-18T21:18:20.000Z"}),
92
- ),
93
- ),
94
- ],
95
- {
96
- "legacy": {"created_at": "2021-01-18T21:18:20.000Z"},
97
- "slices": [{"end": datetime(2021, 1, 18, 21, 18, 20, tzinfo=timezone.utc),
98
- "start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
99
- "state_type": ConcurrencyCompatibleStateType.date_range.value},
100
- id="incompatible-input-state-isomillis",
101
- ),
102
- pytest.param(
103
- IsoMillisConcurrentStreamStateConverter(),
104
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
105
- [
106
- AirbyteStateMessage(
107
- type=AirbyteStateType.STREAM,
108
- stream=AirbyteStreamState(
109
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
110
- stream_state=AirbyteStateBlob.parse_obj(
111
- {
112
- "created_at": "2021-01-18T21:18:20.000Z",
113
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
114
- },
115
- ),
116
- ),
117
- ),
118
- ],
119
- {"created_at": "2021-01-18T21:18:20.000Z", "state_type": ConcurrencyCompatibleStateType.date_range.value},
120
- id="compatible-input-state-isomillis",
121
- ),
122
- ],
123
- )
124
- def test_concurrent_connector_state_manager_get_stream_state(converter, stream, input_state, expected_output_state):
125
- state_manager = ConnectorStateManager({"stream1": stream}, input_state)
126
- assert converter.get_concurrent_stream_state(CursorField("created_at"), state_manager.get_stream_state("stream1", None)) == expected_output_state
127
-
128
-
129
16
  @pytest.mark.parametrize(
130
17
  "converter, input_state, is_compatible",
131
18
  [
132
19
  pytest.param(
133
20
  EpochValueConcurrentStreamStateConverter(),
134
- {'state_type': 'date-range'},
21
+ {"state_type": "date-range"},
135
22
  True,
136
23
  id="no-input-state-is-compatible-epoch",
137
24
  ),
@@ -163,7 +50,7 @@ def test_concurrent_connector_state_manager_get_stream_state(converter, stream,
163
50
  ),
164
51
  pytest.param(
165
52
  IsoMillisConcurrentStreamStateConverter(),
166
- {'state_type': 'date-range'},
53
+ {"state_type": "date-range"},
167
54
  True,
168
55
  id="no-input-state-is-compatible-isomillis",
169
56
  ),
@@ -200,22 +87,106 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
200
87
 
201
88
 
202
89
  @pytest.mark.parametrize(
203
- "converter, stream, sequential_state, expected_output_state",
90
+ "converter,start,state,expected_start",
91
+ [
92
+ pytest.param(
93
+ EpochValueConcurrentStreamStateConverter(),
94
+ None,
95
+ {},
96
+ EpochValueConcurrentStreamStateConverter().zero_value,
97
+ id="epoch-converter-no-state-no-start-start-is-zero-value"
98
+ ),
99
+ pytest.param(
100
+ EpochValueConcurrentStreamStateConverter(),
101
+ 1617030403,
102
+ {},
103
+ datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
104
+ id="epoch-converter-no-state-with-start-start-is-start"
105
+ ),
106
+ pytest.param(
107
+ EpochValueConcurrentStreamStateConverter(),
108
+ None,
109
+ {"created_at": 1617030404},
110
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
111
+ id="epoch-converter-state-without-start-start-is-from-state"
112
+ ),
113
+ pytest.param(
114
+ EpochValueConcurrentStreamStateConverter(),
115
+ 1617030404,
116
+ {"created_at": 1617030403},
117
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
118
+ id="epoch-converter-state-before-start-start-is-start"
119
+ ),
120
+ pytest.param(
121
+ EpochValueConcurrentStreamStateConverter(),
122
+ 1617030403,
123
+ {"created_at": 1617030404},
124
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
125
+ id="epoch-converter-state-after-start-start-is-from-state"
126
+ ),
127
+ pytest.param(
128
+ IsoMillisConcurrentStreamStateConverter(),
129
+ None,
130
+ {},
131
+ IsoMillisConcurrentStreamStateConverter().zero_value,
132
+ id="isomillis-converter-no-state-no-start-start-is-zero-value"
133
+ ),
134
+ pytest.param(
135
+ IsoMillisConcurrentStreamStateConverter(),
136
+ "2021-08-22T05:03:27.000Z",
137
+ {},
138
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
139
+ id="isomillis-converter-no-state-with-start-start-is-start"
140
+ ),
141
+ pytest.param(
142
+ IsoMillisConcurrentStreamStateConverter(),
143
+ None,
144
+ {"created_at": "2021-08-22T05:03:27.000Z"},
145
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
146
+ id="isomillis-converter-state-without-start-start-is-from-state"
147
+ ),
148
+ pytest.param(
149
+ IsoMillisConcurrentStreamStateConverter(),
150
+ "2022-08-22T05:03:27.000Z",
151
+ {"created_at": "2021-08-22T05:03:27.000Z"},
152
+ datetime(2022, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
153
+ id="isomillis-converter-state-before-start-start-is-start"
154
+ ),
155
+ pytest.param(
156
+ IsoMillisConcurrentStreamStateConverter(),
157
+ "2022-08-22T05:03:27.000Z",
158
+ {"created_at": "2023-08-22T05:03:27.000Z"},
159
+ datetime(2023, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
160
+ id="isomillis-converter-state-after-start-start-is-from-state"
161
+ ),
162
+ ]
163
+ )
164
+ def test_get_sync_start(converter, start, state, expected_start):
165
+ assert converter._get_sync_start(CursorField("created_at"), state, start) == expected_start
166
+
167
+
168
+ @pytest.mark.parametrize(
169
+ "converter, start, sequential_state, expected_output_state",
204
170
  [
205
171
  pytest.param(
206
172
  EpochValueConcurrentStreamStateConverter(),
207
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
173
+ 0,
208
174
  {},
209
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
175
+ {
176
+ "legacy": {},
177
+ "slices": [{"start": EpochValueConcurrentStreamStateConverter().zero_value,
178
+ "end": EpochValueConcurrentStreamStateConverter().zero_value}],
179
+ "state_type": "date-range",
180
+ },
210
181
  id="empty-input-state-epoch",
211
182
  ),
212
183
  pytest.param(
213
184
  EpochValueConcurrentStreamStateConverter(),
214
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
185
+ 1617030403,
215
186
  {"created": 1617030403},
216
187
  {
217
188
  "state_type": "date-range",
218
- "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
189
+ "slices": [{"start": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
219
190
  "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
220
191
  "legacy": {"created": 1617030403},
221
192
  },
@@ -223,18 +194,11 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
223
194
  ),
224
195
  pytest.param(
225
196
  IsoMillisConcurrentStreamStateConverter(),
226
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
227
- {},
228
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
229
- id="empty-input-state-isomillis",
230
- ),
231
- pytest.param(
232
- IsoMillisConcurrentStreamStateConverter(),
233
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
197
+ "2020-01-01T00:00:00.000Z",
234
198
  {"created": "2021-08-22T05:03:27.000Z"},
235
199
  {
236
200
  "state_type": "date-range",
237
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
201
+ "slices": [{"start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
238
202
  "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}],
239
203
  "legacy": {"created": "2021-08-22T05:03:27.000Z"},
240
204
  },
@@ -242,186 +206,120 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
242
206
  ),
243
207
  ],
244
208
  )
245
- def test_convert_from_sequential_state(converter, stream, sequential_state, expected_output_state):
209
+ def test_convert_from_sequential_state(converter, start, sequential_state, expected_output_state):
246
210
  comparison_format = "%Y-%m-%dT%H:%M:%S.%f"
247
211
  if expected_output_state["slices"]:
248
- conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state)
212
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
249
213
  assert conversion["state_type"] == expected_output_state["state_type"]
250
214
  assert conversion["legacy"] == expected_output_state["legacy"]
251
215
  for actual, expected in zip(conversion["slices"], expected_output_state["slices"]):
252
216
  assert actual["start"].strftime(comparison_format) == expected["start"].strftime(comparison_format)
253
217
  assert actual["end"].strftime(comparison_format) == expected["end"].strftime(comparison_format)
254
218
  else:
255
- assert converter.convert_from_sequential_state(CursorField("created"), sequential_state) == expected_output_state
219
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
220
+ assert conversion == expected_output_state
256
221
 
257
222
 
258
223
  @pytest.mark.parametrize(
259
- "converter, stream, concurrent_state, expected_output_state",
224
+ "converter, concurrent_state, expected_output_state",
260
225
  [
261
226
  pytest.param(
262
227
  EpochValueConcurrentStreamStateConverter(),
263
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
264
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
265
- {},
266
- id="empty-input-state-epoch",
228
+ {
229
+ "state_type": "date-range",
230
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
231
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
232
+ },
233
+ {"created": 1617030403},
234
+ id="epoch-single-slice",
267
235
  ),
268
236
  pytest.param(
269
237
  EpochValueConcurrentStreamStateConverter(),
270
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
271
238
  {
272
239
  "state_type": "date-range",
273
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
274
- "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}]},
240
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
241
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
242
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
243
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
244
+ },
245
+ {"created": 1648566403},
246
+ id="epoch-overlapping-slices",
247
+ ),
248
+ pytest.param(
249
+ EpochValueConcurrentStreamStateConverter(),
250
+ {
251
+ "state_type": "date-range",
252
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
253
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
254
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
255
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
256
+ },
275
257
  {"created": 1617030403},
276
- id="with-input-state-epoch",
258
+ id="epoch-multiple-slices",
277
259
  ),
278
260
  pytest.param(
279
261
  IsoMillisConcurrentStreamStateConverter(),
280
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
281
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
282
- {},
283
- id="empty-input-state-isomillis",
262
+ {
263
+ "state_type": "date-range",
264
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
265
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
266
+ },
267
+ {"created": "2021-03-29T15:06:43.000Z"},
268
+ id="isomillis-single-slice",
284
269
  ),
285
270
  pytest.param(
286
271
  IsoMillisConcurrentStreamStateConverter(),
287
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
288
272
  {
289
273
  "state_type": "date-range",
290
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
291
- "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}]},
292
- {"created": "2021-08-22T05:03:27.000Z"},
293
- id="with-input-state-isomillis",
274
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
275
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
276
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
277
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
278
+ },
279
+ {"created": "2022-03-29T15:06:43.000Z"},
280
+ id="isomillis-overlapping-slices",
281
+ ),
282
+ pytest.param(
283
+ IsoMillisConcurrentStreamStateConverter(),
284
+ {
285
+ "state_type": "date-range",
286
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
287
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
288
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
289
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
290
+ },
291
+ {"created": "2021-03-29T15:06:43.000Z"},
292
+ id="isomillis-multiple-slices",
294
293
  ),
295
294
  ],
296
295
  )
297
- def test_convert_to_sequential_state(converter, stream, concurrent_state, expected_output_state):
296
+ def test_convert_to_sequential_state(converter, concurrent_state, expected_output_state):
298
297
  assert converter.convert_to_sequential_state(CursorField("created"), concurrent_state) == expected_output_state
299
298
 
300
299
 
301
300
  @pytest.mark.parametrize(
302
- "converter, input_intervals, expected_merged_intervals",
301
+ "converter, concurrent_state, expected_output_state",
303
302
  [
304
303
  pytest.param(
305
304
  EpochValueConcurrentStreamStateConverter(),
306
- [],
307
- [],
308
- id="no-intervals-epoch",
309
- ),
310
- pytest.param(
311
- EpochValueConcurrentStreamStateConverter(),
312
- [{"start": 0, "end": 1}],
313
- [{"start": 0, "end": 1}],
314
- id="single-interval-epoch",
315
- ),
316
- pytest.param(
317
- EpochValueConcurrentStreamStateConverter(),
318
- [{"start": 0, "end": 1}, {"start": 0, "end": 1}],
319
- [{"start": 0, "end": 1}],
320
- id="duplicate-intervals-epoch",
321
- ),
322
- pytest.param(
323
- EpochValueConcurrentStreamStateConverter(),
324
- [{"start": 0, "end": 1}, {"start": 0, "end": 2}],
325
- [{"start": 0, "end": 2}],
326
- id="overlapping-intervals-epoch",
327
- ),
328
- pytest.param(
329
- EpochValueConcurrentStreamStateConverter(),
330
- [{"start": 0, "end": 3}, {"start": 1, "end": 2}],
331
- [{"start": 0, "end": 3}],
332
- id="enclosed-intervals-epoch",
333
- ),
334
- pytest.param(
335
- EpochValueConcurrentStreamStateConverter(),
336
- [{"start": 1, "end": 2}, {"start": 0, "end": 1}],
337
- [{"start": 0, "end": 2}],
338
- id="unordered-intervals-epoch",
339
- ),
340
- pytest.param(
341
- EpochValueConcurrentStreamStateConverter(),
342
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}],
343
- [{"start": 0, "end": 3}],
344
- id="adjacent-intervals-epoch",
345
- ),
346
- pytest.param(
347
- EpochValueConcurrentStreamStateConverter(),
348
- [{"start": 3, "end": 4}, {"start": 0, "end": 1}],
349
- [{"start": 0, "end": 1}, {"start": 3, "end": 4}],
350
- id="nonoverlapping-intervals-epoch",
351
- ),
352
- pytest.param(
353
- EpochValueConcurrentStreamStateConverter(),
354
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}, {"start": 10, "end": 11}, {"start": 1, "end": 4}],
355
- [{"start": 0, "end": 4}, {"start": 10, "end": 11}],
356
- id="overlapping-and-nonoverlapping-intervals-epoch",
357
- ),
358
- pytest.param(
359
- IsoMillisConcurrentStreamStateConverter(),
360
- [],
361
- [],
362
- id="no-intervals-isomillis",
363
- ),
364
- pytest.param(
365
- IsoMillisConcurrentStreamStateConverter(),
366
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
367
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
368
- id="single-interval-isomillis",
369
- ),
370
- pytest.param(
371
- IsoMillisConcurrentStreamStateConverter(),
372
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
373
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
374
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
375
- id="duplicate-intervals-isomillis",
376
- ),
377
- pytest.param(
378
- IsoMillisConcurrentStreamStateConverter(),
379
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"},
380
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
381
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
382
- id="overlapping-intervals-isomillis",
383
- ),
384
- pytest.param(
385
- IsoMillisConcurrentStreamStateConverter(),
386
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
387
- {"start": "2022-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
388
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
389
- id="enclosed-intervals-isomillis",
390
- ),
391
- pytest.param(
392
- IsoMillisConcurrentStreamStateConverter(),
393
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
394
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
395
- [{"start": 0, "end": 2}],
396
- id="unordered-intervals-isomillis",
397
- ),
398
- pytest.param(
399
- IsoMillisConcurrentStreamStateConverter(),
400
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
401
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"}],
402
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
403
- id="adjacent-intervals-isomillis",
404
- ),
405
- pytest.param(
406
- IsoMillisConcurrentStreamStateConverter(),
407
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
408
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
409
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
410
- {"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
411
- id="nonoverlapping-intervals-isomillis",
305
+ {
306
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
307
+ "start": EpochValueConcurrentStreamStateConverter().zero_value,
308
+ },
309
+ {"created": 0},
310
+ id="empty-slices-epoch",
412
311
  ),
413
312
  pytest.param(
414
313
  IsoMillisConcurrentStreamStateConverter(),
415
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
416
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"},
417
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"},
418
- {"start": "2022-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"}],
419
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"},
420
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"}],
421
- id="overlapping-and-nonoverlapping-intervals-isomillis",
314
+ {
315
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
316
+ "start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
317
+ },
318
+ {"created": "2021-08-22T05:03:27.000Z"},
319
+ id="empty-slices-isomillis",
422
320
  ),
423
321
  ],
424
322
  )
425
- def test_merge_intervals(converter, input_intervals, expected_merged_intervals):
426
- parsed_intervals = [{"start": converter.parse_timestamp(i["start"]), "end": converter.parse_timestamp(i["end"])} for i in input_intervals]
427
- return converter.merge_intervals(parsed_intervals) == expected_merged_intervals
323
+ def test_convert_to_sequential_state_no_slices_returns_legacy_state(converter, concurrent_state, expected_output_state):
324
+ with pytest.raises(RuntimeError):
325
+ converter.convert_to_sequential_state(CursorField("created"), concurrent_state)