airbyte-cdk 0.58.9__py3-none-any.whl → 0.59.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,7 +3,8 @@
3
3
  #
4
4
  import functools
5
5
  from abc import ABC, abstractmethod
6
- from typing import Any, List, Mapping, Optional, Protocol, Tuple
6
+ from datetime import datetime
7
+ from typing import Any, List, Mapping, MutableMapping, Optional, Protocol, Tuple
7
8
 
8
9
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
9
10
  from airbyte_cdk.sources.message import MessageRepository
@@ -36,6 +37,11 @@ class CursorField:
36
37
 
37
38
 
38
39
  class Cursor(ABC):
40
+ @property
41
+ @abstractmethod
42
+ def state(self) -> MutableMapping[str, Any]:
43
+ ...
44
+
39
45
  @abstractmethod
40
46
  def observe(self, record: Record) -> None:
41
47
  """
@@ -52,6 +58,10 @@ class Cursor(ABC):
52
58
 
53
59
 
54
60
  class NoopCursor(Cursor):
61
+ @property
62
+ def state(self) -> MutableMapping[str, Any]:
63
+ return {}
64
+
55
65
  def observe(self, record: Record) -> None:
56
66
  pass
57
67
 
@@ -73,6 +83,7 @@ class ConcurrentCursor(Cursor):
73
83
  connector_state_converter: AbstractStreamStateConverter,
74
84
  cursor_field: CursorField,
75
85
  slice_boundary_fields: Optional[Tuple[str, str]],
86
+ start: Optional[Any],
76
87
  ) -> None:
77
88
  self._stream_name = stream_name
78
89
  self._stream_namespace = stream_namespace
@@ -82,9 +93,19 @@ class ConcurrentCursor(Cursor):
82
93
  self._cursor_field = cursor_field
83
94
  # To see some example where the slice boundaries might not be defined, check https://github.com/airbytehq/airbyte/blob/1ce84d6396e446e1ac2377362446e3fb94509461/airbyte-integrations/connectors/source-stripe/source_stripe/streams.py#L363-L379
84
95
  self._slice_boundary_fields = slice_boundary_fields if slice_boundary_fields else tuple()
96
+ self._start = start
85
97
  self._most_recent_record: Optional[Record] = None
86
98
  self._has_closed_at_least_one_slice = False
87
- self.state = stream_state
99
+ self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
100
+
101
+ @property
102
+ def state(self) -> MutableMapping[str, Any]:
103
+ return self._concurrent_state
104
+
105
+ def _get_concurrent_state(self, state: MutableMapping[str, Any]) -> Tuple[datetime, MutableMapping[str, Any]]:
106
+ if self._connector_state_converter.is_state_message_compatible(state):
107
+ return self._start or self._connector_state_converter.zero_value, self._connector_state_converter.deserialize(state)
108
+ return self._connector_state_converter.convert_from_sequential_state(self._cursor_field, state, self._start)
88
109
 
89
110
  def observe(self, record: Record) -> None:
90
111
  if self._slice_boundary_fields:
@@ -102,7 +123,7 @@ class ConcurrentCursor(Cursor):
102
123
  def close_partition(self, partition: Partition) -> None:
103
124
  slice_count_before = len(self.state.get("slices", []))
104
125
  self._add_slice_to_state(partition)
105
- if slice_count_before < len(self.state["slices"]):
126
+ if slice_count_before < len(self.state["slices"]): # only emit if at least one slice has been processed
106
127
  self._merge_partitions()
107
128
  self._emit_state_message()
108
129
  self._has_closed_at_least_one_slice = True
@@ -110,7 +131,9 @@ class ConcurrentCursor(Cursor):
110
131
  def _add_slice_to_state(self, partition: Partition) -> None:
111
132
  if self._slice_boundary_fields:
112
133
  if "slices" not in self.state:
113
- self.state["slices"] = []
134
+ raise RuntimeError(
135
+ f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
136
+ )
114
137
  self.state["slices"].append(
115
138
  {
116
139
  "start": self._extract_from_slice(partition, self._slice_boundary_fields[self._START_BOUNDARY]),
@@ -126,10 +149,8 @@ class ConcurrentCursor(Cursor):
126
149
 
127
150
  self.state["slices"].append(
128
151
  {
129
- # TODO: if we migrate stored state to the concurrent state format, we may want this to be the config start date
130
- # instead of zero_value.
131
- "start": self._connector_state_converter.zero_value,
132
- "end": self._extract_cursor_value(self._most_recent_record),
152
+ self._connector_state_converter.START_KEY: self.start,
153
+ self._connector_state_converter.END_KEY: self._extract_cursor_value(self._most_recent_record),
133
154
  }
134
155
  )
135
156
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import ABC, abstractmethod
6
6
  from enum import Enum
7
- from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional
7
+ from typing import TYPE_CHECKING, Any, List, MutableMapping, Tuple
8
8
 
9
9
  if TYPE_CHECKING:
10
10
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
@@ -18,15 +18,6 @@ class AbstractStreamStateConverter(ABC):
18
18
  START_KEY = "start"
19
19
  END_KEY = "end"
20
20
 
21
- def get_concurrent_stream_state(
22
- self, cursor_field: Optional["CursorField"], state: MutableMapping[str, Any]
23
- ) -> Optional[MutableMapping[str, Any]]:
24
- if not cursor_field:
25
- return None
26
- if self.is_state_message_compatible(state):
27
- return self.deserialize(state)
28
- return self.convert_from_sequential_state(cursor_field, state)
29
-
30
21
  @abstractmethod
31
22
  def deserialize(self, state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
32
23
  """
@@ -40,8 +31,11 @@ class AbstractStreamStateConverter(ABC):
40
31
 
41
32
  @abstractmethod
42
33
  def convert_from_sequential_state(
43
- self, cursor_field: "CursorField", stream_state: MutableMapping[str, Any]
44
- ) -> MutableMapping[str, Any]:
34
+ self,
35
+ cursor_field: "CursorField",
36
+ stream_state: MutableMapping[str, Any],
37
+ start: Any,
38
+ ) -> Tuple[Any, MutableMapping[str, Any]]:
45
39
  """
46
40
  Convert the state message to the format required by the ConcurrentCursor.
47
41
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  from abc import abstractmethod
6
6
  from datetime import datetime, timedelta
7
- from typing import Any, List, MutableMapping, Optional
7
+ from typing import Any, List, MutableMapping, Optional, Tuple
8
8
 
9
9
  import pendulum
10
10
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
@@ -16,9 +16,6 @@ from pendulum.datetime import DateTime
16
16
 
17
17
 
18
18
  class DateTimeStreamStateConverter(AbstractStreamStateConverter):
19
- START_KEY = "start"
20
- END_KEY = "end"
21
-
22
19
  @property
23
20
  @abstractmethod
24
21
  def _zero_value(self) -> Any:
@@ -62,7 +59,7 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
62
59
  for interval in sorted_intervals[1:]:
63
60
  last_end_time = merged_intervals[-1][self.END_KEY]
64
61
  current_start_time = interval[self.START_KEY]
65
- if self.compare_intervals(last_end_time, current_start_time):
62
+ if self._compare_intervals(last_end_time, current_start_time):
66
63
  merged_end_time = max(last_end_time, interval[self.END_KEY])
67
64
  merged_intervals[-1][self.END_KEY] = merged_end_time
68
65
  else:
@@ -70,10 +67,12 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
70
67
 
71
68
  return merged_intervals
72
69
 
73
- def compare_intervals(self, end_time: Any, start_time: Any) -> bool:
70
+ def _compare_intervals(self, end_time: Any, start_time: Any) -> bool:
74
71
  return bool(self.increment(end_time) >= start_time)
75
72
 
76
- def convert_from_sequential_state(self, cursor_field: CursorField, stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
73
+ def convert_from_sequential_state(
74
+ self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], start: datetime
75
+ ) -> Tuple[datetime, MutableMapping[str, Any]]:
77
76
  """
78
77
  Convert the state message to the format required by the ConcurrentCursor.
79
78
 
@@ -82,28 +81,35 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
82
81
  "state_type": ConcurrencyCompatibleStateType.date_range.value,
83
82
  "metadata": { … },
84
83
  "slices": [
85
- {starts: 0, end: "2021-01-18T21:18:20.000+00:00", finished_processing: true}]
84
+ {"start": "2021-01-18T21:18:20.000+00:00", "end": "2021-01-18T21:18:20.000+00:00"},
85
+ ]
86
86
  }
87
87
  """
88
+ sync_start = self._get_sync_start(cursor_field, stream_state, start)
88
89
  if self.is_state_message_compatible(stream_state):
89
- return stream_state
90
- if cursor_field.cursor_field_key in stream_state:
91
- slices = [
92
- {
93
- # TODO: if we migrate stored state to the concurrent state format, we may want this to be the config start date
94
- # instead of `zero_value`
95
- self.START_KEY: self.zero_value,
96
- self.END_KEY: self.parse_timestamp(stream_state[cursor_field.cursor_field_key]),
97
- },
98
- ]
99
- else:
100
- slices = []
101
- return {
90
+ return sync_start, stream_state
91
+
92
+ # Create a slice to represent the records synced during prior syncs.
93
+ # The start and end are the same to avoid confusion as to whether the records for this slice
94
+ # were actually synced
95
+ slices = [{self.START_KEY: sync_start, self.END_KEY: sync_start}]
96
+
97
+ return sync_start, {
102
98
  "state_type": ConcurrencyCompatibleStateType.date_range.value,
103
99
  "slices": slices,
104
100
  "legacy": stream_state,
105
101
  }
106
102
 
103
+ def _get_sync_start(self, cursor_field: CursorField, stream_state: MutableMapping[str, Any], start: Optional[Any]) -> datetime:
104
+ sync_start = self.parse_timestamp(start) if start is not None else self.zero_value
105
+ prev_sync_low_water_mark = (
106
+ self.parse_timestamp(stream_state[cursor_field.cursor_field_key]) if cursor_field.cursor_field_key in stream_state else None
107
+ )
108
+ if prev_sync_low_water_mark and prev_sync_low_water_mark >= sync_start:
109
+ return prev_sync_low_water_mark
110
+ else:
111
+ return sync_start
112
+
107
113
  def convert_to_sequential_state(self, cursor_field: CursorField, stream_state: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
108
114
  """
109
115
  Convert the state message from the concurrency-compatible format to the stream's original format.
@@ -113,10 +119,9 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
113
119
  """
114
120
  if self.is_state_message_compatible(stream_state):
115
121
  legacy_state = stream_state.get("legacy", {})
116
- if slices := stream_state.pop("slices", None):
117
- latest_complete_time = self._get_latest_complete_time(slices)
118
- if latest_complete_time:
119
- legacy_state.update({cursor_field.cursor_field_key: self.output_format(latest_complete_time)})
122
+ latest_complete_time = self._get_latest_complete_time(stream_state.get("slices", []))
123
+ if latest_complete_time is not None:
124
+ legacy_state.update({cursor_field.cursor_field_key: self.output_format(latest_complete_time)})
120
125
  return legacy_state or {}
121
126
  else:
122
127
  return stream_state
@@ -125,11 +130,12 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
125
130
  """
126
131
  Get the latest time before which all records have been processed.
127
132
  """
128
- if slices:
129
- first_interval = self.merge_intervals(slices)[0][self.END_KEY]
130
- return first_interval
131
- else:
132
- return None
133
+ if not slices:
134
+ raise RuntimeError("Expected at least one slice but there were none. This is unexpected; please contact Support.")
135
+
136
+ merged_intervals = self.merge_intervals(slices)
137
+ first_interval = merged_intervals[0]
138
+ return first_interval[self.END_KEY]
133
139
 
134
140
 
135
141
  class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.58.9
3
+ Version: 0.59.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -202,7 +202,7 @@ airbyte_cdk/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuY
202
202
  airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=W7WEz6FrfAjb0o_msnMBIESSVO1qJC2_A8ocYg55Rw4,3579
203
203
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=f48kLzOHYNeD7Tfsdy7WaZ__hB24SfCTcW5WpQedqTc,18648
204
204
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=8xDRpfktnARBbRi_RwznvKuoGrpPF2b6tQyloMwogkM,2013
205
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=GnSRDkEEvg2GNy_fEc9cWFoYI1oEfvzwg1vhRrusqWg,7105
205
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=_mAbnJILeiGOGBNXeeXrSSoz7rveEBMoL97569EPEBY,8106
206
206
  airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=w83pvFbw9vjfhbovw-LrCFiwQMO8hfo1Vm-1CB1SeXQ,2777
207
207
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=-WETGIY5_QFmVeDFiqm4WhRJ_nNCkfcDwOQqx6cSqrI,365
208
208
  airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=TicEVRyLt5Y85xa8bXsrGzjmNMJAulBLqe1LfBLnHmk,1540
@@ -214,8 +214,8 @@ airbyte_cdk/sources/streams/concurrent/partitions/record.py,sha256=-Q3zLex3CHOXi
214
214
  airbyte_cdk/sources/streams/concurrent/partitions/throttled_queue.py,sha256=P6KrMb4GtcDUbMcx7pVb7dfF_igeW9Utn2MFoVHkH6o,1589
215
215
  airbyte_cdk/sources/streams/concurrent/partitions/types.py,sha256=iVARnsGOSdvlSCqAf-yxc4_PUT3oOR9B6cyVNcLTjY8,932
216
216
  airbyte_cdk/sources/streams/concurrent/state_converters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
217
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=8_97TvqOSn5jxmHsNMmnFi_A4XxGtMrX5GG6eNOyLtA,2954
218
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=4K_Fa0nviG3L3Mu4YX3oEs1cAfE0QqzrKkkdJsL21M4,7002
217
+ airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py,sha256=jeVP3V9uDM_aCMh1G3kZNKafjopy1rZzIAJ8sU-69KU,2613
218
+ airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py,sha256=BvVwPiVHE-R7mWDjcqhsKnZyD6OcpmAYaih7pcmOUvo,7589
219
219
  airbyte_cdk/sources/streams/http/__init__.py,sha256=cTP2d7Wf0hYXaN20U0dtxKa1pFZ9rI-lBbkQ0UM1apQ,261
220
220
  airbyte_cdk/sources/streams/http/availability_strategy.py,sha256=MHgW42gwaevaCVnNLrUSE6WJHT4reeZ417nMWrmbC7o,6884
221
221
  airbyte_cdk/sources/streams/http/exceptions.py,sha256=OokLDI7W8hZvq9e15sL3em2AdwmzmcAl72Ms-i5l0Nw,1334
@@ -411,8 +411,8 @@ unit_tests/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYC
411
411
  unit_tests/sources/streams/concurrent/test_adapters.py,sha256=Y_c1vKCtGKEzrUSncmpgp0lgFnArmBrIrmLFaOIAxRg,15439
412
412
  unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py,sha256=a5JWWWc20zeEGmMzYzzk-_6XwfDEZOGgW287dVgft_8,1339
413
413
  unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py,sha256=Dc7PGfQge1ymxnaTlPKGMVOLCV81JCXoK1ciJPwHIhg,26347
414
- unit_tests/sources/streams/concurrent/test_cursor.py,sha256=ere6x4nhhtSmEdvDoksLvLJTFkcl29N7VI7Mj6jin-I,5385
415
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py,sha256=mzlm8JL5AuYmNX4r9VViLKNaU1zUyBvBQ3lIdChW9pU,19153
414
+ unit_tests/sources/streams/concurrent/test_cursor.py,sha256=0nFp9xauWxDVxJmFQLedvnxWKDoAlk9ChDkd0Mmrhm4,5951
415
+ unit_tests/sources/streams/concurrent/test_datetime_state_converter.py,sha256=vx-oPmGSzSfBM37ZXN_wXLeTOlozErqQoa5sc5zP42o,13287
416
416
  unit_tests/sources/streams/concurrent/test_default_stream.py,sha256=VLF46ESoRqcoALYCdrdZ2NDl5s2T1fRRWsYAy2-IwYw,6502
417
417
  unit_tests/sources/streams/concurrent/test_partition_reader.py,sha256=2uj7uV3ie0BMb--aa3MUru-f4jLiYUR-Nl0r3EhwxLQ,951
418
418
  unit_tests/sources/streams/concurrent/test_thread_pool_manager.py,sha256=UzlMhXTgXAuqqPrESGjkDG9JLj4UdPo2bx3T9oLCFpA,3140
@@ -420,7 +420,7 @@ unit_tests/sources/streams/concurrent/test_throttled_queue.py,sha256=05NgNkx5c5r
420
420
  unit_tests/sources/streams/concurrent/test_throttler.py,sha256=y1cWUdKZP5iy4FKWmGdgEk4e_0WY0VNgRvzmlU114NY,448
421
421
  unit_tests/sources/streams/concurrent/scenarios/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
422
422
  unit_tests/sources/streams/concurrent/scenarios/incremental_scenarios.py,sha256=TH4vzdHNWvw4JsF0v4n6wrR1Rnr-WfU3R6nnOwGLNwg,9751
423
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=cFqcfrk-P4Gz-5yUFjTXIZ5U0zhzg3jayZiF6W79oPc,5880
423
+ unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py,sha256=BGqaYgU_ow4PsuhDjFwAFkU1VCkUuromvTAUV5tOaJ8,5816
424
424
  unit_tests/sources/streams/concurrent/scenarios/stream_facade_scenarios.py,sha256=kDKKV0ApASyS5c2HYkKvYohSkT--46TqALirqU8POjg,13804
425
425
  unit_tests/sources/streams/concurrent/scenarios/test_concurrent_scenarios.py,sha256=Z_4-ClsxBupmN7Pbl8lF9bkSA9wnjLtrgA9WR_8VRi8,3757
426
426
  unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py,sha256=KqCLsXB_9rV4hNdSPrNynK3G-UIsipqsZT6X0Z-iM5E,13175
@@ -448,8 +448,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
448
448
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
449
449
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
450
450
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
451
- airbyte_cdk-0.58.9.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
452
- airbyte_cdk-0.58.9.dist-info/METADATA,sha256=c-ydl9BXY_-L2loNF2hjCcrt3PukOwp228pA6DRSk84,11073
453
- airbyte_cdk-0.58.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
454
- airbyte_cdk-0.58.9.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
455
- airbyte_cdk-0.58.9.dist-info/RECORD,,
451
+ airbyte_cdk-0.59.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
452
+ airbyte_cdk-0.59.0.dist-info/METADATA,sha256=5wiAdalwK0NUdE--8UKtiGbRy9ccsaY1T255M_Ei850,11073
453
+ airbyte_cdk-0.59.0.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
454
+ airbyte_cdk-0.59.0.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
455
+ airbyte_cdk-0.59.0.dist-info/RECORD,,
@@ -52,8 +52,7 @@ class StreamFacadeSource(ConcurrentSourceAdapter):
52
52
  def streams(self, config: Mapping[str, Any]) -> List[Stream]:
53
53
  state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in self._streams}, state=self._state)
54
54
  state_converter = StreamFacadeConcurrentConnectorStateConverter()
55
- stream_states = [state_converter.get_concurrent_stream_state(self._cursor_field, state_manager.get_stream_state(stream.name, stream.namespace))
56
- for stream in self._streams]
55
+ stream_states = [state_manager.get_stream_state(stream.name, stream.namespace) for stream in self._streams]
57
56
  return [
58
57
  StreamFacade.create_from_stream(
59
58
  stream,
@@ -69,6 +68,7 @@ class StreamFacadeSource(ConcurrentSourceAdapter):
69
68
  state_converter,
70
69
  self._cursor_field,
71
70
  self._cursor_boundaries,
71
+ None,
72
72
  )
73
73
  if self._cursor_field
74
74
  else NoopCursor(),
@@ -45,33 +45,50 @@ class ConcurrentCursorTest(TestCase):
45
45
  return ConcurrentCursor(
46
46
  _A_STREAM_NAME,
47
47
  _A_STREAM_NAMESPACE,
48
- self._state_converter.get_concurrent_stream_state(CursorField(_A_CURSOR_FIELD_KEY), {}),
48
+ {},
49
49
  self._message_repository,
50
50
  self._state_manager,
51
51
  self._state_converter,
52
52
  CursorField(_A_CURSOR_FIELD_KEY),
53
53
  _SLICE_BOUNDARY_FIELDS,
54
+ None,
54
55
  )
55
56
 
56
57
  def _cursor_without_slice_boundary_fields(self) -> ConcurrentCursor:
57
58
  return ConcurrentCursor(
58
59
  _A_STREAM_NAME,
59
60
  _A_STREAM_NAMESPACE,
60
- self._state_converter.get_concurrent_stream_state(CursorField(_A_CURSOR_FIELD_KEY), {}),
61
+ {},
61
62
  self._message_repository,
62
63
  self._state_manager,
63
64
  self._state_converter,
64
65
  CursorField(_A_CURSOR_FIELD_KEY),
65
66
  None,
67
+ None,
66
68
  )
67
69
 
68
70
  def test_given_boundary_fields_when_close_partition_then_emit_state(self) -> None:
69
- self._cursor_with_slice_boundary_fields().close_partition(
71
+ cursor = self._cursor_with_slice_boundary_fields()
72
+ cursor.close_partition(
70
73
  _partition(
71
74
  {_LOWER_SLICE_BOUNDARY_FIELD: 12, _UPPER_SLICE_BOUNDARY_FIELD: 30},
72
75
  )
73
76
  )
74
77
 
78
+ self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
79
+ self._state_manager.update_state_for_stream.assert_called_once_with(
80
+ _A_STREAM_NAME,
81
+ _A_STREAM_NAMESPACE,
82
+ {_A_CURSOR_FIELD_KEY: 0}, # State message is updated to the legacy format before being emitted
83
+ )
84
+
85
+ def test_given_boundary_fields_when_close_partition_then_emit_updated_state(self) -> None:
86
+ self._cursor_with_slice_boundary_fields().close_partition(
87
+ _partition(
88
+ {_LOWER_SLICE_BOUNDARY_FIELD: 0, _UPPER_SLICE_BOUNDARY_FIELD: 30},
89
+ )
90
+ )
91
+
75
92
  self._message_repository.emit_message.assert_called_once_with(self._state_manager.create_state_message.return_value)
76
93
  self._state_manager.update_state_for_stream.assert_called_once_with(
77
94
  _A_STREAM_NAME,
@@ -5,16 +5,6 @@
5
5
  from datetime import datetime, timezone
6
6
 
7
7
  import pytest
8
- from airbyte_cdk.models import (
9
- AirbyteStateBlob,
10
- AirbyteStateMessage,
11
- AirbyteStateType,
12
- AirbyteStream,
13
- AirbyteStreamState,
14
- StreamDescriptor,
15
- SyncMode,
16
- )
17
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
18
8
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
19
9
  from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ConcurrencyCompatibleStateType
20
10
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -23,115 +13,12 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
23
13
  )
24
14
 
25
15
 
26
- @pytest.mark.parametrize(
27
- "converter, stream, input_state, expected_output_state",
28
- [
29
- pytest.param(
30
- EpochValueConcurrentStreamStateConverter(),
31
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
32
- [],
33
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
34
- id="no-input-state-epoch",
35
- ),
36
- pytest.param(
37
- EpochValueConcurrentStreamStateConverter(),
38
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
39
- [
40
- AirbyteStateMessage(
41
- type=AirbyteStateType.STREAM,
42
- stream=AirbyteStreamState(
43
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
44
- stream_state=AirbyteStateBlob.parse_obj({"created_at": 1703020837}),
45
- ),
46
- ),
47
- ],
48
- {
49
- "legacy": {"created_at": 1703020837},
50
- "slices": [{"end": datetime(2023, 12, 19, 21, 20, 37, tzinfo=timezone.utc),
51
- "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
52
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
53
- },
54
- id="incompatible-input-state-epoch",
55
- ),
56
- pytest.param(
57
- EpochValueConcurrentStreamStateConverter(),
58
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
59
- [
60
- AirbyteStateMessage(
61
- type=AirbyteStateType.STREAM,
62
- stream=AirbyteStreamState(
63
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
64
- stream_state=AirbyteStateBlob.parse_obj(
65
- {
66
- "created_at": 1703020837,
67
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
68
- },
69
- ),
70
- ),
71
- ),
72
- ],
73
- {"created_at": 1703020837, "state_type": ConcurrencyCompatibleStateType.date_range.value},
74
- id="compatible-input-state-epoch",
75
- ),
76
- pytest.param(
77
- IsoMillisConcurrentStreamStateConverter(),
78
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
79
- [],
80
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
81
- id="no-input-state-isomillis",
82
- ),
83
- pytest.param(
84
- IsoMillisConcurrentStreamStateConverter(),
85
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
86
- [
87
- AirbyteStateMessage(
88
- type=AirbyteStateType.STREAM,
89
- stream=AirbyteStreamState(
90
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
91
- stream_state=AirbyteStateBlob.parse_obj({"created_at": "2021-01-18T21:18:20.000Z"}),
92
- ),
93
- ),
94
- ],
95
- {
96
- "legacy": {"created_at": "2021-01-18T21:18:20.000Z"},
97
- "slices": [{"end": datetime(2021, 1, 18, 21, 18, 20, tzinfo=timezone.utc),
98
- "start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
99
- "state_type": ConcurrencyCompatibleStateType.date_range.value},
100
- id="incompatible-input-state-isomillis",
101
- ),
102
- pytest.param(
103
- IsoMillisConcurrentStreamStateConverter(),
104
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
105
- [
106
- AirbyteStateMessage(
107
- type=AirbyteStateType.STREAM,
108
- stream=AirbyteStreamState(
109
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
110
- stream_state=AirbyteStateBlob.parse_obj(
111
- {
112
- "created_at": "2021-01-18T21:18:20.000Z",
113
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
114
- },
115
- ),
116
- ),
117
- ),
118
- ],
119
- {"created_at": "2021-01-18T21:18:20.000Z", "state_type": ConcurrencyCompatibleStateType.date_range.value},
120
- id="compatible-input-state-isomillis",
121
- ),
122
- ],
123
- )
124
- def test_concurrent_connector_state_manager_get_stream_state(converter, stream, input_state, expected_output_state):
125
- state_manager = ConnectorStateManager({"stream1": stream}, input_state)
126
- assert converter.get_concurrent_stream_state(CursorField("created_at"), state_manager.get_stream_state("stream1", None)) == expected_output_state
127
-
128
-
129
16
  @pytest.mark.parametrize(
130
17
  "converter, input_state, is_compatible",
131
18
  [
132
19
  pytest.param(
133
20
  EpochValueConcurrentStreamStateConverter(),
134
- {'state_type': 'date-range'},
21
+ {"state_type": "date-range"},
135
22
  True,
136
23
  id="no-input-state-is-compatible-epoch",
137
24
  ),
@@ -163,7 +50,7 @@ def test_concurrent_connector_state_manager_get_stream_state(converter, stream,
163
50
  ),
164
51
  pytest.param(
165
52
  IsoMillisConcurrentStreamStateConverter(),
166
- {'state_type': 'date-range'},
53
+ {"state_type": "date-range"},
167
54
  True,
168
55
  id="no-input-state-is-compatible-isomillis",
169
56
  ),
@@ -200,22 +87,106 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
200
87
 
201
88
 
202
89
  @pytest.mark.parametrize(
203
- "converter, stream, sequential_state, expected_output_state",
90
+ "converter,start,state,expected_start",
91
+ [
92
+ pytest.param(
93
+ EpochValueConcurrentStreamStateConverter(),
94
+ None,
95
+ {},
96
+ EpochValueConcurrentStreamStateConverter().zero_value,
97
+ id="epoch-converter-no-state-no-start-start-is-zero-value"
98
+ ),
99
+ pytest.param(
100
+ EpochValueConcurrentStreamStateConverter(),
101
+ 1617030403,
102
+ {},
103
+ datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
104
+ id="epoch-converter-no-state-with-start-start-is-start"
105
+ ),
106
+ pytest.param(
107
+ EpochValueConcurrentStreamStateConverter(),
108
+ None,
109
+ {"created_at": 1617030404},
110
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
111
+ id="epoch-converter-state-without-start-start-is-from-state"
112
+ ),
113
+ pytest.param(
114
+ EpochValueConcurrentStreamStateConverter(),
115
+ 1617030404,
116
+ {"created_at": 1617030403},
117
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
118
+ id="epoch-converter-state-before-start-start-is-start"
119
+ ),
120
+ pytest.param(
121
+ EpochValueConcurrentStreamStateConverter(),
122
+ 1617030403,
123
+ {"created_at": 1617030404},
124
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
125
+ id="epoch-converter-state-after-start-start-is-from-state"
126
+ ),
127
+ pytest.param(
128
+ IsoMillisConcurrentStreamStateConverter(),
129
+ None,
130
+ {},
131
+ IsoMillisConcurrentStreamStateConverter().zero_value,
132
+ id="isomillis-converter-no-state-no-start-start-is-zero-value"
133
+ ),
134
+ pytest.param(
135
+ IsoMillisConcurrentStreamStateConverter(),
136
+ "2021-08-22T05:03:27.000Z",
137
+ {},
138
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
139
+ id="isomillis-converter-no-state-with-start-start-is-start"
140
+ ),
141
+ pytest.param(
142
+ IsoMillisConcurrentStreamStateConverter(),
143
+ None,
144
+ {"created_at": "2021-08-22T05:03:27.000Z"},
145
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
146
+ id="isomillis-converter-state-without-start-start-is-from-state"
147
+ ),
148
+ pytest.param(
149
+ IsoMillisConcurrentStreamStateConverter(),
150
+ "2022-08-22T05:03:27.000Z",
151
+ {"created_at": "2021-08-22T05:03:27.000Z"},
152
+ datetime(2022, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
153
+ id="isomillis-converter-state-before-start-start-is-start"
154
+ ),
155
+ pytest.param(
156
+ IsoMillisConcurrentStreamStateConverter(),
157
+ "2022-08-22T05:03:27.000Z",
158
+ {"created_at": "2023-08-22T05:03:27.000Z"},
159
+ datetime(2023, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
160
+ id="isomillis-converter-state-after-start-start-is-from-state"
161
+ ),
162
+ ]
163
+ )
164
+ def test_get_sync_start(converter, start, state, expected_start):
165
+ assert converter._get_sync_start(CursorField("created_at"), state, start) == expected_start
166
+
167
+
168
+ @pytest.mark.parametrize(
169
+ "converter, start, sequential_state, expected_output_state",
204
170
  [
205
171
  pytest.param(
206
172
  EpochValueConcurrentStreamStateConverter(),
207
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
173
+ 0,
208
174
  {},
209
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
175
+ {
176
+ "legacy": {},
177
+ "slices": [{"start": EpochValueConcurrentStreamStateConverter().zero_value,
178
+ "end": EpochValueConcurrentStreamStateConverter().zero_value}],
179
+ "state_type": "date-range",
180
+ },
210
181
  id="empty-input-state-epoch",
211
182
  ),
212
183
  pytest.param(
213
184
  EpochValueConcurrentStreamStateConverter(),
214
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
185
+ 1617030403,
215
186
  {"created": 1617030403},
216
187
  {
217
188
  "state_type": "date-range",
218
- "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
189
+ "slices": [{"start": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
219
190
  "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
220
191
  "legacy": {"created": 1617030403},
221
192
  },
@@ -223,18 +194,11 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
223
194
  ),
224
195
  pytest.param(
225
196
  IsoMillisConcurrentStreamStateConverter(),
226
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
227
- {},
228
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
229
- id="empty-input-state-isomillis",
230
- ),
231
- pytest.param(
232
- IsoMillisConcurrentStreamStateConverter(),
233
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
197
+ "2020-01-01T00:00:00.000Z",
234
198
  {"created": "2021-08-22T05:03:27.000Z"},
235
199
  {
236
200
  "state_type": "date-range",
237
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
201
+ "slices": [{"start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
238
202
  "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}],
239
203
  "legacy": {"created": "2021-08-22T05:03:27.000Z"},
240
204
  },
@@ -242,186 +206,120 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
242
206
  ),
243
207
  ],
244
208
  )
245
- def test_convert_from_sequential_state(converter, stream, sequential_state, expected_output_state):
209
+ def test_convert_from_sequential_state(converter, start, sequential_state, expected_output_state):
246
210
  comparison_format = "%Y-%m-%dT%H:%M:%S.%f"
247
211
  if expected_output_state["slices"]:
248
- conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state)
212
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
249
213
  assert conversion["state_type"] == expected_output_state["state_type"]
250
214
  assert conversion["legacy"] == expected_output_state["legacy"]
251
215
  for actual, expected in zip(conversion["slices"], expected_output_state["slices"]):
252
216
  assert actual["start"].strftime(comparison_format) == expected["start"].strftime(comparison_format)
253
217
  assert actual["end"].strftime(comparison_format) == expected["end"].strftime(comparison_format)
254
218
  else:
255
- assert converter.convert_from_sequential_state(CursorField("created"), sequential_state) == expected_output_state
219
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
220
+ assert conversion == expected_output_state
256
221
 
257
222
 
258
223
  @pytest.mark.parametrize(
259
- "converter, stream, concurrent_state, expected_output_state",
224
+ "converter, concurrent_state, expected_output_state",
260
225
  [
261
226
  pytest.param(
262
227
  EpochValueConcurrentStreamStateConverter(),
263
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
264
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
265
- {},
266
- id="empty-input-state-epoch",
228
+ {
229
+ "state_type": "date-range",
230
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
231
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
232
+ },
233
+ {"created": 1617030403},
234
+ id="epoch-single-slice",
267
235
  ),
268
236
  pytest.param(
269
237
  EpochValueConcurrentStreamStateConverter(),
270
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
271
238
  {
272
239
  "state_type": "date-range",
273
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
274
- "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}]},
240
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
241
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
242
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
243
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
244
+ },
245
+ {"created": 1648566403},
246
+ id="epoch-overlapping-slices",
247
+ ),
248
+ pytest.param(
249
+ EpochValueConcurrentStreamStateConverter(),
250
+ {
251
+ "state_type": "date-range",
252
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
253
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
254
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
255
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
256
+ },
275
257
  {"created": 1617030403},
276
- id="with-input-state-epoch",
258
+ id="epoch-multiple-slices",
277
259
  ),
278
260
  pytest.param(
279
261
  IsoMillisConcurrentStreamStateConverter(),
280
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
281
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
282
- {},
283
- id="empty-input-state-isomillis",
262
+ {
263
+ "state_type": "date-range",
264
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
265
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
266
+ },
267
+ {"created": "2021-03-29T15:06:43.000Z"},
268
+ id="isomillis-single-slice",
284
269
  ),
285
270
  pytest.param(
286
271
  IsoMillisConcurrentStreamStateConverter(),
287
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
288
272
  {
289
273
  "state_type": "date-range",
290
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
291
- "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}]},
292
- {"created": "2021-08-22T05:03:27.000Z"},
293
- id="with-input-state-isomillis",
274
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
275
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
276
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
277
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
278
+ },
279
+ {"created": "2022-03-29T15:06:43.000Z"},
280
+ id="isomillis-overlapping-slices",
281
+ ),
282
+ pytest.param(
283
+ IsoMillisConcurrentStreamStateConverter(),
284
+ {
285
+ "state_type": "date-range",
286
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
287
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
288
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
289
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
290
+ },
291
+ {"created": "2021-03-29T15:06:43.000Z"},
292
+ id="isomillis-multiple-slices",
294
293
  ),
295
294
  ],
296
295
  )
297
- def test_convert_to_sequential_state(converter, stream, concurrent_state, expected_output_state):
296
+ def test_convert_to_sequential_state(converter, concurrent_state, expected_output_state):
298
297
  assert converter.convert_to_sequential_state(CursorField("created"), concurrent_state) == expected_output_state
299
298
 
300
299
 
301
300
  @pytest.mark.parametrize(
302
- "converter, input_intervals, expected_merged_intervals",
301
+ "converter, concurrent_state, expected_output_state",
303
302
  [
304
303
  pytest.param(
305
304
  EpochValueConcurrentStreamStateConverter(),
306
- [],
307
- [],
308
- id="no-intervals-epoch",
309
- ),
310
- pytest.param(
311
- EpochValueConcurrentStreamStateConverter(),
312
- [{"start": 0, "end": 1}],
313
- [{"start": 0, "end": 1}],
314
- id="single-interval-epoch",
315
- ),
316
- pytest.param(
317
- EpochValueConcurrentStreamStateConverter(),
318
- [{"start": 0, "end": 1}, {"start": 0, "end": 1}],
319
- [{"start": 0, "end": 1}],
320
- id="duplicate-intervals-epoch",
321
- ),
322
- pytest.param(
323
- EpochValueConcurrentStreamStateConverter(),
324
- [{"start": 0, "end": 1}, {"start": 0, "end": 2}],
325
- [{"start": 0, "end": 2}],
326
- id="overlapping-intervals-epoch",
327
- ),
328
- pytest.param(
329
- EpochValueConcurrentStreamStateConverter(),
330
- [{"start": 0, "end": 3}, {"start": 1, "end": 2}],
331
- [{"start": 0, "end": 3}],
332
- id="enclosed-intervals-epoch",
333
- ),
334
- pytest.param(
335
- EpochValueConcurrentStreamStateConverter(),
336
- [{"start": 1, "end": 2}, {"start": 0, "end": 1}],
337
- [{"start": 0, "end": 2}],
338
- id="unordered-intervals-epoch",
339
- ),
340
- pytest.param(
341
- EpochValueConcurrentStreamStateConverter(),
342
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}],
343
- [{"start": 0, "end": 3}],
344
- id="adjacent-intervals-epoch",
345
- ),
346
- pytest.param(
347
- EpochValueConcurrentStreamStateConverter(),
348
- [{"start": 3, "end": 4}, {"start": 0, "end": 1}],
349
- [{"start": 0, "end": 1}, {"start": 3, "end": 4}],
350
- id="nonoverlapping-intervals-epoch",
351
- ),
352
- pytest.param(
353
- EpochValueConcurrentStreamStateConverter(),
354
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}, {"start": 10, "end": 11}, {"start": 1, "end": 4}],
355
- [{"start": 0, "end": 4}, {"start": 10, "end": 11}],
356
- id="overlapping-and-nonoverlapping-intervals-epoch",
357
- ),
358
- pytest.param(
359
- IsoMillisConcurrentStreamStateConverter(),
360
- [],
361
- [],
362
- id="no-intervals-isomillis",
363
- ),
364
- pytest.param(
365
- IsoMillisConcurrentStreamStateConverter(),
366
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
367
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
368
- id="single-interval-isomillis",
369
- ),
370
- pytest.param(
371
- IsoMillisConcurrentStreamStateConverter(),
372
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
373
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
374
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
375
- id="duplicate-intervals-isomillis",
376
- ),
377
- pytest.param(
378
- IsoMillisConcurrentStreamStateConverter(),
379
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"},
380
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
381
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
382
- id="overlapping-intervals-isomillis",
383
- ),
384
- pytest.param(
385
- IsoMillisConcurrentStreamStateConverter(),
386
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
387
- {"start": "2022-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
388
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
389
- id="enclosed-intervals-isomillis",
390
- ),
391
- pytest.param(
392
- IsoMillisConcurrentStreamStateConverter(),
393
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
394
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
395
- [{"start": 0, "end": 2}],
396
- id="unordered-intervals-isomillis",
397
- ),
398
- pytest.param(
399
- IsoMillisConcurrentStreamStateConverter(),
400
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
401
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"}],
402
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
403
- id="adjacent-intervals-isomillis",
404
- ),
405
- pytest.param(
406
- IsoMillisConcurrentStreamStateConverter(),
407
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
408
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
409
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
410
- {"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
411
- id="nonoverlapping-intervals-isomillis",
305
+ {
306
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
307
+ "start": EpochValueConcurrentStreamStateConverter().zero_value,
308
+ },
309
+ {"created": 0},
310
+ id="empty-slices-epoch",
412
311
  ),
413
312
  pytest.param(
414
313
  IsoMillisConcurrentStreamStateConverter(),
415
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
416
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"},
417
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"},
418
- {"start": "2022-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"}],
419
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"},
420
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"}],
421
- id="overlapping-and-nonoverlapping-intervals-isomillis",
314
+ {
315
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
316
+ "start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
317
+ },
318
+ {"created": "2021-08-22T05:03:27.000Z"},
319
+ id="empty-slices-isomillis",
422
320
  ),
423
321
  ],
424
322
  )
425
- def test_merge_intervals(converter, input_intervals, expected_merged_intervals):
426
- parsed_intervals = [{"start": converter.parse_timestamp(i["start"]), "end": converter.parse_timestamp(i["end"])} for i in input_intervals]
427
- return converter.merge_intervals(parsed_intervals) == expected_merged_intervals
323
+ def test_convert_to_sequential_state_no_slices_returns_legacy_state(converter, concurrent_state, expected_output_state):
324
+ with pytest.raises(RuntimeError):
325
+ converter.convert_to_sequential_state(CursorField("created"), concurrent_state)