airbyte-cdk 0.58.8__py3-none-any.whl → 0.59.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (23)
  1. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +20 -21
  2. airbyte_cdk/sources/concurrent_source/concurrent_source.py +4 -3
  3. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +15 -18
  4. airbyte_cdk/sources/concurrent_source/throttler.py +25 -0
  5. airbyte_cdk/sources/streams/concurrent/cursor.py +29 -8
  6. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +3 -5
  7. airbyte_cdk/sources/streams/concurrent/partition_reader.py +3 -4
  8. airbyte_cdk/sources/streams/concurrent/partitions/throttled_queue.py +41 -0
  9. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +6 -12
  10. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +36 -30
  11. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/METADATA +1 -1
  12. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/RECORD +23 -19
  13. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
  14. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +4 -10
  15. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +82 -12
  16. unit_tests/sources/streams/concurrent/test_cursor.py +20 -3
  17. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +166 -268
  18. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +2 -15
  19. unit_tests/sources/streams/concurrent/test_throttled_queue.py +65 -0
  20. unit_tests/sources/streams/concurrent/test_throttler.py +13 -0
  21. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/LICENSE.txt +0 -0
  22. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/WHEEL +0 -0
  23. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/top_level.txt +0 -0
@@ -5,16 +5,6 @@
5
5
  from datetime import datetime, timezone
6
6
 
7
7
  import pytest
8
- from airbyte_cdk.models import (
9
- AirbyteStateBlob,
10
- AirbyteStateMessage,
11
- AirbyteStateType,
12
- AirbyteStream,
13
- AirbyteStreamState,
14
- StreamDescriptor,
15
- SyncMode,
16
- )
17
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
18
8
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
19
9
  from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ConcurrencyCompatibleStateType
20
10
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -23,115 +13,12 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
23
13
  )
24
14
 
25
15
 
26
- @pytest.mark.parametrize(
27
- "converter, stream, input_state, expected_output_state",
28
- [
29
- pytest.param(
30
- EpochValueConcurrentStreamStateConverter(),
31
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
32
- [],
33
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
34
- id="no-input-state-epoch",
35
- ),
36
- pytest.param(
37
- EpochValueConcurrentStreamStateConverter(),
38
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
39
- [
40
- AirbyteStateMessage(
41
- type=AirbyteStateType.STREAM,
42
- stream=AirbyteStreamState(
43
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
44
- stream_state=AirbyteStateBlob.parse_obj({"created_at": 1703020837}),
45
- ),
46
- ),
47
- ],
48
- {
49
- "legacy": {"created_at": 1703020837},
50
- "slices": [{"end": datetime(2023, 12, 19, 21, 20, 37, tzinfo=timezone.utc),
51
- "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
52
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
53
- },
54
- id="incompatible-input-state-epoch",
55
- ),
56
- pytest.param(
57
- EpochValueConcurrentStreamStateConverter(),
58
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
59
- [
60
- AirbyteStateMessage(
61
- type=AirbyteStateType.STREAM,
62
- stream=AirbyteStreamState(
63
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
64
- stream_state=AirbyteStateBlob.parse_obj(
65
- {
66
- "created_at": 1703020837,
67
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
68
- },
69
- ),
70
- ),
71
- ),
72
- ],
73
- {"created_at": 1703020837, "state_type": ConcurrencyCompatibleStateType.date_range.value},
74
- id="compatible-input-state-epoch",
75
- ),
76
- pytest.param(
77
- IsoMillisConcurrentStreamStateConverter(),
78
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
79
- [],
80
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
81
- id="no-input-state-isomillis",
82
- ),
83
- pytest.param(
84
- IsoMillisConcurrentStreamStateConverter(),
85
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
86
- [
87
- AirbyteStateMessage(
88
- type=AirbyteStateType.STREAM,
89
- stream=AirbyteStreamState(
90
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
91
- stream_state=AirbyteStateBlob.parse_obj({"created_at": "2021-01-18T21:18:20.000Z"}),
92
- ),
93
- ),
94
- ],
95
- {
96
- "legacy": {"created_at": "2021-01-18T21:18:20.000Z"},
97
- "slices": [{"end": datetime(2021, 1, 18, 21, 18, 20, tzinfo=timezone.utc),
98
- "start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
99
- "state_type": ConcurrencyCompatibleStateType.date_range.value},
100
- id="incompatible-input-state-isomillis",
101
- ),
102
- pytest.param(
103
- IsoMillisConcurrentStreamStateConverter(),
104
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
105
- [
106
- AirbyteStateMessage(
107
- type=AirbyteStateType.STREAM,
108
- stream=AirbyteStreamState(
109
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
110
- stream_state=AirbyteStateBlob.parse_obj(
111
- {
112
- "created_at": "2021-01-18T21:18:20.000Z",
113
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
114
- },
115
- ),
116
- ),
117
- ),
118
- ],
119
- {"created_at": "2021-01-18T21:18:20.000Z", "state_type": ConcurrencyCompatibleStateType.date_range.value},
120
- id="compatible-input-state-isomillis",
121
- ),
122
- ],
123
- )
124
- def test_concurrent_connector_state_manager_get_stream_state(converter, stream, input_state, expected_output_state):
125
- state_manager = ConnectorStateManager({"stream1": stream}, input_state)
126
- assert converter.get_concurrent_stream_state(CursorField("created_at"), state_manager.get_stream_state("stream1", None)) == expected_output_state
127
-
128
-
129
16
  @pytest.mark.parametrize(
130
17
  "converter, input_state, is_compatible",
131
18
  [
132
19
  pytest.param(
133
20
  EpochValueConcurrentStreamStateConverter(),
134
- {'state_type': 'date-range'},
21
+ {"state_type": "date-range"},
135
22
  True,
136
23
  id="no-input-state-is-compatible-epoch",
137
24
  ),
@@ -163,7 +50,7 @@ def test_concurrent_connector_state_manager_get_stream_state(converter, stream,
163
50
  ),
164
51
  pytest.param(
165
52
  IsoMillisConcurrentStreamStateConverter(),
166
- {'state_type': 'date-range'},
53
+ {"state_type": "date-range"},
167
54
  True,
168
55
  id="no-input-state-is-compatible-isomillis",
169
56
  ),
@@ -200,22 +87,106 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
200
87
 
201
88
 
202
89
  @pytest.mark.parametrize(
203
- "converter, stream, sequential_state, expected_output_state",
90
+ "converter,start,state,expected_start",
91
+ [
92
+ pytest.param(
93
+ EpochValueConcurrentStreamStateConverter(),
94
+ None,
95
+ {},
96
+ EpochValueConcurrentStreamStateConverter().zero_value,
97
+ id="epoch-converter-no-state-no-start-start-is-zero-value"
98
+ ),
99
+ pytest.param(
100
+ EpochValueConcurrentStreamStateConverter(),
101
+ 1617030403,
102
+ {},
103
+ datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
104
+ id="epoch-converter-no-state-with-start-start-is-start"
105
+ ),
106
+ pytest.param(
107
+ EpochValueConcurrentStreamStateConverter(),
108
+ None,
109
+ {"created_at": 1617030404},
110
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
111
+ id="epoch-converter-state-without-start-start-is-from-state"
112
+ ),
113
+ pytest.param(
114
+ EpochValueConcurrentStreamStateConverter(),
115
+ 1617030404,
116
+ {"created_at": 1617030403},
117
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
118
+ id="epoch-converter-state-before-start-start-is-start"
119
+ ),
120
+ pytest.param(
121
+ EpochValueConcurrentStreamStateConverter(),
122
+ 1617030403,
123
+ {"created_at": 1617030404},
124
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
125
+ id="epoch-converter-state-after-start-start-is-from-state"
126
+ ),
127
+ pytest.param(
128
+ IsoMillisConcurrentStreamStateConverter(),
129
+ None,
130
+ {},
131
+ IsoMillisConcurrentStreamStateConverter().zero_value,
132
+ id="isomillis-converter-no-state-no-start-start-is-zero-value"
133
+ ),
134
+ pytest.param(
135
+ IsoMillisConcurrentStreamStateConverter(),
136
+ "2021-08-22T05:03:27.000Z",
137
+ {},
138
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
139
+ id="isomillis-converter-no-state-with-start-start-is-start"
140
+ ),
141
+ pytest.param(
142
+ IsoMillisConcurrentStreamStateConverter(),
143
+ None,
144
+ {"created_at": "2021-08-22T05:03:27.000Z"},
145
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
146
+ id="isomillis-converter-state-without-start-start-is-from-state"
147
+ ),
148
+ pytest.param(
149
+ IsoMillisConcurrentStreamStateConverter(),
150
+ "2022-08-22T05:03:27.000Z",
151
+ {"created_at": "2021-08-22T05:03:27.000Z"},
152
+ datetime(2022, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
153
+ id="isomillis-converter-state-before-start-start-is-start"
154
+ ),
155
+ pytest.param(
156
+ IsoMillisConcurrentStreamStateConverter(),
157
+ "2022-08-22T05:03:27.000Z",
158
+ {"created_at": "2023-08-22T05:03:27.000Z"},
159
+ datetime(2023, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
160
+ id="isomillis-converter-state-after-start-start-is-from-state"
161
+ ),
162
+ ]
163
+ )
164
+ def test_get_sync_start(converter, start, state, expected_start):
165
+ assert converter._get_sync_start(CursorField("created_at"), state, start) == expected_start
166
+
167
+
168
+ @pytest.mark.parametrize(
169
+ "converter, start, sequential_state, expected_output_state",
204
170
  [
205
171
  pytest.param(
206
172
  EpochValueConcurrentStreamStateConverter(),
207
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
173
+ 0,
208
174
  {},
209
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
175
+ {
176
+ "legacy": {},
177
+ "slices": [{"start": EpochValueConcurrentStreamStateConverter().zero_value,
178
+ "end": EpochValueConcurrentStreamStateConverter().zero_value}],
179
+ "state_type": "date-range",
180
+ },
210
181
  id="empty-input-state-epoch",
211
182
  ),
212
183
  pytest.param(
213
184
  EpochValueConcurrentStreamStateConverter(),
214
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
185
+ 1617030403,
215
186
  {"created": 1617030403},
216
187
  {
217
188
  "state_type": "date-range",
218
- "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
189
+ "slices": [{"start": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
219
190
  "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
220
191
  "legacy": {"created": 1617030403},
221
192
  },
@@ -223,18 +194,11 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
223
194
  ),
224
195
  pytest.param(
225
196
  IsoMillisConcurrentStreamStateConverter(),
226
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
227
- {},
228
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
229
- id="empty-input-state-isomillis",
230
- ),
231
- pytest.param(
232
- IsoMillisConcurrentStreamStateConverter(),
233
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
197
+ "2020-01-01T00:00:00.000Z",
234
198
  {"created": "2021-08-22T05:03:27.000Z"},
235
199
  {
236
200
  "state_type": "date-range",
237
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
201
+ "slices": [{"start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
238
202
  "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}],
239
203
  "legacy": {"created": "2021-08-22T05:03:27.000Z"},
240
204
  },
@@ -242,186 +206,120 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
242
206
  ),
243
207
  ],
244
208
  )
245
- def test_convert_from_sequential_state(converter, stream, sequential_state, expected_output_state):
209
+ def test_convert_from_sequential_state(converter, start, sequential_state, expected_output_state):
246
210
  comparison_format = "%Y-%m-%dT%H:%M:%S.%f"
247
211
  if expected_output_state["slices"]:
248
- conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state)
212
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
249
213
  assert conversion["state_type"] == expected_output_state["state_type"]
250
214
  assert conversion["legacy"] == expected_output_state["legacy"]
251
215
  for actual, expected in zip(conversion["slices"], expected_output_state["slices"]):
252
216
  assert actual["start"].strftime(comparison_format) == expected["start"].strftime(comparison_format)
253
217
  assert actual["end"].strftime(comparison_format) == expected["end"].strftime(comparison_format)
254
218
  else:
255
- assert converter.convert_from_sequential_state(CursorField("created"), sequential_state) == expected_output_state
219
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
220
+ assert conversion == expected_output_state
256
221
 
257
222
 
258
223
  @pytest.mark.parametrize(
259
- "converter, stream, concurrent_state, expected_output_state",
224
+ "converter, concurrent_state, expected_output_state",
260
225
  [
261
226
  pytest.param(
262
227
  EpochValueConcurrentStreamStateConverter(),
263
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
264
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
265
- {},
266
- id="empty-input-state-epoch",
228
+ {
229
+ "state_type": "date-range",
230
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
231
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
232
+ },
233
+ {"created": 1617030403},
234
+ id="epoch-single-slice",
267
235
  ),
268
236
  pytest.param(
269
237
  EpochValueConcurrentStreamStateConverter(),
270
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
271
238
  {
272
239
  "state_type": "date-range",
273
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
274
- "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}]},
240
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
241
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
242
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
243
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
244
+ },
245
+ {"created": 1648566403},
246
+ id="epoch-overlapping-slices",
247
+ ),
248
+ pytest.param(
249
+ EpochValueConcurrentStreamStateConverter(),
250
+ {
251
+ "state_type": "date-range",
252
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
253
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
254
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
255
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
256
+ },
275
257
  {"created": 1617030403},
276
- id="with-input-state-epoch",
258
+ id="epoch-multiple-slices",
277
259
  ),
278
260
  pytest.param(
279
261
  IsoMillisConcurrentStreamStateConverter(),
280
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
281
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
282
- {},
283
- id="empty-input-state-isomillis",
262
+ {
263
+ "state_type": "date-range",
264
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
265
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
266
+ },
267
+ {"created": "2021-03-29T15:06:43.000Z"},
268
+ id="isomillis-single-slice",
284
269
  ),
285
270
  pytest.param(
286
271
  IsoMillisConcurrentStreamStateConverter(),
287
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
288
272
  {
289
273
  "state_type": "date-range",
290
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
291
- "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}]},
292
- {"created": "2021-08-22T05:03:27.000Z"},
293
- id="with-input-state-isomillis",
274
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
275
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
276
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
277
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
278
+ },
279
+ {"created": "2022-03-29T15:06:43.000Z"},
280
+ id="isomillis-overlapping-slices",
281
+ ),
282
+ pytest.param(
283
+ IsoMillisConcurrentStreamStateConverter(),
284
+ {
285
+ "state_type": "date-range",
286
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
287
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
288
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
289
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
290
+ },
291
+ {"created": "2021-03-29T15:06:43.000Z"},
292
+ id="isomillis-multiple-slices",
294
293
  ),
295
294
  ],
296
295
  )
297
- def test_convert_to_sequential_state(converter, stream, concurrent_state, expected_output_state):
296
+ def test_convert_to_sequential_state(converter, concurrent_state, expected_output_state):
298
297
  assert converter.convert_to_sequential_state(CursorField("created"), concurrent_state) == expected_output_state
299
298
 
300
299
 
301
300
  @pytest.mark.parametrize(
302
- "converter, input_intervals, expected_merged_intervals",
301
+ "converter, concurrent_state, expected_output_state",
303
302
  [
304
303
  pytest.param(
305
304
  EpochValueConcurrentStreamStateConverter(),
306
- [],
307
- [],
308
- id="no-intervals-epoch",
309
- ),
310
- pytest.param(
311
- EpochValueConcurrentStreamStateConverter(),
312
- [{"start": 0, "end": 1}],
313
- [{"start": 0, "end": 1}],
314
- id="single-interval-epoch",
315
- ),
316
- pytest.param(
317
- EpochValueConcurrentStreamStateConverter(),
318
- [{"start": 0, "end": 1}, {"start": 0, "end": 1}],
319
- [{"start": 0, "end": 1}],
320
- id="duplicate-intervals-epoch",
321
- ),
322
- pytest.param(
323
- EpochValueConcurrentStreamStateConverter(),
324
- [{"start": 0, "end": 1}, {"start": 0, "end": 2}],
325
- [{"start": 0, "end": 2}],
326
- id="overlapping-intervals-epoch",
327
- ),
328
- pytest.param(
329
- EpochValueConcurrentStreamStateConverter(),
330
- [{"start": 0, "end": 3}, {"start": 1, "end": 2}],
331
- [{"start": 0, "end": 3}],
332
- id="enclosed-intervals-epoch",
333
- ),
334
- pytest.param(
335
- EpochValueConcurrentStreamStateConverter(),
336
- [{"start": 1, "end": 2}, {"start": 0, "end": 1}],
337
- [{"start": 0, "end": 2}],
338
- id="unordered-intervals-epoch",
339
- ),
340
- pytest.param(
341
- EpochValueConcurrentStreamStateConverter(),
342
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}],
343
- [{"start": 0, "end": 3}],
344
- id="adjacent-intervals-epoch",
345
- ),
346
- pytest.param(
347
- EpochValueConcurrentStreamStateConverter(),
348
- [{"start": 3, "end": 4}, {"start": 0, "end": 1}],
349
- [{"start": 0, "end": 1}, {"start": 3, "end": 4}],
350
- id="nonoverlapping-intervals-epoch",
351
- ),
352
- pytest.param(
353
- EpochValueConcurrentStreamStateConverter(),
354
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}, {"start": 10, "end": 11}, {"start": 1, "end": 4}],
355
- [{"start": 0, "end": 4}, {"start": 10, "end": 11}],
356
- id="overlapping-and-nonoverlapping-intervals-epoch",
357
- ),
358
- pytest.param(
359
- IsoMillisConcurrentStreamStateConverter(),
360
- [],
361
- [],
362
- id="no-intervals-isomillis",
363
- ),
364
- pytest.param(
365
- IsoMillisConcurrentStreamStateConverter(),
366
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
367
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
368
- id="single-interval-isomillis",
369
- ),
370
- pytest.param(
371
- IsoMillisConcurrentStreamStateConverter(),
372
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
373
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
374
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
375
- id="duplicate-intervals-isomillis",
376
- ),
377
- pytest.param(
378
- IsoMillisConcurrentStreamStateConverter(),
379
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"},
380
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
381
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
382
- id="overlapping-intervals-isomillis",
383
- ),
384
- pytest.param(
385
- IsoMillisConcurrentStreamStateConverter(),
386
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
387
- {"start": "2022-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
388
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
389
- id="enclosed-intervals-isomillis",
390
- ),
391
- pytest.param(
392
- IsoMillisConcurrentStreamStateConverter(),
393
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
394
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
395
- [{"start": 0, "end": 2}],
396
- id="unordered-intervals-isomillis",
397
- ),
398
- pytest.param(
399
- IsoMillisConcurrentStreamStateConverter(),
400
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
401
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"}],
402
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
403
- id="adjacent-intervals-isomillis",
404
- ),
405
- pytest.param(
406
- IsoMillisConcurrentStreamStateConverter(),
407
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
408
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
409
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
410
- {"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
411
- id="nonoverlapping-intervals-isomillis",
305
+ {
306
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
307
+ "start": EpochValueConcurrentStreamStateConverter().zero_value,
308
+ },
309
+ {"created": 0},
310
+ id="empty-slices-epoch",
412
311
  ),
413
312
  pytest.param(
414
313
  IsoMillisConcurrentStreamStateConverter(),
415
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
416
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"},
417
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"},
418
- {"start": "2022-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"}],
419
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"},
420
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"}],
421
- id="overlapping-and-nonoverlapping-intervals-isomillis",
314
+ {
315
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
316
+ "start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
317
+ },
318
+ {"created": "2021-08-22T05:03:27.000Z"},
319
+ id="empty-slices-isomillis",
422
320
  ),
423
321
  ],
424
322
  )
425
- def test_merge_intervals(converter, input_intervals, expected_merged_intervals):
426
- parsed_intervals = [{"start": converter.parse_timestamp(i["start"]), "end": converter.parse_timestamp(i["end"])} for i in input_intervals]
427
- return converter.merge_intervals(parsed_intervals) == expected_merged_intervals
323
+ def test_convert_to_sequential_state_no_slices_returns_legacy_state(converter, concurrent_state, expected_output_state):
324
+ with pytest.raises(RuntimeError):
325
+ converter.convert_to_sequential_state(CursorField("created"), concurrent_state)
@@ -3,7 +3,7 @@
3
3
  #
4
4
  from concurrent.futures import Future, ThreadPoolExecutor
5
5
  from unittest import TestCase
6
- from unittest.mock import Mock, patch
6
+ from unittest.mock import Mock
7
7
 
8
8
  from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
9
9
 
@@ -23,23 +23,10 @@ class ThreadPoolManagerTest(TestCase):
23
23
 
24
24
  assert len(self._thread_pool_manager._futures) == 1
25
25
 
26
- def test_submit_too_many_concurrent_tasks(self):
27
- future = Mock(spec=Future)
28
- future.exception.return_value = None
29
- future.done.side_effect = [False, True]
30
-
31
- with patch("time.sleep") as sleep_mock:
32
- self._thread_pool_manager._futures = [future]
33
- self._thread_pool_manager.submit(self._fn, self._arg)
34
- self._threadpool.submit.assert_called_with(self._fn, self._arg)
35
- sleep_mock.assert_called_with(_SLEEP_TIME)
36
-
37
- assert len(self._thread_pool_manager._futures) == 1
38
-
39
26
  def test_submit_task_previous_task_failed(self):
40
27
  future = Mock(spec=Future)
41
28
  future.exception.return_value = RuntimeError
42
- future.done.side_effect = [False, True]
29
+ future.done.side_effect = [True, True]
43
30
 
44
31
  self._thread_pool_manager._futures = [future]
45
32
 
@@ -0,0 +1,65 @@
1
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+
3
+ from queue import Queue
4
+ from unittest.mock import Mock
5
+
6
+ import pytest
7
+ from _queue import Empty
8
+ from airbyte_cdk.sources.concurrent_source.throttler import Throttler
9
+ from airbyte_cdk.sources.streams.concurrent.partitions.throttled_queue import ThrottledQueue
10
+
11
+ _AN_ITEM = Mock()
12
+
13
+
14
def test_new_throttled_queue_is_empty():
    """A ThrottledQueue built around a fresh Queue reports empty() as truthy."""
    subject = ThrottledQueue(Queue(), Mock(spec=Throttler), 100)

    is_empty = subject.empty()

    assert is_empty
21
+
22
+
23
def test_throttled_queue_is_not_empty_after_putting_an_item():
    """After a single put(), empty() on the ThrottledQueue is falsy."""
    subject = ThrottledQueue(Queue(), Mock(spec=Throttler), 100)
    subject.put(_AN_ITEM)

    is_empty = subject.empty()

    assert not is_empty
32
+
33
+
34
def test_throttled_queue_get_returns_item_if_any():
    """get() hands back the item that was put(), leaving the queue empty."""
    subject = ThrottledQueue(Queue(), Mock(spec=Throttler), 100)
    subject.put(_AN_ITEM)

    retrieved = subject.get()

    assert retrieved == _AN_ITEM
    assert subject.empty()
45
+
46
+
47
def test_throttled_queue_blocks_for_timeout_seconds_if_no_items():
    """get() on a ThrottledQueue calls the underlying queue's get() with block=True and the timeout.

    The underlying queue is a Mock, so nothing actually blocks; only the
    arguments forwarded to ``queue.get`` are checked.
    """
    queue = Mock(spec=Queue)
    throttler = Mock(spec=Throttler)
    timeout = 100
    throttled_queue = ThrottledQueue(queue, throttler, timeout)

    throttled_queue.get()

    # The previous `assert queue.get.is_called_once_with(...)` could never fail:
    # `is_called_once_with` is not a Mock assertion method, so attribute access
    # just creates a truthy child mock. `assert_called_once_with` raises
    # AssertionError on a mismatch.
    # NOTE(review): if this now fails, the old test was hiding a difference in
    # how ThrottledQueue.get forwards its arguments -- confirm against the class.
    queue.get.assert_called_once_with(block=True, timeout=timeout)
56
+
57
+
58
def test_throttled_queue_raises_an_error_if_no_items_after_timeout():
    """get() on a ThrottledQueue over an empty Queue raises Empty (timeout is 0.001)."""
    short_timeout = 0.001
    subject = ThrottledQueue(Queue(), Mock(spec=Throttler), short_timeout)

    with pytest.raises(Empty):
        subject.get()
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+
3
+ from unittest.mock import patch
4
+
5
+ from airbyte_cdk.sources.concurrent_source.throttler import Throttler
6
+
7
+
8
@patch('time.sleep', side_effect=lambda _: None)
@patch('airbyte_cdk.sources.concurrent_source.throttler.len', side_effect=[1, 1, 0])
def test_throttler(len_mock, sleep_mock):
    """wait_and_acquire() consults the patched ``len`` three times before returning.

    Stacked ``@patch`` decorators are applied bottom-up, so the mock for the
    innermost decorator (the patched ``len``) is the FIRST positional argument
    and the mock for ``time.sleep`` is the second. The parameters were
    previously named the other way round, so the old
    ``sleep_mock.call_count == 3`` was really counting calls to ``len``.
    """
    throttler = Throttler([], 0.1, 1)
    throttler.wait_and_acquire()
    # Same mock object the original assertion inspected, now under its real name:
    # the three values in `side_effect` are each consumed by one `len` call.
    assert len_mock.call_count == 3
    # NOTE(review): `sleep_mock` is still not asserted on. Add a call-count check
    # once the expected number of sleeps is confirmed against
    # Throttler.wait_and_acquire.