airbyte-cdk 0.58.8__py3-none-any.whl → 0.59.0__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +20 -21
  2. airbyte_cdk/sources/concurrent_source/concurrent_source.py +4 -3
  3. airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +15 -18
  4. airbyte_cdk/sources/concurrent_source/throttler.py +25 -0
  5. airbyte_cdk/sources/streams/concurrent/cursor.py +29 -8
  6. airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +3 -5
  7. airbyte_cdk/sources/streams/concurrent/partition_reader.py +3 -4
  8. airbyte_cdk/sources/streams/concurrent/partitions/throttled_queue.py +41 -0
  9. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +6 -12
  10. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +36 -30
  11. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/METADATA +1 -1
  12. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/RECORD +23 -19
  13. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
  14. unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +4 -10
  15. unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +82 -12
  16. unit_tests/sources/streams/concurrent/test_cursor.py +20 -3
  17. unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +166 -268
  18. unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +2 -15
  19. unit_tests/sources/streams/concurrent/test_throttled_queue.py +65 -0
  20. unit_tests/sources/streams/concurrent/test_throttler.py +13 -0
  21. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/LICENSE.txt +0 -0
  22. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/WHEEL +0 -0
  23. {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/top_level.txt +0 -0
@@ -5,16 +5,6 @@
5
5
  from datetime import datetime, timezone
6
6
 
7
7
  import pytest
8
- from airbyte_cdk.models import (
9
- AirbyteStateBlob,
10
- AirbyteStateMessage,
11
- AirbyteStateType,
12
- AirbyteStream,
13
- AirbyteStreamState,
14
- StreamDescriptor,
15
- SyncMode,
16
- )
17
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
18
8
  from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
19
9
  from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ConcurrencyCompatibleStateType
20
10
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -23,115 +13,12 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
23
13
  )
24
14
 
25
15
 
26
- @pytest.mark.parametrize(
27
- "converter, stream, input_state, expected_output_state",
28
- [
29
- pytest.param(
30
- EpochValueConcurrentStreamStateConverter(),
31
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
32
- [],
33
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
34
- id="no-input-state-epoch",
35
- ),
36
- pytest.param(
37
- EpochValueConcurrentStreamStateConverter(),
38
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
39
- [
40
- AirbyteStateMessage(
41
- type=AirbyteStateType.STREAM,
42
- stream=AirbyteStreamState(
43
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
44
- stream_state=AirbyteStateBlob.parse_obj({"created_at": 1703020837}),
45
- ),
46
- ),
47
- ],
48
- {
49
- "legacy": {"created_at": 1703020837},
50
- "slices": [{"end": datetime(2023, 12, 19, 21, 20, 37, tzinfo=timezone.utc),
51
- "start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
52
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
53
- },
54
- id="incompatible-input-state-epoch",
55
- ),
56
- pytest.param(
57
- EpochValueConcurrentStreamStateConverter(),
58
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
59
- [
60
- AirbyteStateMessage(
61
- type=AirbyteStateType.STREAM,
62
- stream=AirbyteStreamState(
63
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
64
- stream_state=AirbyteStateBlob.parse_obj(
65
- {
66
- "created_at": 1703020837,
67
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
68
- },
69
- ),
70
- ),
71
- ),
72
- ],
73
- {"created_at": 1703020837, "state_type": ConcurrencyCompatibleStateType.date_range.value},
74
- id="compatible-input-state-epoch",
75
- ),
76
- pytest.param(
77
- IsoMillisConcurrentStreamStateConverter(),
78
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
79
- [],
80
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
81
- id="no-input-state-isomillis",
82
- ),
83
- pytest.param(
84
- IsoMillisConcurrentStreamStateConverter(),
85
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
86
- [
87
- AirbyteStateMessage(
88
- type=AirbyteStateType.STREAM,
89
- stream=AirbyteStreamState(
90
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
91
- stream_state=AirbyteStateBlob.parse_obj({"created_at": "2021-01-18T21:18:20.000Z"}),
92
- ),
93
- ),
94
- ],
95
- {
96
- "legacy": {"created_at": "2021-01-18T21:18:20.000Z"},
97
- "slices": [{"end": datetime(2021, 1, 18, 21, 18, 20, tzinfo=timezone.utc),
98
- "start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
99
- "state_type": ConcurrencyCompatibleStateType.date_range.value},
100
- id="incompatible-input-state-isomillis",
101
- ),
102
- pytest.param(
103
- IsoMillisConcurrentStreamStateConverter(),
104
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
105
- [
106
- AirbyteStateMessage(
107
- type=AirbyteStateType.STREAM,
108
- stream=AirbyteStreamState(
109
- stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
110
- stream_state=AirbyteStateBlob.parse_obj(
111
- {
112
- "created_at": "2021-01-18T21:18:20.000Z",
113
- "state_type": ConcurrencyCompatibleStateType.date_range.value,
114
- },
115
- ),
116
- ),
117
- ),
118
- ],
119
- {"created_at": "2021-01-18T21:18:20.000Z", "state_type": ConcurrencyCompatibleStateType.date_range.value},
120
- id="compatible-input-state-isomillis",
121
- ),
122
- ],
123
- )
124
- def test_concurrent_connector_state_manager_get_stream_state(converter, stream, input_state, expected_output_state):
125
- state_manager = ConnectorStateManager({"stream1": stream}, input_state)
126
- assert converter.get_concurrent_stream_state(CursorField("created_at"), state_manager.get_stream_state("stream1", None)) == expected_output_state
127
-
128
-
129
16
  @pytest.mark.parametrize(
130
17
  "converter, input_state, is_compatible",
131
18
  [
132
19
  pytest.param(
133
20
  EpochValueConcurrentStreamStateConverter(),
134
- {'state_type': 'date-range'},
21
+ {"state_type": "date-range"},
135
22
  True,
136
23
  id="no-input-state-is-compatible-epoch",
137
24
  ),
@@ -163,7 +50,7 @@ def test_concurrent_connector_state_manager_get_stream_state(converter, stream,
163
50
  ),
164
51
  pytest.param(
165
52
  IsoMillisConcurrentStreamStateConverter(),
166
- {'state_type': 'date-range'},
53
+ {"state_type": "date-range"},
167
54
  True,
168
55
  id="no-input-state-is-compatible-isomillis",
169
56
  ),
@@ -200,22 +87,106 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
200
87
 
201
88
 
202
89
  @pytest.mark.parametrize(
203
- "converter, stream, sequential_state, expected_output_state",
90
+ "converter,start,state,expected_start",
91
+ [
92
+ pytest.param(
93
+ EpochValueConcurrentStreamStateConverter(),
94
+ None,
95
+ {},
96
+ EpochValueConcurrentStreamStateConverter().zero_value,
97
+ id="epoch-converter-no-state-no-start-start-is-zero-value"
98
+ ),
99
+ pytest.param(
100
+ EpochValueConcurrentStreamStateConverter(),
101
+ 1617030403,
102
+ {},
103
+ datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
104
+ id="epoch-converter-no-state-with-start-start-is-start"
105
+ ),
106
+ pytest.param(
107
+ EpochValueConcurrentStreamStateConverter(),
108
+ None,
109
+ {"created_at": 1617030404},
110
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
111
+ id="epoch-converter-state-without-start-start-is-from-state"
112
+ ),
113
+ pytest.param(
114
+ EpochValueConcurrentStreamStateConverter(),
115
+ 1617030404,
116
+ {"created_at": 1617030403},
117
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
118
+ id="epoch-converter-state-before-start-start-is-start"
119
+ ),
120
+ pytest.param(
121
+ EpochValueConcurrentStreamStateConverter(),
122
+ 1617030403,
123
+ {"created_at": 1617030404},
124
+ datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
125
+ id="epoch-converter-state-after-start-start-is-from-state"
126
+ ),
127
+ pytest.param(
128
+ IsoMillisConcurrentStreamStateConverter(),
129
+ None,
130
+ {},
131
+ IsoMillisConcurrentStreamStateConverter().zero_value,
132
+ id="isomillis-converter-no-state-no-start-start-is-zero-value"
133
+ ),
134
+ pytest.param(
135
+ IsoMillisConcurrentStreamStateConverter(),
136
+ "2021-08-22T05:03:27.000Z",
137
+ {},
138
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
139
+ id="isomillis-converter-no-state-with-start-start-is-start"
140
+ ),
141
+ pytest.param(
142
+ IsoMillisConcurrentStreamStateConverter(),
143
+ None,
144
+ {"created_at": "2021-08-22T05:03:27.000Z"},
145
+ datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
146
+ id="isomillis-converter-state-without-start-start-is-from-state"
147
+ ),
148
+ pytest.param(
149
+ IsoMillisConcurrentStreamStateConverter(),
150
+ "2022-08-22T05:03:27.000Z",
151
+ {"created_at": "2021-08-22T05:03:27.000Z"},
152
+ datetime(2022, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
153
+ id="isomillis-converter-state-before-start-start-is-start"
154
+ ),
155
+ pytest.param(
156
+ IsoMillisConcurrentStreamStateConverter(),
157
+ "2022-08-22T05:03:27.000Z",
158
+ {"created_at": "2023-08-22T05:03:27.000Z"},
159
+ datetime(2023, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
160
+ id="isomillis-converter-state-after-start-start-is-from-state"
161
+ ),
162
+ ]
163
+ )
164
+ def test_get_sync_start(converter, start, state, expected_start):
165
+ assert converter._get_sync_start(CursorField("created_at"), state, start) == expected_start
166
+
167
+
168
+ @pytest.mark.parametrize(
169
+ "converter, start, sequential_state, expected_output_state",
204
170
  [
205
171
  pytest.param(
206
172
  EpochValueConcurrentStreamStateConverter(),
207
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
173
+ 0,
208
174
  {},
209
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
175
+ {
176
+ "legacy": {},
177
+ "slices": [{"start": EpochValueConcurrentStreamStateConverter().zero_value,
178
+ "end": EpochValueConcurrentStreamStateConverter().zero_value}],
179
+ "state_type": "date-range",
180
+ },
210
181
  id="empty-input-state-epoch",
211
182
  ),
212
183
  pytest.param(
213
184
  EpochValueConcurrentStreamStateConverter(),
214
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
185
+ 1617030403,
215
186
  {"created": 1617030403},
216
187
  {
217
188
  "state_type": "date-range",
218
- "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
189
+ "slices": [{"start": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
219
190
  "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
220
191
  "legacy": {"created": 1617030403},
221
192
  },
@@ -223,18 +194,11 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
223
194
  ),
224
195
  pytest.param(
225
196
  IsoMillisConcurrentStreamStateConverter(),
226
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
227
- {},
228
- {'legacy': {}, 'slices': [], 'state_type': 'date-range'},
229
- id="empty-input-state-isomillis",
230
- ),
231
- pytest.param(
232
- IsoMillisConcurrentStreamStateConverter(),
233
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
197
+ "2020-01-01T00:00:00.000Z",
234
198
  {"created": "2021-08-22T05:03:27.000Z"},
235
199
  {
236
200
  "state_type": "date-range",
237
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
201
+ "slices": [{"start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
238
202
  "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}],
239
203
  "legacy": {"created": "2021-08-22T05:03:27.000Z"},
240
204
  },
@@ -242,186 +206,120 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
242
206
  ),
243
207
  ],
244
208
  )
245
- def test_convert_from_sequential_state(converter, stream, sequential_state, expected_output_state):
209
+ def test_convert_from_sequential_state(converter, start, sequential_state, expected_output_state):
246
210
  comparison_format = "%Y-%m-%dT%H:%M:%S.%f"
247
211
  if expected_output_state["slices"]:
248
- conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state)
212
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
249
213
  assert conversion["state_type"] == expected_output_state["state_type"]
250
214
  assert conversion["legacy"] == expected_output_state["legacy"]
251
215
  for actual, expected in zip(conversion["slices"], expected_output_state["slices"]):
252
216
  assert actual["start"].strftime(comparison_format) == expected["start"].strftime(comparison_format)
253
217
  assert actual["end"].strftime(comparison_format) == expected["end"].strftime(comparison_format)
254
218
  else:
255
- assert converter.convert_from_sequential_state(CursorField("created"), sequential_state) == expected_output_state
219
+ _, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
220
+ assert conversion == expected_output_state
256
221
 
257
222
 
258
223
  @pytest.mark.parametrize(
259
- "converter, stream, concurrent_state, expected_output_state",
224
+ "converter, concurrent_state, expected_output_state",
260
225
  [
261
226
  pytest.param(
262
227
  EpochValueConcurrentStreamStateConverter(),
263
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
264
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
265
- {},
266
- id="empty-input-state-epoch",
228
+ {
229
+ "state_type": "date-range",
230
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
231
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
232
+ },
233
+ {"created": 1617030403},
234
+ id="epoch-single-slice",
267
235
  ),
268
236
  pytest.param(
269
237
  EpochValueConcurrentStreamStateConverter(),
270
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
271
238
  {
272
239
  "state_type": "date-range",
273
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
274
- "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}]},
240
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
241
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
242
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
243
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
244
+ },
245
+ {"created": 1648566403},
246
+ id="epoch-overlapping-slices",
247
+ ),
248
+ pytest.param(
249
+ EpochValueConcurrentStreamStateConverter(),
250
+ {
251
+ "state_type": "date-range",
252
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
253
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
254
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
255
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
256
+ },
275
257
  {"created": 1617030403},
276
- id="with-input-state-epoch",
258
+ id="epoch-multiple-slices",
277
259
  ),
278
260
  pytest.param(
279
261
  IsoMillisConcurrentStreamStateConverter(),
280
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
281
- {"state_type": ConcurrencyCompatibleStateType.date_range.value},
282
- {},
283
- id="empty-input-state-isomillis",
262
+ {
263
+ "state_type": "date-range",
264
+ "slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
265
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
266
+ },
267
+ {"created": "2021-03-29T15:06:43.000Z"},
268
+ id="isomillis-single-slice",
284
269
  ),
285
270
  pytest.param(
286
271
  IsoMillisConcurrentStreamStateConverter(),
287
- AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
288
272
  {
289
273
  "state_type": "date-range",
290
- "slices": [{"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
291
- "end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}]},
292
- {"created": "2021-08-22T05:03:27.000Z"},
293
- id="with-input-state-isomillis",
274
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
275
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
276
+ {"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
277
+ "end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
278
+ },
279
+ {"created": "2022-03-29T15:06:43.000Z"},
280
+ id="isomillis-overlapping-slices",
281
+ ),
282
+ pytest.param(
283
+ IsoMillisConcurrentStreamStateConverter(),
284
+ {
285
+ "state_type": "date-range",
286
+ "slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
287
+ "end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
288
+ {"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
289
+ "end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
290
+ },
291
+ {"created": "2021-03-29T15:06:43.000Z"},
292
+ id="isomillis-multiple-slices",
294
293
  ),
295
294
  ],
296
295
  )
297
- def test_convert_to_sequential_state(converter, stream, concurrent_state, expected_output_state):
296
+ def test_convert_to_sequential_state(converter, concurrent_state, expected_output_state):
298
297
  assert converter.convert_to_sequential_state(CursorField("created"), concurrent_state) == expected_output_state
299
298
 
300
299
 
301
300
  @pytest.mark.parametrize(
302
- "converter, input_intervals, expected_merged_intervals",
301
+ "converter, concurrent_state, expected_output_state",
303
302
  [
304
303
  pytest.param(
305
304
  EpochValueConcurrentStreamStateConverter(),
306
- [],
307
- [],
308
- id="no-intervals-epoch",
309
- ),
310
- pytest.param(
311
- EpochValueConcurrentStreamStateConverter(),
312
- [{"start": 0, "end": 1}],
313
- [{"start": 0, "end": 1}],
314
- id="single-interval-epoch",
315
- ),
316
- pytest.param(
317
- EpochValueConcurrentStreamStateConverter(),
318
- [{"start": 0, "end": 1}, {"start": 0, "end": 1}],
319
- [{"start": 0, "end": 1}],
320
- id="duplicate-intervals-epoch",
321
- ),
322
- pytest.param(
323
- EpochValueConcurrentStreamStateConverter(),
324
- [{"start": 0, "end": 1}, {"start": 0, "end": 2}],
325
- [{"start": 0, "end": 2}],
326
- id="overlapping-intervals-epoch",
327
- ),
328
- pytest.param(
329
- EpochValueConcurrentStreamStateConverter(),
330
- [{"start": 0, "end": 3}, {"start": 1, "end": 2}],
331
- [{"start": 0, "end": 3}],
332
- id="enclosed-intervals-epoch",
333
- ),
334
- pytest.param(
335
- EpochValueConcurrentStreamStateConverter(),
336
- [{"start": 1, "end": 2}, {"start": 0, "end": 1}],
337
- [{"start": 0, "end": 2}],
338
- id="unordered-intervals-epoch",
339
- ),
340
- pytest.param(
341
- EpochValueConcurrentStreamStateConverter(),
342
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}],
343
- [{"start": 0, "end": 3}],
344
- id="adjacent-intervals-epoch",
345
- ),
346
- pytest.param(
347
- EpochValueConcurrentStreamStateConverter(),
348
- [{"start": 3, "end": 4}, {"start": 0, "end": 1}],
349
- [{"start": 0, "end": 1}, {"start": 3, "end": 4}],
350
- id="nonoverlapping-intervals-epoch",
351
- ),
352
- pytest.param(
353
- EpochValueConcurrentStreamStateConverter(),
354
- [{"start": 0, "end": 1}, {"start": 2, "end": 3}, {"start": 10, "end": 11}, {"start": 1, "end": 4}],
355
- [{"start": 0, "end": 4}, {"start": 10, "end": 11}],
356
- id="overlapping-and-nonoverlapping-intervals-epoch",
357
- ),
358
- pytest.param(
359
- IsoMillisConcurrentStreamStateConverter(),
360
- [],
361
- [],
362
- id="no-intervals-isomillis",
363
- ),
364
- pytest.param(
365
- IsoMillisConcurrentStreamStateConverter(),
366
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
367
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
368
- id="single-interval-isomillis",
369
- ),
370
- pytest.param(
371
- IsoMillisConcurrentStreamStateConverter(),
372
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
373
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
374
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
375
- id="duplicate-intervals-isomillis",
376
- ),
377
- pytest.param(
378
- IsoMillisConcurrentStreamStateConverter(),
379
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"},
380
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
381
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
382
- id="overlapping-intervals-isomillis",
383
- ),
384
- pytest.param(
385
- IsoMillisConcurrentStreamStateConverter(),
386
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
387
- {"start": "2022-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
388
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
389
- id="enclosed-intervals-isomillis",
390
- ),
391
- pytest.param(
392
- IsoMillisConcurrentStreamStateConverter(),
393
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
394
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
395
- [{"start": 0, "end": 2}],
396
- id="unordered-intervals-isomillis",
397
- ),
398
- pytest.param(
399
- IsoMillisConcurrentStreamStateConverter(),
400
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
401
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"}],
402
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
403
- id="adjacent-intervals-isomillis",
404
- ),
405
- pytest.param(
406
- IsoMillisConcurrentStreamStateConverter(),
407
- [{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
408
- {"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
409
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
410
- {"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
411
- id="nonoverlapping-intervals-isomillis",
305
+ {
306
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
307
+ "start": EpochValueConcurrentStreamStateConverter().zero_value,
308
+ },
309
+ {"created": 0},
310
+ id="empty-slices-epoch",
412
311
  ),
413
312
  pytest.param(
414
313
  IsoMillisConcurrentStreamStateConverter(),
415
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
416
- {"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"},
417
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"},
418
- {"start": "2022-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"}],
419
- [{"start": "2021-08-22T05:03:27.000Z", "end": "2025-08-22T05:03:27.000Z"},
420
- {"start": "2027-08-22T05:03:27.000Z", "end": "2028-08-22T05:03:27.000Z"}],
421
- id="overlapping-and-nonoverlapping-intervals-isomillis",
314
+ {
315
+ "state_type": ConcurrencyCompatibleStateType.date_range.value,
316
+ "start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
317
+ },
318
+ {"created": "2021-08-22T05:03:27.000Z"},
319
+ id="empty-slices-isomillis",
422
320
  ),
423
321
  ],
424
322
  )
425
- def test_merge_intervals(converter, input_intervals, expected_merged_intervals):
426
- parsed_intervals = [{"start": converter.parse_timestamp(i["start"]), "end": converter.parse_timestamp(i["end"])} for i in input_intervals]
427
- return converter.merge_intervals(parsed_intervals) == expected_merged_intervals
323
+ def test_convert_to_sequential_state_no_slices_returns_legacy_state(converter, concurrent_state, expected_output_state):
324
+ with pytest.raises(RuntimeError):
325
+ converter.convert_to_sequential_state(CursorField("created"), concurrent_state)
@@ -3,7 +3,7 @@
3
3
  #
4
4
  from concurrent.futures import Future, ThreadPoolExecutor
5
5
  from unittest import TestCase
6
- from unittest.mock import Mock, patch
6
+ from unittest.mock import Mock
7
7
 
8
8
  from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
9
9
 
@@ -23,23 +23,10 @@ class ThreadPoolManagerTest(TestCase):
23
23
 
24
24
  assert len(self._thread_pool_manager._futures) == 1
25
25
 
26
- def test_submit_too_many_concurrent_tasks(self):
27
- future = Mock(spec=Future)
28
- future.exception.return_value = None
29
- future.done.side_effect = [False, True]
30
-
31
- with patch("time.sleep") as sleep_mock:
32
- self._thread_pool_manager._futures = [future]
33
- self._thread_pool_manager.submit(self._fn, self._arg)
34
- self._threadpool.submit.assert_called_with(self._fn, self._arg)
35
- sleep_mock.assert_called_with(_SLEEP_TIME)
36
-
37
- assert len(self._thread_pool_manager._futures) == 1
38
-
39
26
  def test_submit_task_previous_task_failed(self):
40
27
  future = Mock(spec=Future)
41
28
  future.exception.return_value = RuntimeError
42
- future.done.side_effect = [False, True]
29
+ future.done.side_effect = [True, True]
43
30
 
44
31
  self._thread_pool_manager._futures = [future]
45
32
 
@@ -0,0 +1,65 @@
1
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+
3
+ from queue import Queue
4
+ from unittest.mock import Mock
5
+
6
+ import pytest
7
+ from _queue import Empty
8
+ from airbyte_cdk.sources.concurrent_source.throttler import Throttler
9
+ from airbyte_cdk.sources.streams.concurrent.partitions.throttled_queue import ThrottledQueue
10
+
11
+ _AN_ITEM = Mock()
12
+
13
+
14
+ def test_new_throttled_queue_is_empty():
15
+ queue = Queue()
16
+ throttler = Mock(spec=Throttler)
17
+ timeout = 100
18
+ throttled_queue = ThrottledQueue(queue, throttler, timeout)
19
+
20
+ assert throttled_queue.empty()
21
+
22
+
23
+ def test_throttled_queue_is_not_empty_after_putting_an_item():
24
+ queue = Queue()
25
+ throttler = Mock(spec=Throttler)
26
+ timeout = 100
27
+ throttled_queue = ThrottledQueue(queue, throttler, timeout)
28
+
29
+ throttled_queue.put(_AN_ITEM)
30
+
31
+ assert not throttled_queue.empty()
32
+
33
+
34
+ def test_throttled_queue_get_returns_item_if_any():
35
+ queue = Queue()
36
+ throttler = Mock(spec=Throttler)
37
+ timeout = 100
38
+ throttled_queue = ThrottledQueue(queue, throttler, timeout)
39
+
40
+ throttled_queue.put(_AN_ITEM)
41
+ item = throttled_queue.get()
42
+
43
+ assert item == _AN_ITEM
44
+ assert throttled_queue.empty()
45
+
46
+
47
+ def test_throttled_queue_blocks_for_timeout_seconds_if_no_items():
48
+ queue = Mock(spec=Queue)
49
+ throttler = Mock(spec=Throttler)
50
+ timeout = 100
51
+ throttled_queue = ThrottledQueue(queue, throttler, timeout)
52
+
53
+ throttled_queue.get()
54
+
55
+ assert queue.get.is_called_once_with(block=True, timeout=timeout)
56
+
57
+
58
+ def test_throttled_queue_raises_an_error_if_no_items_after_timeout():
59
+ queue = Queue()
60
+ throttler = Mock(spec=Throttler)
61
+ timeout = 0.001
62
+ throttled_queue = ThrottledQueue(queue, throttler, timeout)
63
+
64
+ with pytest.raises(Empty):
65
+ throttled_queue.get()
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
+
3
+ from unittest.mock import patch
4
+
5
+ from airbyte_cdk.sources.concurrent_source.throttler import Throttler
6
+
7
+
8
+ @patch('time.sleep', side_effect=lambda _: None)
9
+ @patch('airbyte_cdk.sources.concurrent_source.throttler.len', side_effect=[1, 1, 0])
10
+ def test_throttler(sleep_mock, len_mock):
11
+ throttler = Throttler([], 0.1, 1)
12
+ throttler.wait_and_acquire()
13
+ assert sleep_mock.call_count == 3