airbyte-cdk 0.58.8__py3-none-any.whl → 0.59.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +20 -21
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +4 -3
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +15 -18
- airbyte_cdk/sources/concurrent_source/throttler.py +25 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +29 -8
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +3 -5
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +3 -4
- airbyte_cdk/sources/streams/concurrent/partitions/throttled_queue.py +41 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +6 -12
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +36 -30
- {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/RECORD +23 -19
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
- unit_tests/sources/streams/concurrent/test_concurrent_partition_generator.py +4 -10
- unit_tests/sources/streams/concurrent/test_concurrent_read_processor.py +82 -12
- unit_tests/sources/streams/concurrent/test_cursor.py +20 -3
- unit_tests/sources/streams/concurrent/test_datetime_state_converter.py +166 -268
- unit_tests/sources/streams/concurrent/test_thread_pool_manager.py +2 -15
- unit_tests/sources/streams/concurrent/test_throttled_queue.py +65 -0
- unit_tests/sources/streams/concurrent/test_throttler.py +13 -0
- {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.58.8.dist-info → airbyte_cdk-0.59.0.dist-info}/top_level.txt +0 -0
@@ -5,16 +5,6 @@
|
|
5
5
|
from datetime import datetime, timezone
|
6
6
|
|
7
7
|
import pytest
|
8
|
-
from airbyte_cdk.models import (
|
9
|
-
AirbyteStateBlob,
|
10
|
-
AirbyteStateMessage,
|
11
|
-
AirbyteStateType,
|
12
|
-
AirbyteStream,
|
13
|
-
AirbyteStreamState,
|
14
|
-
StreamDescriptor,
|
15
|
-
SyncMode,
|
16
|
-
)
|
17
|
-
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
18
8
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
19
9
|
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import ConcurrencyCompatibleStateType
|
20
10
|
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
|
@@ -23,115 +13,12 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
|
|
23
13
|
)
|
24
14
|
|
25
15
|
|
26
|
-
@pytest.mark.parametrize(
|
27
|
-
"converter, stream, input_state, expected_output_state",
|
28
|
-
[
|
29
|
-
pytest.param(
|
30
|
-
EpochValueConcurrentStreamStateConverter(),
|
31
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
32
|
-
[],
|
33
|
-
{'legacy': {}, 'slices': [], 'state_type': 'date-range'},
|
34
|
-
id="no-input-state-epoch",
|
35
|
-
),
|
36
|
-
pytest.param(
|
37
|
-
EpochValueConcurrentStreamStateConverter(),
|
38
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
39
|
-
[
|
40
|
-
AirbyteStateMessage(
|
41
|
-
type=AirbyteStateType.STREAM,
|
42
|
-
stream=AirbyteStreamState(
|
43
|
-
stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
|
44
|
-
stream_state=AirbyteStateBlob.parse_obj({"created_at": 1703020837}),
|
45
|
-
),
|
46
|
-
),
|
47
|
-
],
|
48
|
-
{
|
49
|
-
"legacy": {"created_at": 1703020837},
|
50
|
-
"slices": [{"end": datetime(2023, 12, 19, 21, 20, 37, tzinfo=timezone.utc),
|
51
|
-
"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
|
52
|
-
"state_type": ConcurrencyCompatibleStateType.date_range.value,
|
53
|
-
},
|
54
|
-
id="incompatible-input-state-epoch",
|
55
|
-
),
|
56
|
-
pytest.param(
|
57
|
-
EpochValueConcurrentStreamStateConverter(),
|
58
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
59
|
-
[
|
60
|
-
AirbyteStateMessage(
|
61
|
-
type=AirbyteStateType.STREAM,
|
62
|
-
stream=AirbyteStreamState(
|
63
|
-
stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
|
64
|
-
stream_state=AirbyteStateBlob.parse_obj(
|
65
|
-
{
|
66
|
-
"created_at": 1703020837,
|
67
|
-
"state_type": ConcurrencyCompatibleStateType.date_range.value,
|
68
|
-
},
|
69
|
-
),
|
70
|
-
),
|
71
|
-
),
|
72
|
-
],
|
73
|
-
{"created_at": 1703020837, "state_type": ConcurrencyCompatibleStateType.date_range.value},
|
74
|
-
id="compatible-input-state-epoch",
|
75
|
-
),
|
76
|
-
pytest.param(
|
77
|
-
IsoMillisConcurrentStreamStateConverter(),
|
78
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
79
|
-
[],
|
80
|
-
{'legacy': {}, 'slices': [], 'state_type': 'date-range'},
|
81
|
-
id="no-input-state-isomillis",
|
82
|
-
),
|
83
|
-
pytest.param(
|
84
|
-
IsoMillisConcurrentStreamStateConverter(),
|
85
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
86
|
-
[
|
87
|
-
AirbyteStateMessage(
|
88
|
-
type=AirbyteStateType.STREAM,
|
89
|
-
stream=AirbyteStreamState(
|
90
|
-
stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
|
91
|
-
stream_state=AirbyteStateBlob.parse_obj({"created_at": "2021-01-18T21:18:20.000Z"}),
|
92
|
-
),
|
93
|
-
),
|
94
|
-
],
|
95
|
-
{
|
96
|
-
"legacy": {"created_at": "2021-01-18T21:18:20.000Z"},
|
97
|
-
"slices": [{"end": datetime(2021, 1, 18, 21, 18, 20, tzinfo=timezone.utc),
|
98
|
-
"start": datetime(1, 1, 1, 0, 0, 0, tzinfo=timezone.utc)}],
|
99
|
-
"state_type": ConcurrencyCompatibleStateType.date_range.value},
|
100
|
-
id="incompatible-input-state-isomillis",
|
101
|
-
),
|
102
|
-
pytest.param(
|
103
|
-
IsoMillisConcurrentStreamStateConverter(),
|
104
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
105
|
-
[
|
106
|
-
AirbyteStateMessage(
|
107
|
-
type=AirbyteStateType.STREAM,
|
108
|
-
stream=AirbyteStreamState(
|
109
|
-
stream_descriptor=StreamDescriptor(name="stream1", namespace=None),
|
110
|
-
stream_state=AirbyteStateBlob.parse_obj(
|
111
|
-
{
|
112
|
-
"created_at": "2021-01-18T21:18:20.000Z",
|
113
|
-
"state_type": ConcurrencyCompatibleStateType.date_range.value,
|
114
|
-
},
|
115
|
-
),
|
116
|
-
),
|
117
|
-
),
|
118
|
-
],
|
119
|
-
{"created_at": "2021-01-18T21:18:20.000Z", "state_type": ConcurrencyCompatibleStateType.date_range.value},
|
120
|
-
id="compatible-input-state-isomillis",
|
121
|
-
),
|
122
|
-
],
|
123
|
-
)
|
124
|
-
def test_concurrent_connector_state_manager_get_stream_state(converter, stream, input_state, expected_output_state):
|
125
|
-
state_manager = ConnectorStateManager({"stream1": stream}, input_state)
|
126
|
-
assert converter.get_concurrent_stream_state(CursorField("created_at"), state_manager.get_stream_state("stream1", None)) == expected_output_state
|
127
|
-
|
128
|
-
|
129
16
|
@pytest.mark.parametrize(
|
130
17
|
"converter, input_state, is_compatible",
|
131
18
|
[
|
132
19
|
pytest.param(
|
133
20
|
EpochValueConcurrentStreamStateConverter(),
|
134
|
-
{
|
21
|
+
{"state_type": "date-range"},
|
135
22
|
True,
|
136
23
|
id="no-input-state-is-compatible-epoch",
|
137
24
|
),
|
@@ -163,7 +50,7 @@ def test_concurrent_connector_state_manager_get_stream_state(converter, stream,
|
|
163
50
|
),
|
164
51
|
pytest.param(
|
165
52
|
IsoMillisConcurrentStreamStateConverter(),
|
166
|
-
{
|
53
|
+
{"state_type": "date-range"},
|
167
54
|
True,
|
168
55
|
id="no-input-state-is-compatible-isomillis",
|
169
56
|
),
|
@@ -200,22 +87,106 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
|
|
200
87
|
|
201
88
|
|
202
89
|
@pytest.mark.parametrize(
|
203
|
-
"converter,
|
90
|
+
"converter,start,state,expected_start",
|
91
|
+
[
|
92
|
+
pytest.param(
|
93
|
+
EpochValueConcurrentStreamStateConverter(),
|
94
|
+
None,
|
95
|
+
{},
|
96
|
+
EpochValueConcurrentStreamStateConverter().zero_value,
|
97
|
+
id="epoch-converter-no-state-no-start-start-is-zero-value"
|
98
|
+
),
|
99
|
+
pytest.param(
|
100
|
+
EpochValueConcurrentStreamStateConverter(),
|
101
|
+
1617030403,
|
102
|
+
{},
|
103
|
+
datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
|
104
|
+
id="epoch-converter-no-state-with-start-start-is-start"
|
105
|
+
),
|
106
|
+
pytest.param(
|
107
|
+
EpochValueConcurrentStreamStateConverter(),
|
108
|
+
None,
|
109
|
+
{"created_at": 1617030404},
|
110
|
+
datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
|
111
|
+
id="epoch-converter-state-without-start-start-is-from-state"
|
112
|
+
),
|
113
|
+
pytest.param(
|
114
|
+
EpochValueConcurrentStreamStateConverter(),
|
115
|
+
1617030404,
|
116
|
+
{"created_at": 1617030403},
|
117
|
+
datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
|
118
|
+
id="epoch-converter-state-before-start-start-is-start"
|
119
|
+
),
|
120
|
+
pytest.param(
|
121
|
+
EpochValueConcurrentStreamStateConverter(),
|
122
|
+
1617030403,
|
123
|
+
{"created_at": 1617030404},
|
124
|
+
datetime(2021, 3, 29, 15, 6, 44, tzinfo=timezone.utc),
|
125
|
+
id="epoch-converter-state-after-start-start-is-from-state"
|
126
|
+
),
|
127
|
+
pytest.param(
|
128
|
+
IsoMillisConcurrentStreamStateConverter(),
|
129
|
+
None,
|
130
|
+
{},
|
131
|
+
IsoMillisConcurrentStreamStateConverter().zero_value,
|
132
|
+
id="isomillis-converter-no-state-no-start-start-is-zero-value"
|
133
|
+
),
|
134
|
+
pytest.param(
|
135
|
+
IsoMillisConcurrentStreamStateConverter(),
|
136
|
+
"2021-08-22T05:03:27.000Z",
|
137
|
+
{},
|
138
|
+
datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
|
139
|
+
id="isomillis-converter-no-state-with-start-start-is-start"
|
140
|
+
),
|
141
|
+
pytest.param(
|
142
|
+
IsoMillisConcurrentStreamStateConverter(),
|
143
|
+
None,
|
144
|
+
{"created_at": "2021-08-22T05:03:27.000Z"},
|
145
|
+
datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
|
146
|
+
id="isomillis-converter-state-without-start-start-is-from-state"
|
147
|
+
),
|
148
|
+
pytest.param(
|
149
|
+
IsoMillisConcurrentStreamStateConverter(),
|
150
|
+
"2022-08-22T05:03:27.000Z",
|
151
|
+
{"created_at": "2021-08-22T05:03:27.000Z"},
|
152
|
+
datetime(2022, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
|
153
|
+
id="isomillis-converter-state-before-start-start-is-start"
|
154
|
+
),
|
155
|
+
pytest.param(
|
156
|
+
IsoMillisConcurrentStreamStateConverter(),
|
157
|
+
"2022-08-22T05:03:27.000Z",
|
158
|
+
{"created_at": "2023-08-22T05:03:27.000Z"},
|
159
|
+
datetime(2023, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
|
160
|
+
id="isomillis-converter-state-after-start-start-is-from-state"
|
161
|
+
),
|
162
|
+
]
|
163
|
+
)
|
164
|
+
def test_get_sync_start(converter, start, state, expected_start):
|
165
|
+
assert converter._get_sync_start(CursorField("created_at"), state, start) == expected_start
|
166
|
+
|
167
|
+
|
168
|
+
@pytest.mark.parametrize(
|
169
|
+
"converter, start, sequential_state, expected_output_state",
|
204
170
|
[
|
205
171
|
pytest.param(
|
206
172
|
EpochValueConcurrentStreamStateConverter(),
|
207
|
-
|
173
|
+
0,
|
208
174
|
{},
|
209
|
-
{
|
175
|
+
{
|
176
|
+
"legacy": {},
|
177
|
+
"slices": [{"start": EpochValueConcurrentStreamStateConverter().zero_value,
|
178
|
+
"end": EpochValueConcurrentStreamStateConverter().zero_value}],
|
179
|
+
"state_type": "date-range",
|
180
|
+
},
|
210
181
|
id="empty-input-state-epoch",
|
211
182
|
),
|
212
183
|
pytest.param(
|
213
184
|
EpochValueConcurrentStreamStateConverter(),
|
214
|
-
|
185
|
+
1617030403,
|
215
186
|
{"created": 1617030403},
|
216
187
|
{
|
217
188
|
"state_type": "date-range",
|
218
|
-
"slices": [{"start": datetime(
|
189
|
+
"slices": [{"start": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc),
|
219
190
|
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
|
220
191
|
"legacy": {"created": 1617030403},
|
221
192
|
},
|
@@ -223,18 +194,11 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
|
|
223
194
|
),
|
224
195
|
pytest.param(
|
225
196
|
IsoMillisConcurrentStreamStateConverter(),
|
226
|
-
|
227
|
-
{},
|
228
|
-
{'legacy': {}, 'slices': [], 'state_type': 'date-range'},
|
229
|
-
id="empty-input-state-isomillis",
|
230
|
-
),
|
231
|
-
pytest.param(
|
232
|
-
IsoMillisConcurrentStreamStateConverter(),
|
233
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
197
|
+
"2020-01-01T00:00:00.000Z",
|
234
198
|
{"created": "2021-08-22T05:03:27.000Z"},
|
235
199
|
{
|
236
200
|
"state_type": "date-range",
|
237
|
-
"slices": [{"start": datetime(
|
201
|
+
"slices": [{"start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
|
238
202
|
"end": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc)}],
|
239
203
|
"legacy": {"created": "2021-08-22T05:03:27.000Z"},
|
240
204
|
},
|
@@ -242,186 +206,120 @@ def test_concurrent_stream_state_converter_is_state_message_compatible(converter
|
|
242
206
|
),
|
243
207
|
],
|
244
208
|
)
|
245
|
-
def test_convert_from_sequential_state(converter,
|
209
|
+
def test_convert_from_sequential_state(converter, start, sequential_state, expected_output_state):
|
246
210
|
comparison_format = "%Y-%m-%dT%H:%M:%S.%f"
|
247
211
|
if expected_output_state["slices"]:
|
248
|
-
conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state)
|
212
|
+
_, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
|
249
213
|
assert conversion["state_type"] == expected_output_state["state_type"]
|
250
214
|
assert conversion["legacy"] == expected_output_state["legacy"]
|
251
215
|
for actual, expected in zip(conversion["slices"], expected_output_state["slices"]):
|
252
216
|
assert actual["start"].strftime(comparison_format) == expected["start"].strftime(comparison_format)
|
253
217
|
assert actual["end"].strftime(comparison_format) == expected["end"].strftime(comparison_format)
|
254
218
|
else:
|
255
|
-
|
219
|
+
_, conversion = converter.convert_from_sequential_state(CursorField("created"), sequential_state, start)
|
220
|
+
assert conversion == expected_output_state
|
256
221
|
|
257
222
|
|
258
223
|
@pytest.mark.parametrize(
|
259
|
-
"converter,
|
224
|
+
"converter, concurrent_state, expected_output_state",
|
260
225
|
[
|
261
226
|
pytest.param(
|
262
227
|
EpochValueConcurrentStreamStateConverter(),
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
228
|
+
{
|
229
|
+
"state_type": "date-range",
|
230
|
+
"slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
|
231
|
+
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
|
232
|
+
},
|
233
|
+
{"created": 1617030403},
|
234
|
+
id="epoch-single-slice",
|
267
235
|
),
|
268
236
|
pytest.param(
|
269
237
|
EpochValueConcurrentStreamStateConverter(),
|
270
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
271
238
|
{
|
272
239
|
"state_type": "date-range",
|
273
|
-
"slices": [{"start": datetime(
|
274
|
-
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}
|
240
|
+
"slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
241
|
+
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
|
242
|
+
{"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
243
|
+
"end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
|
244
|
+
},
|
245
|
+
{"created": 1648566403},
|
246
|
+
id="epoch-overlapping-slices",
|
247
|
+
),
|
248
|
+
pytest.param(
|
249
|
+
EpochValueConcurrentStreamStateConverter(),
|
250
|
+
{
|
251
|
+
"state_type": "date-range",
|
252
|
+
"slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
253
|
+
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
|
254
|
+
{"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
255
|
+
"end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
|
256
|
+
},
|
275
257
|
{"created": 1617030403},
|
276
|
-
id="
|
258
|
+
id="epoch-multiple-slices",
|
277
259
|
),
|
278
260
|
pytest.param(
|
279
261
|
IsoMillisConcurrentStreamStateConverter(),
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
262
|
+
{
|
263
|
+
"state_type": "date-range",
|
264
|
+
"slices": [{"start": datetime(1970, 1, 3, 0, 0, 0, tzinfo=timezone.utc),
|
265
|
+
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
|
266
|
+
},
|
267
|
+
{"created": "2021-03-29T15:06:43.000Z"},
|
268
|
+
id="isomillis-single-slice",
|
284
269
|
),
|
285
270
|
pytest.param(
|
286
271
|
IsoMillisConcurrentStreamStateConverter(),
|
287
|
-
AirbyteStream(name="stream1", json_schema={}, supported_sync_modes=[SyncMode.incremental]),
|
288
272
|
{
|
289
273
|
"state_type": "date-range",
|
290
|
-
"slices": [{"start": datetime(
|
291
|
-
"end": datetime(2021,
|
292
|
-
|
293
|
-
|
274
|
+
"slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
275
|
+
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
|
276
|
+
{"start": datetime(2020, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
277
|
+
"end": datetime(2022, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
|
278
|
+
},
|
279
|
+
{"created": "2022-03-29T15:06:43.000Z"},
|
280
|
+
id="isomillis-overlapping-slices",
|
281
|
+
),
|
282
|
+
pytest.param(
|
283
|
+
IsoMillisConcurrentStreamStateConverter(),
|
284
|
+
{
|
285
|
+
"state_type": "date-range",
|
286
|
+
"slices": [{"start": datetime(1970, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
287
|
+
"end": datetime(2021, 3, 29, 15, 6, 43, tzinfo=timezone.utc)},
|
288
|
+
{"start": datetime(2022, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
|
289
|
+
"end": datetime(2023, 3, 29, 15, 6, 43, tzinfo=timezone.utc)}],
|
290
|
+
},
|
291
|
+
{"created": "2021-03-29T15:06:43.000Z"},
|
292
|
+
id="isomillis-multiple-slices",
|
294
293
|
),
|
295
294
|
],
|
296
295
|
)
|
297
|
-
def test_convert_to_sequential_state(converter,
|
296
|
+
def test_convert_to_sequential_state(converter, concurrent_state, expected_output_state):
|
298
297
|
assert converter.convert_to_sequential_state(CursorField("created"), concurrent_state) == expected_output_state
|
299
298
|
|
300
299
|
|
301
300
|
@pytest.mark.parametrize(
|
302
|
-
"converter,
|
301
|
+
"converter, concurrent_state, expected_output_state",
|
303
302
|
[
|
304
303
|
pytest.param(
|
305
304
|
EpochValueConcurrentStreamStateConverter(),
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
[{"start": 0, "end": 1}],
|
313
|
-
[{"start": 0, "end": 1}],
|
314
|
-
id="single-interval-epoch",
|
315
|
-
),
|
316
|
-
pytest.param(
|
317
|
-
EpochValueConcurrentStreamStateConverter(),
|
318
|
-
[{"start": 0, "end": 1}, {"start": 0, "end": 1}],
|
319
|
-
[{"start": 0, "end": 1}],
|
320
|
-
id="duplicate-intervals-epoch",
|
321
|
-
),
|
322
|
-
pytest.param(
|
323
|
-
EpochValueConcurrentStreamStateConverter(),
|
324
|
-
[{"start": 0, "end": 1}, {"start": 0, "end": 2}],
|
325
|
-
[{"start": 0, "end": 2}],
|
326
|
-
id="overlapping-intervals-epoch",
|
327
|
-
),
|
328
|
-
pytest.param(
|
329
|
-
EpochValueConcurrentStreamStateConverter(),
|
330
|
-
[{"start": 0, "end": 3}, {"start": 1, "end": 2}],
|
331
|
-
[{"start": 0, "end": 3}],
|
332
|
-
id="enclosed-intervals-epoch",
|
333
|
-
),
|
334
|
-
pytest.param(
|
335
|
-
EpochValueConcurrentStreamStateConverter(),
|
336
|
-
[{"start": 1, "end": 2}, {"start": 0, "end": 1}],
|
337
|
-
[{"start": 0, "end": 2}],
|
338
|
-
id="unordered-intervals-epoch",
|
339
|
-
),
|
340
|
-
pytest.param(
|
341
|
-
EpochValueConcurrentStreamStateConverter(),
|
342
|
-
[{"start": 0, "end": 1}, {"start": 2, "end": 3}],
|
343
|
-
[{"start": 0, "end": 3}],
|
344
|
-
id="adjacent-intervals-epoch",
|
345
|
-
),
|
346
|
-
pytest.param(
|
347
|
-
EpochValueConcurrentStreamStateConverter(),
|
348
|
-
[{"start": 3, "end": 4}, {"start": 0, "end": 1}],
|
349
|
-
[{"start": 0, "end": 1}, {"start": 3, "end": 4}],
|
350
|
-
id="nonoverlapping-intervals-epoch",
|
351
|
-
),
|
352
|
-
pytest.param(
|
353
|
-
EpochValueConcurrentStreamStateConverter(),
|
354
|
-
[{"start": 0, "end": 1}, {"start": 2, "end": 3}, {"start": 10, "end": 11}, {"start": 1, "end": 4}],
|
355
|
-
[{"start": 0, "end": 4}, {"start": 10, "end": 11}],
|
356
|
-
id="overlapping-and-nonoverlapping-intervals-epoch",
|
357
|
-
),
|
358
|
-
pytest.param(
|
359
|
-
IsoMillisConcurrentStreamStateConverter(),
|
360
|
-
[],
|
361
|
-
[],
|
362
|
-
id="no-intervals-isomillis",
|
363
|
-
),
|
364
|
-
pytest.param(
|
365
|
-
IsoMillisConcurrentStreamStateConverter(),
|
366
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
|
367
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
|
368
|
-
id="single-interval-isomillis",
|
369
|
-
),
|
370
|
-
pytest.param(
|
371
|
-
IsoMillisConcurrentStreamStateConverter(),
|
372
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
|
373
|
-
{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
|
374
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
|
375
|
-
id="duplicate-intervals-isomillis",
|
376
|
-
),
|
377
|
-
pytest.param(
|
378
|
-
IsoMillisConcurrentStreamStateConverter(),
|
379
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"},
|
380
|
-
{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
|
381
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
|
382
|
-
id="overlapping-intervals-isomillis",
|
383
|
-
),
|
384
|
-
pytest.param(
|
385
|
-
IsoMillisConcurrentStreamStateConverter(),
|
386
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
|
387
|
-
{"start": "2022-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
|
388
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
|
389
|
-
id="enclosed-intervals-isomillis",
|
390
|
-
),
|
391
|
-
pytest.param(
|
392
|
-
IsoMillisConcurrentStreamStateConverter(),
|
393
|
-
[{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
|
394
|
-
{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
|
395
|
-
[{"start": 0, "end": 2}],
|
396
|
-
id="unordered-intervals-isomillis",
|
397
|
-
),
|
398
|
-
pytest.param(
|
399
|
-
IsoMillisConcurrentStreamStateConverter(),
|
400
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
|
401
|
-
{"start": "2022-08-22T05:03:27.001Z", "end": "2023-08-22T05:03:27.000Z"}],
|
402
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2023-08-22T05:03:27.000Z"}],
|
403
|
-
id="adjacent-intervals-isomillis",
|
404
|
-
),
|
405
|
-
pytest.param(
|
406
|
-
IsoMillisConcurrentStreamStateConverter(),
|
407
|
-
[{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"},
|
408
|
-
{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"}],
|
409
|
-
[{"start": "2021-08-22T05:03:27.000Z", "end": "2022-08-22T05:03:27.000Z"},
|
410
|
-
{"start": "2023-08-22T05:03:27.000Z", "end": "2024-08-22T05:03:27.000Z"}],
|
411
|
-
id="nonoverlapping-intervals-isomillis",
|
305
|
+
{
|
306
|
+
"state_type": ConcurrencyCompatibleStateType.date_range.value,
|
307
|
+
"start": EpochValueConcurrentStreamStateConverter().zero_value,
|
308
|
+
},
|
309
|
+
{"created": 0},
|
310
|
+
id="empty-slices-epoch",
|
412
311
|
),
|
413
312
|
pytest.param(
|
414
313
|
IsoMillisConcurrentStreamStateConverter(),
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
id="overlapping-and-nonoverlapping-intervals-isomillis",
|
314
|
+
{
|
315
|
+
"state_type": ConcurrencyCompatibleStateType.date_range.value,
|
316
|
+
"start": datetime(2021, 8, 22, 5, 3, 27, tzinfo=timezone.utc),
|
317
|
+
},
|
318
|
+
{"created": "2021-08-22T05:03:27.000Z"},
|
319
|
+
id="empty-slices-isomillis",
|
422
320
|
),
|
423
321
|
],
|
424
322
|
)
|
425
|
-
def
|
426
|
-
|
427
|
-
|
323
|
+
def test_convert_to_sequential_state_no_slices_returns_legacy_state(converter, concurrent_state, expected_output_state):
|
324
|
+
with pytest.raises(RuntimeError):
|
325
|
+
converter.convert_to_sequential_state(CursorField("created"), concurrent_state)
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
from concurrent.futures import Future, ThreadPoolExecutor
|
5
5
|
from unittest import TestCase
|
6
|
-
from unittest.mock import Mock
|
6
|
+
from unittest.mock import Mock
|
7
7
|
|
8
8
|
from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPoolManager
|
9
9
|
|
@@ -23,23 +23,10 @@ class ThreadPoolManagerTest(TestCase):
|
|
23
23
|
|
24
24
|
assert len(self._thread_pool_manager._futures) == 1
|
25
25
|
|
26
|
-
def test_submit_too_many_concurrent_tasks(self):
|
27
|
-
future = Mock(spec=Future)
|
28
|
-
future.exception.return_value = None
|
29
|
-
future.done.side_effect = [False, True]
|
30
|
-
|
31
|
-
with patch("time.sleep") as sleep_mock:
|
32
|
-
self._thread_pool_manager._futures = [future]
|
33
|
-
self._thread_pool_manager.submit(self._fn, self._arg)
|
34
|
-
self._threadpool.submit.assert_called_with(self._fn, self._arg)
|
35
|
-
sleep_mock.assert_called_with(_SLEEP_TIME)
|
36
|
-
|
37
|
-
assert len(self._thread_pool_manager._futures) == 1
|
38
|
-
|
39
26
|
def test_submit_task_previous_task_failed(self):
|
40
27
|
future = Mock(spec=Future)
|
41
28
|
future.exception.return_value = RuntimeError
|
42
|
-
future.done.side_effect = [
|
29
|
+
future.done.side_effect = [True, True]
|
43
30
|
|
44
31
|
self._thread_pool_manager._futures = [future]
|
45
32
|
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from queue import Queue
|
4
|
+
from unittest.mock import Mock
|
5
|
+
|
6
|
+
import pytest
|
7
|
+
from _queue import Empty
|
8
|
+
from airbyte_cdk.sources.concurrent_source.throttler import Throttler
|
9
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.throttled_queue import ThrottledQueue
|
10
|
+
|
11
|
+
_AN_ITEM = Mock()
|
12
|
+
|
13
|
+
|
14
|
+
def test_new_throttled_queue_is_empty():
|
15
|
+
queue = Queue()
|
16
|
+
throttler = Mock(spec=Throttler)
|
17
|
+
timeout = 100
|
18
|
+
throttled_queue = ThrottledQueue(queue, throttler, timeout)
|
19
|
+
|
20
|
+
assert throttled_queue.empty()
|
21
|
+
|
22
|
+
|
23
|
+
def test_throttled_queue_is_not_empty_after_putting_an_item():
|
24
|
+
queue = Queue()
|
25
|
+
throttler = Mock(spec=Throttler)
|
26
|
+
timeout = 100
|
27
|
+
throttled_queue = ThrottledQueue(queue, throttler, timeout)
|
28
|
+
|
29
|
+
throttled_queue.put(_AN_ITEM)
|
30
|
+
|
31
|
+
assert not throttled_queue.empty()
|
32
|
+
|
33
|
+
|
34
|
+
def test_throttled_queue_get_returns_item_if_any():
|
35
|
+
queue = Queue()
|
36
|
+
throttler = Mock(spec=Throttler)
|
37
|
+
timeout = 100
|
38
|
+
throttled_queue = ThrottledQueue(queue, throttler, timeout)
|
39
|
+
|
40
|
+
throttled_queue.put(_AN_ITEM)
|
41
|
+
item = throttled_queue.get()
|
42
|
+
|
43
|
+
assert item == _AN_ITEM
|
44
|
+
assert throttled_queue.empty()
|
45
|
+
|
46
|
+
|
47
|
+
def test_throttled_queue_blocks_for_timeout_seconds_if_no_items():
|
48
|
+
queue = Mock(spec=Queue)
|
49
|
+
throttler = Mock(spec=Throttler)
|
50
|
+
timeout = 100
|
51
|
+
throttled_queue = ThrottledQueue(queue, throttler, timeout)
|
52
|
+
|
53
|
+
throttled_queue.get()
|
54
|
+
|
55
|
+
assert queue.get.is_called_once_with(block=True, timeout=timeout)
|
56
|
+
|
57
|
+
|
58
|
+
def test_throttled_queue_raises_an_error_if_no_items_after_timeout():
|
59
|
+
queue = Queue()
|
60
|
+
throttler = Mock(spec=Throttler)
|
61
|
+
timeout = 0.001
|
62
|
+
throttled_queue = ThrottledQueue(queue, throttler, timeout)
|
63
|
+
|
64
|
+
with pytest.raises(Empty):
|
65
|
+
throttled_queue.get()
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from unittest.mock import patch
|
4
|
+
|
5
|
+
from airbyte_cdk.sources.concurrent_source.throttler import Throttler
|
6
|
+
|
7
|
+
|
8
|
+
@patch('time.sleep', side_effect=lambda _: None)
|
9
|
+
@patch('airbyte_cdk.sources.concurrent_source.throttler.len', side_effect=[1, 1, 0])
|
10
|
+
def test_throttler(sleep_mock, len_mock):
|
11
|
+
throttler = Throttler([], 0.1, 1)
|
12
|
+
throttler.wait_and_acquire()
|
13
|
+
assert sleep_mock.call_count == 3
|
File without changes
|
File without changes
|
File without changes
|