airbyte-cdk 6.33.7__py3-none-any.whl → 6.34.0.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
- airbyte_cdk/test/mock_http/mocker.py +1 -9
- airbyte_cdk/test/mock_http/response.py +3 -6
- {airbyte_cdk-6.33.7.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.33.7.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/RECORD +9 -9
- {airbyte_cdk-6.33.7.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.33.7.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.33.7.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.33.7.dist-info → airbyte_cdk-6.34.0.dev1.dist-info}/entry_points.txt +0 -0
@@ -5,6 +5,7 @@
|
|
5
5
|
import copy
|
6
6
|
import logging
|
7
7
|
import threading
|
8
|
+
import time
|
8
9
|
from collections import OrderedDict
|
9
10
|
from copy import deepcopy
|
10
11
|
from datetime import timedelta
|
@@ -58,7 +59,8 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
58
59
|
CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
|
59
60
|
"""
|
60
61
|
|
61
|
-
DEFAULT_MAX_PARTITIONS_NUMBER =
|
62
|
+
DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
|
63
|
+
SWITCH_TO_GLOBAL_LIMIT = 10_000
|
62
64
|
_NO_STATE: Mapping[str, Any] = {}
|
63
65
|
_NO_CURSOR_STATE: Mapping[str, Any] = {}
|
64
66
|
_GLOBAL_STATE_KEY = "state"
|
@@ -99,9 +101,11 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
99
101
|
self._new_global_cursor: Optional[StreamState] = None
|
100
102
|
self._lookback_window: int = 0
|
101
103
|
self._parent_state: Optional[StreamState] = None
|
102
|
-
self._over_limit: int = 0
|
104
|
+
self._number_of_partitions: int = 0
|
103
105
|
self._use_global_cursor: bool = False
|
104
106
|
self._partition_serializer = PerPartitionKeySerializer()
|
107
|
+
# Track the last time a state message was emitted
|
108
|
+
self._last_emission_time: float = 0.0
|
105
109
|
|
106
110
|
self._set_initial_state(stream_state)
|
107
111
|
|
@@ -141,21 +145,16 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
141
145
|
raise ValueError("stream_slice cannot be None")
|
142
146
|
|
143
147
|
partition_key = self._to_partition_key(stream_slice.partition)
|
144
|
-
self._cursor_per_partition[partition_key].close_partition(partition=partition)
|
145
148
|
with self._lock:
|
146
149
|
self._semaphore_per_partition[partition_key].acquire()
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
and self._semaphore_per_partition[partition_key]._value == 0
|
151
|
-
):
|
150
|
+
if not self._use_global_cursor:
|
151
|
+
self._cursor_per_partition[partition_key].close_partition(partition=partition)
|
152
|
+
cursor = self._cursor_per_partition[partition_key]
|
152
153
|
if (
|
153
|
-
self.
|
154
|
-
|
155
|
-
< cursor.state[self.cursor_field.cursor_field_key]
|
154
|
+
partition_key in self._finished_partitions
|
155
|
+
and self._semaphore_per_partition[partition_key]._value == 0
|
156
156
|
):
|
157
|
-
self.
|
158
|
-
if not self._use_global_cursor:
|
157
|
+
self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
|
159
158
|
self._emit_state_message()
|
160
159
|
|
161
160
|
def ensure_at_least_one_state_emitted(self) -> None:
|
@@ -169,9 +168,23 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
169
168
|
self._global_cursor = self._new_global_cursor
|
170
169
|
self._lookback_window = self._timer.finish()
|
171
170
|
self._parent_state = self._partition_router.get_stream_state()
|
172
|
-
self._emit_state_message()
|
171
|
+
self._emit_state_message(throttle=False)
|
173
172
|
|
174
|
-
def _emit_state_message(self) -> None:
|
173
|
+
def _throttle_state_message(self) -> Optional[float]:
|
174
|
+
"""
|
175
|
+
Throttles the state message emission to once every 60 seconds.
|
176
|
+
"""
|
177
|
+
current_time = time.time()
|
178
|
+
if current_time - self._last_emission_time <= 60:
|
179
|
+
return None
|
180
|
+
return current_time
|
181
|
+
|
182
|
+
def _emit_state_message(self, throttle: bool = True) -> None:
|
183
|
+
if throttle:
|
184
|
+
current_time = self._throttle_state_message()
|
185
|
+
if current_time is None:
|
186
|
+
return
|
187
|
+
self._last_emission_time = current_time
|
175
188
|
self._connector_state_manager.update_state_for_stream(
|
176
189
|
self._stream_name,
|
177
190
|
self._stream_namespace,
|
@@ -202,6 +215,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
202
215
|
self._lookback_window if self._global_cursor else 0,
|
203
216
|
)
|
204
217
|
with self._lock:
|
218
|
+
self._number_of_partitions += 1
|
205
219
|
self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
|
206
220
|
self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
|
207
221
|
threading.Semaphore(0)
|
@@ -232,9 +246,15 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
232
246
|
- Logs a warning each time a partition is removed, indicating whether it was finished
|
233
247
|
or removed due to being the oldest.
|
234
248
|
"""
|
249
|
+
if not self._use_global_cursor and self.limit_reached():
|
250
|
+
logger.info(
|
251
|
+
f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
|
252
|
+
f"Switching to global cursor for {self._stream_name}."
|
253
|
+
)
|
254
|
+
self._use_global_cursor = True
|
255
|
+
|
235
256
|
with self._lock:
|
236
257
|
while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
|
237
|
-
self._over_limit += 1
|
238
258
|
# Try removing finished partitions first
|
239
259
|
for partition_key in list(self._cursor_per_partition.keys()):
|
240
260
|
if (
|
@@ -245,7 +265,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
245
265
|
partition_key
|
246
266
|
) # Remove the oldest partition
|
247
267
|
logger.warning(
|
248
|
-
f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
|
268
|
+
f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
|
249
269
|
)
|
250
270
|
break
|
251
271
|
else:
|
@@ -254,7 +274,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
254
274
|
1
|
255
275
|
] # Remove the oldest partition
|
256
276
|
logger.warning(
|
257
|
-
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
|
277
|
+
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
|
258
278
|
)
|
259
279
|
|
260
280
|
def _set_initial_state(self, stream_state: StreamState) -> None:
|
@@ -314,6 +334,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
314
334
|
self._lookback_window = int(stream_state.get("lookback_window", 0))
|
315
335
|
|
316
336
|
for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
|
337
|
+
self._number_of_partitions += 1
|
317
338
|
self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
|
318
339
|
self._create_cursor(state["cursor"])
|
319
340
|
)
|
@@ -354,16 +375,26 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
354
375
|
self._new_global_cursor = deepcopy(fixed_global_state)
|
355
376
|
|
356
377
|
def observe(self, record: Record) -> None:
|
357
|
-
if not self._use_global_cursor and self.limit_reached():
|
358
|
-
self._use_global_cursor = True
|
359
|
-
|
360
378
|
if not record.associated_slice:
|
361
379
|
raise ValueError(
|
362
380
|
"Invalid state as stream slices that are emitted should refer to an existing cursor"
|
363
381
|
)
|
364
|
-
|
365
|
-
|
366
|
-
|
382
|
+
|
383
|
+
record_cursor = self._connector_state_converter.output_format(
|
384
|
+
self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
|
385
|
+
)
|
386
|
+
self._update_global_cursor(record_cursor)
|
387
|
+
if not self._use_global_cursor:
|
388
|
+
self._cursor_per_partition[
|
389
|
+
self._to_partition_key(record.associated_slice.partition)
|
390
|
+
].observe(record)
|
391
|
+
|
392
|
+
def _update_global_cursor(self, value: Any) -> None:
|
393
|
+
if (
|
394
|
+
self._new_global_cursor is None
|
395
|
+
or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
|
396
|
+
):
|
397
|
+
self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
|
367
398
|
|
368
399
|
def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
|
369
400
|
return self._partition_serializer.to_partition_key(partition)
|
@@ -397,4 +428,4 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
397
428
|
return cursor
|
398
429
|
|
399
430
|
def limit_reached(self) -> bool:
|
400
|
-
return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
|
431
|
+
return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
|
@@ -17,7 +17,6 @@ class SupportedHttpMethods(str, Enum):
|
|
17
17
|
GET = "get"
|
18
18
|
PATCH = "patch"
|
19
19
|
POST = "post"
|
20
|
-
PUT = "put"
|
21
20
|
DELETE = "delete"
|
22
21
|
|
23
22
|
|
@@ -78,7 +77,7 @@ class HttpMocker(contextlib.ContextDecorator):
|
|
78
77
|
additional_matcher=self._matches_wrapper(matcher),
|
79
78
|
response_list=[
|
80
79
|
{
|
81
|
-
|
80
|
+
"text": response.body,
|
82
81
|
"status_code": response.status_code,
|
83
82
|
"headers": response.headers,
|
84
83
|
}
|
@@ -86,10 +85,6 @@ class HttpMocker(contextlib.ContextDecorator):
|
|
86
85
|
],
|
87
86
|
)
|
88
87
|
|
89
|
-
@staticmethod
|
90
|
-
def _get_body_field(response: HttpResponse) -> str:
|
91
|
-
return "text" if isinstance(response.body, str) else "content"
|
92
|
-
|
93
88
|
def get(self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]) -> None:
|
94
89
|
self._mock_request_method(SupportedHttpMethods.GET, request, responses)
|
95
90
|
|
@@ -103,9 +98,6 @@ class HttpMocker(contextlib.ContextDecorator):
|
|
103
98
|
) -> None:
|
104
99
|
self._mock_request_method(SupportedHttpMethods.POST, request, responses)
|
105
100
|
|
106
|
-
def put(self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]) -> None:
|
107
|
-
self._mock_request_method(SupportedHttpMethods.PUT, request, responses)
|
108
|
-
|
109
101
|
def delete(
|
110
102
|
self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]
|
111
103
|
) -> None:
|
@@ -1,22 +1,19 @@
|
|
1
1
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
2
2
|
|
3
3
|
from types import MappingProxyType
|
4
|
-
from typing import Mapping, Union
|
4
|
+
from typing import Mapping
|
5
5
|
|
6
6
|
|
7
7
|
class HttpResponse:
|
8
8
|
def __init__(
|
9
|
-
self,
|
10
|
-
body: Union[str, bytes],
|
11
|
-
status_code: int = 200,
|
12
|
-
headers: Mapping[str, str] = MappingProxyType({}),
|
9
|
+
self, body: str, status_code: int = 200, headers: Mapping[str, str] = MappingProxyType({})
|
13
10
|
):
|
14
11
|
self._body = body
|
15
12
|
self._status_code = status_code
|
16
13
|
self._headers = headers
|
17
14
|
|
18
15
|
@property
|
19
|
-
def body(self) -> Union[str, bytes]:
|
16
|
+
def body(self) -> str:
|
20
17
|
return self._body
|
21
18
|
|
22
19
|
@property
|
@@ -92,7 +92,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=HCqx7IyENM_
|
|
92
92
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
|
93
93
|
airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
|
94
94
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
|
95
|
-
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=
|
95
|
+
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=Pg2phEFT9T8AzUjK6hVhn0rgR3yY6JPF-Dfv0g1m5dQ,19191
|
96
96
|
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=Rbe6lJLTtZ5en33MwZiB9-H9-AwDMNHgwBZs8EqhYqk,22172
|
97
97
|
airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
|
98
98
|
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=9HO-QbL9akvjq2NP7l498RwLA4iQZlBMQW1tZbt34I8,15943
|
@@ -333,9 +333,9 @@ airbyte_cdk/test/catalog_builder.py,sha256=-y05Cz1x0Dlk6oE9LSKhCozssV2gYBNtMdV5Y
|
|
333
333
|
airbyte_cdk/test/entrypoint_wrapper.py,sha256=9XBii_YguQp0d8cykn3hy102FsJcwIBQzSB7co5ho0s,9802
|
334
334
|
airbyte_cdk/test/mock_http/__init__.py,sha256=jE5kC6CQ0OXkTqKhciDnNVZHesBFVIA2YvkdFGwva7k,322
|
335
335
|
airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBETmP6hTEyc,1446
|
336
|
-
airbyte_cdk/test/mock_http/mocker.py,sha256=
|
336
|
+
airbyte_cdk/test/mock_http/mocker.py,sha256=HJjgFdapr7OALj0sfk-LVXYBiymbUDieaGa8U1_q730,7358
|
337
337
|
airbyte_cdk/test/mock_http/request.py,sha256=tdB8cqk2vLgCDTOKffBKsM06llYs4ZecgtH6DKyx6yY,4112
|
338
|
-
airbyte_cdk/test/mock_http/response.py,sha256=
|
338
|
+
airbyte_cdk/test/mock_http/response.py,sha256=U9KEsUkK2dPXYwnfwrwp6CcYSSpMYKLjfTrPFKSMCaM,602
|
339
339
|
airbyte_cdk/test/mock_http/response_builder.py,sha256=debPx_lRYBaQVSwCoKLa0F8KFk3h0qG7bWxFBATa0cc,7958
|
340
340
|
airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMoYM44,946
|
341
341
|
airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
|
@@ -360,9 +360,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
360
360
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
361
361
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
362
362
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
363
|
-
airbyte_cdk-6.
|
364
|
-
airbyte_cdk-6.
|
365
|
-
airbyte_cdk-6.
|
366
|
-
airbyte_cdk-6.
|
367
|
-
airbyte_cdk-6.
|
368
|
-
airbyte_cdk-6.
|
363
|
+
airbyte_cdk-6.34.0.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
364
|
+
airbyte_cdk-6.34.0.dev1.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
365
|
+
airbyte_cdk-6.34.0.dev1.dist-info/METADATA,sha256=zRWv4t7GvXHf9bPXmsf8vFuPd63eiYFXXGeMkUchcDw,6015
|
366
|
+
airbyte_cdk-6.34.0.dev1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
367
|
+
airbyte_cdk-6.34.0.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
368
|
+
airbyte_cdk-6.34.0.dev1.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|