airbyte-cdk 6.33.7__py3-none-any.whl → 6.34.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@
5
5
  import copy
6
6
  import logging
7
7
  import threading
8
+ import time
8
9
  from collections import OrderedDict
9
10
  from copy import deepcopy
10
11
  from datetime import timedelta
@@ -58,7 +59,8 @@ class ConcurrentPerPartitionCursor(Cursor):
58
59
  CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
59
60
  """
60
61
 
61
- DEFAULT_MAX_PARTITIONS_NUMBER = 10000
62
+ DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
63
+ SWITCH_TO_GLOBAL_LIMIT = 10_000
62
64
  _NO_STATE: Mapping[str, Any] = {}
63
65
  _NO_CURSOR_STATE: Mapping[str, Any] = {}
64
66
  _GLOBAL_STATE_KEY = "state"
@@ -99,9 +101,11 @@ class ConcurrentPerPartitionCursor(Cursor):
99
101
  self._new_global_cursor: Optional[StreamState] = None
100
102
  self._lookback_window: int = 0
101
103
  self._parent_state: Optional[StreamState] = None
102
- self._over_limit: int = 0
104
+ self._number_of_partitions: int = 0
103
105
  self._use_global_cursor: bool = False
104
106
  self._partition_serializer = PerPartitionKeySerializer()
107
+ # Track the last time a state message was emitted
108
+ self._last_emission_time: float = 0.0
105
109
 
106
110
  self._set_initial_state(stream_state)
107
111
 
@@ -141,21 +145,16 @@ class ConcurrentPerPartitionCursor(Cursor):
141
145
  raise ValueError("stream_slice cannot be None")
142
146
 
143
147
  partition_key = self._to_partition_key(stream_slice.partition)
144
- self._cursor_per_partition[partition_key].close_partition(partition=partition)
145
148
  with self._lock:
146
149
  self._semaphore_per_partition[partition_key].acquire()
147
- cursor = self._cursor_per_partition[partition_key]
148
- if (
149
- partition_key in self._finished_partitions
150
- and self._semaphore_per_partition[partition_key]._value == 0
151
- ):
150
+ if not self._use_global_cursor:
151
+ self._cursor_per_partition[partition_key].close_partition(partition=partition)
152
+ cursor = self._cursor_per_partition[partition_key]
152
153
  if (
153
- self._new_global_cursor is None
154
- or self._new_global_cursor[self.cursor_field.cursor_field_key]
155
- < cursor.state[self.cursor_field.cursor_field_key]
154
+ partition_key in self._finished_partitions
155
+ and self._semaphore_per_partition[partition_key]._value == 0
156
156
  ):
157
- self._new_global_cursor = copy.deepcopy(cursor.state)
158
- if not self._use_global_cursor:
157
+ self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
159
158
  self._emit_state_message()
160
159
 
161
160
  def ensure_at_least_one_state_emitted(self) -> None:
@@ -169,9 +168,23 @@ class ConcurrentPerPartitionCursor(Cursor):
169
168
  self._global_cursor = self._new_global_cursor
170
169
  self._lookback_window = self._timer.finish()
171
170
  self._parent_state = self._partition_router.get_stream_state()
172
- self._emit_state_message()
171
+ self._emit_state_message(throttle=False)
173
172
 
174
- def _emit_state_message(self) -> None:
173
+ def _throttle_state_message(self) -> Optional[float]:
174
+ """
175
+ Throttles the state message emission to once every 60 seconds.
176
+ """
177
+ current_time = time.time()
178
+ if current_time - self._last_emission_time <= 60:
179
+ return None
180
+ return current_time
181
+
182
+ def _emit_state_message(self, throttle: bool = True) -> None:
183
+ if throttle:
184
+ current_time = self._throttle_state_message()
185
+ if current_time is None:
186
+ return
187
+ self._last_emission_time = current_time
175
188
  self._connector_state_manager.update_state_for_stream(
176
189
  self._stream_name,
177
190
  self._stream_namespace,
@@ -202,6 +215,7 @@ class ConcurrentPerPartitionCursor(Cursor):
202
215
  self._lookback_window if self._global_cursor else 0,
203
216
  )
204
217
  with self._lock:
218
+ self._number_of_partitions += 1
205
219
  self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
206
220
  self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
207
221
  threading.Semaphore(0)
@@ -232,9 +246,15 @@ class ConcurrentPerPartitionCursor(Cursor):
232
246
  - Logs a warning each time a partition is removed, indicating whether it was finished
233
247
  or removed due to being the oldest.
234
248
  """
249
+ if not self._use_global_cursor and self.limit_reached():
250
+ logger.info(
251
+ f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
252
+ f"Switching to global cursor for {self._stream_name}."
253
+ )
254
+ self._use_global_cursor = True
255
+
235
256
  with self._lock:
236
257
  while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
237
- self._over_limit += 1
238
258
  # Try removing finished partitions first
239
259
  for partition_key in list(self._cursor_per_partition.keys()):
240
260
  if (
@@ -245,7 +265,7 @@ class ConcurrentPerPartitionCursor(Cursor):
245
265
  partition_key
246
266
  ) # Remove the oldest partition
247
267
  logger.warning(
248
- f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
268
+ f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
249
269
  )
250
270
  break
251
271
  else:
@@ -254,7 +274,7 @@ class ConcurrentPerPartitionCursor(Cursor):
254
274
  1
255
275
  ] # Remove the oldest partition
256
276
  logger.warning(
257
- f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
277
+ f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
258
278
  )
259
279
 
260
280
  def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -314,6 +334,7 @@ class ConcurrentPerPartitionCursor(Cursor):
314
334
  self._lookback_window = int(stream_state.get("lookback_window", 0))
315
335
 
316
336
  for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
337
+ self._number_of_partitions += 1
317
338
  self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
318
339
  self._create_cursor(state["cursor"])
319
340
  )
@@ -354,16 +375,26 @@ class ConcurrentPerPartitionCursor(Cursor):
354
375
  self._new_global_cursor = deepcopy(fixed_global_state)
355
376
 
356
377
  def observe(self, record: Record) -> None:
357
- if not self._use_global_cursor and self.limit_reached():
358
- self._use_global_cursor = True
359
-
360
378
  if not record.associated_slice:
361
379
  raise ValueError(
362
380
  "Invalid state as stream slices that are emitted should refer to an existing cursor"
363
381
  )
364
- self._cursor_per_partition[
365
- self._to_partition_key(record.associated_slice.partition)
366
- ].observe(record)
382
+
383
+ record_cursor = self._connector_state_converter.output_format(
384
+ self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
385
+ )
386
+ self._update_global_cursor(record_cursor)
387
+ if not self._use_global_cursor:
388
+ self._cursor_per_partition[
389
+ self._to_partition_key(record.associated_slice.partition)
390
+ ].observe(record)
391
+
392
+ def _update_global_cursor(self, value: Any) -> None:
393
+ if (
394
+ self._new_global_cursor is None
395
+ or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
396
+ ):
397
+ self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
367
398
 
368
399
  def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
369
400
  return self._partition_serializer.to_partition_key(partition)
@@ -397,4 +428,4 @@ class ConcurrentPerPartitionCursor(Cursor):
397
428
  return cursor
398
429
 
399
430
  def limit_reached(self) -> bool:
400
- return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
431
+ return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
@@ -17,7 +17,6 @@ class SupportedHttpMethods(str, Enum):
17
17
  GET = "get"
18
18
  PATCH = "patch"
19
19
  POST = "post"
20
- PUT = "put"
21
20
  DELETE = "delete"
22
21
 
23
22
 
@@ -78,7 +77,7 @@ class HttpMocker(contextlib.ContextDecorator):
78
77
  additional_matcher=self._matches_wrapper(matcher),
79
78
  response_list=[
80
79
  {
81
- self._get_body_field(response): response.body,
80
+ "text": response.body,
82
81
  "status_code": response.status_code,
83
82
  "headers": response.headers,
84
83
  }
@@ -86,10 +85,6 @@ class HttpMocker(contextlib.ContextDecorator):
86
85
  ],
87
86
  )
88
87
 
89
- @staticmethod
90
- def _get_body_field(response: HttpResponse) -> str:
91
- return "text" if isinstance(response.body, str) else "content"
92
-
93
88
  def get(self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]) -> None:
94
89
  self._mock_request_method(SupportedHttpMethods.GET, request, responses)
95
90
 
@@ -103,9 +98,6 @@ class HttpMocker(contextlib.ContextDecorator):
103
98
  ) -> None:
104
99
  self._mock_request_method(SupportedHttpMethods.POST, request, responses)
105
100
 
106
- def put(self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]) -> None:
107
- self._mock_request_method(SupportedHttpMethods.PUT, request, responses)
108
-
109
101
  def delete(
110
102
  self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]
111
103
  ) -> None:
@@ -1,22 +1,19 @@
1
1
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
2
 
3
3
  from types import MappingProxyType
4
- from typing import Mapping, Union
4
+ from typing import Mapping
5
5
 
6
6
 
7
7
  class HttpResponse:
8
8
  def __init__(
9
- self,
10
- body: Union[str, bytes],
11
- status_code: int = 200,
12
- headers: Mapping[str, str] = MappingProxyType({}),
9
+ self, body: str, status_code: int = 200, headers: Mapping[str, str] = MappingProxyType({})
13
10
  ):
14
11
  self._body = body
15
12
  self._status_code = status_code
16
13
  self._headers = headers
17
14
 
18
15
  @property
19
- def body(self) -> Union[str, bytes]:
16
+ def body(self) -> str:
20
17
  return self._body
21
18
 
22
19
  @property
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 6.33.7
3
+ Version: 6.34.0.dev1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -92,7 +92,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=HCqx7IyENM_
92
92
  airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
93
93
  airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
94
94
  airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
95
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=5dbO47TFmC5Oz8TZ8DKXwXeZElz70xy2v2HJlZr5qVs,17751
95
+ airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=Pg2phEFT9T8AzUjK6hVhn0rgR3yY6JPF-Dfv0g1m5dQ,19191
96
96
  airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=Rbe6lJLTtZ5en33MwZiB9-H9-AwDMNHgwBZs8EqhYqk,22172
97
97
  airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
98
98
  airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=9HO-QbL9akvjq2NP7l498RwLA4iQZlBMQW1tZbt34I8,15943
@@ -333,9 +333,9 @@ airbyte_cdk/test/catalog_builder.py,sha256=-y05Cz1x0Dlk6oE9LSKhCozssV2gYBNtMdV5Y
333
333
  airbyte_cdk/test/entrypoint_wrapper.py,sha256=9XBii_YguQp0d8cykn3hy102FsJcwIBQzSB7co5ho0s,9802
334
334
  airbyte_cdk/test/mock_http/__init__.py,sha256=jE5kC6CQ0OXkTqKhciDnNVZHesBFVIA2YvkdFGwva7k,322
335
335
  airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBETmP6hTEyc,1446
336
- airbyte_cdk/test/mock_http/mocker.py,sha256=ghX44cLwhs7lqz1gYMizGX8zfPnDvt3YNI2w5jLpzIs,7726
336
+ airbyte_cdk/test/mock_http/mocker.py,sha256=HJjgFdapr7OALj0sfk-LVXYBiymbUDieaGa8U1_q730,7358
337
337
  airbyte_cdk/test/mock_http/request.py,sha256=tdB8cqk2vLgCDTOKffBKsM06llYs4ZecgtH6DKyx6yY,4112
338
- airbyte_cdk/test/mock_http/response.py,sha256=s4-cQQqTtmeej0pQDWqmG0vUWpHS-93lIWMpW3zSVyU,662
338
+ airbyte_cdk/test/mock_http/response.py,sha256=U9KEsUkK2dPXYwnfwrwp6CcYSSpMYKLjfTrPFKSMCaM,602
339
339
  airbyte_cdk/test/mock_http/response_builder.py,sha256=debPx_lRYBaQVSwCoKLa0F8KFk3h0qG7bWxFBATa0cc,7958
340
340
  airbyte_cdk/test/state_builder.py,sha256=kLPql9lNzUJaBg5YYRLJlY_Hy5JLHJDVyKPMZMoYM44,946
341
341
  airbyte_cdk/test/utils/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
@@ -360,9 +360,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
360
360
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
361
361
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
362
362
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
363
- airbyte_cdk-6.33.7.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
364
- airbyte_cdk-6.33.7.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
365
- airbyte_cdk-6.33.7.dist-info/METADATA,sha256=wx51UyfmmCxI6vcmkCr28bbvwR6P5gcokPbuCEwS83Q,6010
366
- airbyte_cdk-6.33.7.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
367
- airbyte_cdk-6.33.7.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
368
- airbyte_cdk-6.33.7.dist-info/RECORD,,
363
+ airbyte_cdk-6.34.0.dev1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
364
+ airbyte_cdk-6.34.0.dev1.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
365
+ airbyte_cdk-6.34.0.dev1.dist-info/METADATA,sha256=zRWv4t7GvXHf9bPXmsf8vFuPd63eiYFXXGeMkUchcDw,6015
366
+ airbyte_cdk-6.34.0.dev1.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
367
+ airbyte_cdk-6.34.0.dev1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
368
+ airbyte_cdk-6.34.0.dev1.dist-info/RECORD,,