airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the versions exactly as they appear in the public registry.
Files changed (54)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
  11. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  14. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  15. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  16. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
  18. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
  20. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  21. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  22. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
  23. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
  24. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  25. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  26. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  27. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  28. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  29. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  30. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
  31. airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
  32. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
  33. airbyte_cdk/sources/http_logger.py +1 -1
  34. airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
  35. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
  36. airbyte_cdk/sources/streams/core.py +6 -6
  37. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
  38. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
  39. airbyte_cdk/sources/types.py +2 -4
  40. airbyte_cdk/sources/utils/transform.py +2 -23
  41. airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
  42. airbyte_cdk/utils/mapping_helpers.py +86 -27
  43. airbyte_cdk/utils/slice_hasher.py +1 -8
  44. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
  45. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
  46. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
  47. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
  48. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
  49. airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
  50. airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
  51. airbyte_cdk/utils/datetime_helpers.py +0 -499
  52. airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
  53. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
  54. {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/streams/concurrent/cursor.py

@@ -13,6 +13,7 @@ from typing import (
     Mapping,
     MutableMapping,
     Optional,
+    Protocol,
     Tuple,
     Union,
 )
@@ -20,8 +21,6 @@ from typing import (
 from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
-from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
-from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
 from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
@@ -36,6 +35,36 @@ def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
     return functools.reduce(lambda a, b: a[b], path, mapping)
 
 
+class GapType(Protocol):
+    """
+    This is the representation of gaps between two cursor values. Examples:
+    * if cursor values are datetimes, GapType is timedelta
+    * if cursor values are integer, GapType will also be integer
+    """
+
+    pass
+
+
+class CursorValueType(Protocol):
+    """Protocol for annotating comparable types."""
+
+    @abstractmethod
+    def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
+        pass
+
+    @abstractmethod
+    def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
+        pass
+
+    @abstractmethod
+    def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
+        pass
+
+    @abstractmethod
+    def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
+        pass
+
+
 class CursorField:
     def __init__(self, cursor_field_key: str) -> None:
         self.cursor_field_key = cursor_field_key
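
For context, a quick illustrative check (not part of the diff) that typical cursor value types satisfy these structural protocols:

    from datetime import datetime, timedelta, timezone

    # datetime/timedelta as CursorValueType/GapType: datetime supports <, >= and
    # + / - with a timedelta, which is all the protocols above require.
    start = datetime(2024, 1, 1, tzinfo=timezone.utc)
    gap = timedelta(days=1)
    assert start < start + gap
    assert (start + gap) - gap >= start

    # An integer cursor works the same way, with int playing both roles.
    assert 5 < 5 + 1 and (5 + 1) - 1 >= 5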
@@ -143,7 +172,6 @@ class ConcurrentCursor(Cursor):
         lookback_window: Optional[GapType] = None,
         slice_range: Optional[GapType] = None,
         cursor_granularity: Optional[GapType] = None,
-        clamping_strategy: ClampingStrategy = NoClamping(),
     ) -> None:
         self._stream_name = stream_name
         self._stream_namespace = stream_namespace
@@ -165,13 +193,10 @@ class ConcurrentCursor(Cursor):
         self._cursor_granularity = cursor_granularity
         # Flag to track if the logger has been triggered (per stream)
         self._should_be_synced_logger_triggered = False
-        self._clamping_strategy = clamping_strategy
 
     @property
     def state(self) -> MutableMapping[str, Any]:
-        return self._connector_state_converter.convert_to_state_message(
-            self.cursor_field, self._concurrent_state
-        )
+        return self._concurrent_state
 
     @property
     def cursor_field(self) -> CursorField:
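
In practice the property change means `state` now exposes the cursor's internal concurrency-compatible mapping, and conversion to a per-stream state message is deferred to `_emit_state_message()`. A hypothetical sketch of the two shapes (the field name, values, and exact mapping layout below are assumptions for illustration, not taken from this diff):

    # Assumed internal shape kept by the cursor (illustrative only).
    concurrent_state = {
        "state_type": "date-range",
        "slices": [{"start": "2024-01-01T00:00:00.000Z", "end": "2024-02-01T00:00:00.000Z"}],
    }

    # 6.31.1: cursor.state returned the converted message, e.g. {"updated_at": "2024-02-01T00:00:00.000Z"}
    # for a hypothetical cursor field "updated_at".
    # 6.31.2.dev0: cursor.state returns concurrent_state as-is; convert_to_state_message()
    # is applied only when the state message is emitted.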
@@ -216,10 +241,10 @@ class ConcurrentCursor(Cursor):
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
 
     def close_partition(self, partition: Partition) -> None:
-        slice_count_before = len(self._concurrent_state.get("slices", []))
+        slice_count_before = len(self.state.get("slices", []))
         self._add_slice_to_state(partition)
         if slice_count_before < len(
-            self._concurrent_state["slices"]
+            self.state["slices"]
         ):  # only emit if at least one slice has been processed
             self._merge_partitions()
             self._emit_state_message()
@@ -231,11 +256,11 @@ class ConcurrentCursor(Cursor):
         )
 
         if self._slice_boundary_fields:
-            if "slices" not in self._concurrent_state:
+            if "slices" not in self.state:
                 raise RuntimeError(
                     f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
                 )
-            self._concurrent_state["slices"].append(
+            self.state["slices"].append(
                 {
                     self._connector_state_converter.START_KEY: self._extract_from_slice(
                         partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -263,7 +288,7 @@ class ConcurrentCursor(Cursor):
                     "expected. Please contact the Airbyte team."
                 )
 
-            self._concurrent_state["slices"].append(
+            self.state["slices"].append(
                 {
                     self._connector_state_converter.START_KEY: self.start,
                     self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -275,7 +300,9 @@ class ConcurrentCursor(Cursor):
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
-            self.state,
+            self._connector_state_converter.convert_to_state_message(
+                self._cursor_field, self.state
+            ),
         )
         state_message = self._connector_state_manager.create_state_message(
             self._stream_name, self._stream_namespace
@@ -283,9 +310,7 @@ class ConcurrentCursor(Cursor):
         self._message_repository.emit_message(state_message)
 
     def _merge_partitions(self) -> None:
-        self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
-            self._concurrent_state["slices"]
-        )
+        self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
 
     def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
         try:
@@ -322,42 +347,36 @@ class ConcurrentCursor(Cursor):
         if self._start is not None and self._is_start_before_first_slice():
             yield from self._split_per_slice_range(
                 self._start,
-                self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
+                self.state["slices"][0][self._connector_state_converter.START_KEY],
                 False,
             )
 
-        if len(self._concurrent_state["slices"]) == 1:
+        if len(self.state["slices"]) == 1:
             yield from self._split_per_slice_range(
                 self._calculate_lower_boundary_of_last_slice(
-                    self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
+                    self.state["slices"][0][self._connector_state_converter.END_KEY]
                 ),
                 self._end_provider(),
                 True,
             )
-        elif len(self._concurrent_state["slices"]) > 1:
-            for i in range(len(self._concurrent_state["slices"]) - 1):
+        elif len(self.state["slices"]) > 1:
+            for i in range(len(self.state["slices"]) - 1):
                 if self._cursor_granularity:
                     yield from self._split_per_slice_range(
-                        self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
+                        self.state["slices"][i][self._connector_state_converter.END_KEY]
                         + self._cursor_granularity,
-                        self._concurrent_state["slices"][i + 1][
-                            self._connector_state_converter.START_KEY
-                        ],
+                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
                         False,
                     )
                 else:
                     yield from self._split_per_slice_range(
-                        self._concurrent_state["slices"][i][
-                            self._connector_state_converter.END_KEY
-                        ],
-                        self._concurrent_state["slices"][i + 1][
-                            self._connector_state_converter.START_KEY
-                        ],
+                        self.state["slices"][i][self._connector_state_converter.END_KEY],
+                        self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
                         False,
                     )
             yield from self._split_per_slice_range(
                 self._calculate_lower_boundary_of_last_slice(
-                    self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
+                    self.state["slices"][-1][self._connector_state_converter.END_KEY]
                 ),
                 self._end_provider(),
                 True,
@@ -368,8 +387,7 @@ class ConcurrentCursor(Cursor):
     def _is_start_before_first_slice(self) -> bool:
         return (
             self._start is not None
-            and self._start
-            < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
+            and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
         )
 
     def _calculate_lower_boundary_of_last_slice(
@@ -390,12 +408,10 @@ class ConcurrentCursor(Cursor):
 
         lower = max(lower, self._start) if self._start else lower
        if not self._slice_range or self._evaluate_upper_safely(lower, self._slice_range) >= upper:
-            clamped_lower = self._clamping_strategy.clamp(lower)
-            clamped_upper = self._clamping_strategy.clamp(upper)
             start_value, end_value = (
-                (clamped_lower, clamped_upper - self._cursor_granularity)
+                (lower, upper - self._cursor_granularity)
                 if self._cursor_granularity and not upper_is_end
-                else (clamped_lower, clamped_upper)
+                else (lower, upper)
             )
             yield StreamSlice(
                 partition={},
@@ -417,21 +433,11 @@ class ConcurrentCursor(Cursor):
                 )
                 has_reached_upper_boundary = current_upper_boundary >= upper
 
-                clamped_upper = (
-                    self._clamping_strategy.clamp(current_upper_boundary)
-                    if current_upper_boundary != upper
-                    else current_upper_boundary
-                )
-                clamped_lower = self._clamping_strategy.clamp(current_lower_boundary)
-                if clamped_lower >= clamped_upper:
-                    # clamping collapsed both values which means that it is time to stop processing
-                    # FIXME should this be replace by proper end_provider
-                    break
                 start_value, end_value = (
-                    (clamped_lower, clamped_upper - self._cursor_granularity)
+                    (current_lower_boundary, current_upper_boundary - self._cursor_granularity)
                     if self._cursor_granularity
                     and (not upper_is_end or not has_reached_upper_boundary)
-                    else (clamped_lower, clamped_upper)
+                    else (current_lower_boundary, current_upper_boundary)
                 )
                 yield StreamSlice(
                     partition={},
@@ -444,7 +450,7 @@ class ConcurrentCursor(Cursor):
                     ]: self._connector_state_converter.output_format(end_value),
                 },
             )
-            current_lower_boundary = clamped_upper
+            current_lower_boundary = current_upper_boundary
             if current_upper_boundary >= upper:
                 stop_processing = True
 
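
To make the now-unclamped chunking behavior concrete, here is a minimal stand-alone sketch with plain integers (illustrative only; the actual method works on converter-parsed cursor values and only skips the granularity adjustment on the final, open-ended slice):

    from typing import Iterator, Tuple

    def split_per_slice_range(lower: int, upper: int, step: int, granularity: int) -> Iterator[Tuple[int, int]]:
        # Cut [lower, upper] into chunks of `step`; subtract `granularity` from each
        # chunk's end so adjacent slices do not overlap, except for the last chunk.
        current_lower = lower
        while current_lower < upper:
            current_upper = min(current_lower + step, upper)
            end = current_upper - granularity if current_upper < upper else current_upper
            yield current_lower, end
            current_lower = current_upper

    assert list(split_per_slice_range(0, 10, 3, 1)) == [(0, 2), (3, 5), (6, 8), (9, 10)]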
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py

@@ -6,6 +6,9 @@ from abc import abstractmethod
 from datetime import datetime, timedelta, timezone
 from typing import Any, Callable, List, MutableMapping, Optional, Tuple
 
+import pendulum
+from pendulum.datetime import DateTime
+
 # FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and
 # the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest.
 from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
@@ -14,7 +17,6 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_sta
     AbstractStreamStateConverter,
     ConcurrencyCompatibleStateType,
 )
-from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
 
 
 class DateTimeStreamStateConverter(AbstractStreamStateConverter):
@@ -34,7 +36,7 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
 
     @classmethod
     def get_end_provider(cls) -> Callable[[], datetime]:
-        return ab_datetime_now
+        return lambda: datetime.now(timezone.utc)
 
     @abstractmethod
     def increment(self, timestamp: datetime) -> datetime: ...
@@ -134,10 +136,10 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
         return int(timestamp.timestamp())
 
     def parse_timestamp(self, timestamp: int) -> datetime:
-        dt_object = AirbyteDateTime.fromtimestamp(timestamp, timezone.utc)
-        if not isinstance(dt_object, AirbyteDateTime):
+        dt_object = pendulum.from_timestamp(timestamp)
+        if not isinstance(dt_object, DateTime):
             raise ValueError(
-                f"AirbyteDateTime object was expected but got {type(dt_object)} from AirbyteDateTime.fromtimestamp({timestamp})"
+                f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
         return dt_object
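
A small, illustrative check of the pendulum behavior this code path relies on (not part of the diff):

    import pendulum
    from pendulum.datetime import DateTime

    # pendulum.from_timestamp defaults to UTC and returns a pendulum DateTime,
    # which is what the isinstance check above expects.
    dt = pendulum.from_timestamp(1700000000)
    assert isinstance(dt, DateTime)
    assert (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second) == (2023, 11, 14, 22, 13, 20)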
@@ -167,25 +169,14 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
     def increment(self, timestamp: datetime) -> datetime:
         return timestamp + self._cursor_granularity
 
-    def output_format(self, timestamp: datetime) -> str:
-        """Format datetime with milliseconds always included.
-
-        Args:
-            timestamp: The datetime to format.
-
-        Returns:
-            str: ISO8601/RFC3339 formatted string with milliseconds.
-        """
-        dt = AirbyteDateTime.from_datetime(timestamp)
-        # Always include milliseconds, even if zero
-        millis = dt.microsecond // 1000 if dt.microsecond else 0
-        return f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d}T{dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}.{millis:03d}Z"
+    def output_format(self, timestamp: datetime) -> Any:
+        return timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
 
     def parse_timestamp(self, timestamp: str) -> datetime:
-        dt_object = ab_datetime_parse(timestamp)
-        if not isinstance(dt_object, AirbyteDateTime):
+        dt_object = pendulum.parse(timestamp)
+        if not isinstance(dt_object, DateTime):
             raise ValueError(
-                f"AirbyteDateTime object was expected but got {type(dt_object)} from parse({timestamp})"
+                f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
         return dt_object
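
Both the strftime-based formatting in this build and the removed always-three-digit formatting produce the same millisecond-precision string; a quick illustrative check (not part of the diff):

    from datetime import datetime, timezone

    ts = datetime(2024, 3, 5, 12, 30, 45, 123456, tzinfo=timezone.utc)

    # Formatting used in this build: truncate strftime's microseconds to milliseconds.
    new_style = ts.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

    # Equivalent of the removed formatting: always emit three millisecond digits.
    millis = ts.microsecond // 1000
    old_style = f"{ts.year:04d}-{ts.month:02d}-{ts.day:02d}T{ts.hour:02d}:{ts.minute:02d}:{ts.second:02d}.{millis:03d}Z"

    assert new_style == old_style == "2024-03-05T12:30:45.123Z"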
191
182
 
@@ -193,7 +184,7 @@ class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
 class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
     """
     Datetime State converter that emits state according to the supplied datetime format. The converter supports reading
-    incoming state in any valid datetime format using AirbyteDateTime parsing utilities.
+    incoming state in any valid datetime format via Pendulum.
     """
 
     def __init__(
airbyte_cdk/sources/streams/core.py

@@ -223,17 +223,17 @@ class Stream(ABC):
                     record_counter += 1
 
                     checkpoint_interval = self.state_checkpoint_interval
+                    checkpoint = checkpoint_reader.get_checkpoint()
                     if (
                         should_checkpoint
                         and checkpoint_interval
                         and record_counter % checkpoint_interval == 0
+                        and checkpoint is not None
                     ):
-                        checkpoint = checkpoint_reader.get_checkpoint()
-                        if checkpoint:
-                            airbyte_state_message = self._checkpoint_state(
-                                checkpoint, state_manager=state_manager
-                            )
-                            yield airbyte_state_message
+                        airbyte_state_message = self._checkpoint_state(
+                            checkpoint, state_manager=state_manager
+                        )
+                        yield airbyte_state_message
 
                     if internal_config.is_limit_reached(record_counter):
                         break
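
A condensed, illustrative reading of the checkpoint condition in this build (a simplified helper, not the actual Stream.read implementation): the checkpoint is fetched before the interval check and must be non-None, whereas the released version fetched it only after the interval check and tested plain truthiness.

    from typing import Any, Mapping, Optional

    def should_emit_state(
        record_counter: int,
        checkpoint_interval: Optional[int],
        checkpoint: Optional[Mapping[str, Any]],
        should_checkpoint: bool = True,
    ) -> bool:
        # Mirrors the combined condition above: all four clauses must hold
        # before a state message is built and yielded.
        return bool(
            should_checkpoint
            and checkpoint_interval
            and record_counter % checkpoint_interval == 0
            and checkpoint is not None
        )

    # With a checkpoint interval of 100, state is emitted on records 100, 200, ...
    assert should_emit_state(100, 100, {"cursor": "2024-01-01"}) is True
    assert should_emit_state(101, 100, {"cursor": "2024-01-01"}) is False
    assert should_emit_state(100, 100, None) is False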