airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
  6. airbyte_cdk/sources/declarative/auth/token.py +8 -3
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
  11. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  14. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
  15. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
  16. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  17. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  18. airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
  19. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
  20. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  21. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
  22. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  23. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
  24. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
  25. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  26. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  27. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
  28. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
  29. airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
  30. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
  31. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  32. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
  33. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  34. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  35. airbyte_cdk/sources/http_logger.py +1 -1
  36. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  37. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  38. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  39. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  40. airbyte_cdk/sources/streams/core.py +6 -6
  41. airbyte_cdk/sources/streams/http/http.py +1 -2
  42. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  43. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
  44. airbyte_cdk/sources/types.py +4 -2
  45. airbyte_cdk/sources/utils/transform.py +23 -2
  46. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  47. airbyte_cdk/utils/datetime_helpers.py +499 -0
  48. airbyte_cdk/utils/mapping_helpers.py +27 -86
  49. airbyte_cdk/utils/slice_hasher.py +8 -1
  50. airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
  51. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
  52. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
  53. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
  54. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
  55. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,99 @@
1
+ from abc import ABC
2
+ from datetime import datetime, timedelta
3
+ from enum import Enum
4
+ from typing import Callable
5
+
6
+ from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType
7
+
8
+
9
+ class ClampingStrategy(ABC):
10
+ def clamp(self, value: CursorValueType) -> CursorValueType:
11
+ raise NotImplementedError()
12
+
13
+
14
+ class NoClamping(ClampingStrategy):
15
+ def clamp(self, value: CursorValueType) -> CursorValueType:
16
+ return value
17
+
18
+
19
+ class ClampingEndProvider:
20
+ def __init__(
21
+ self,
22
+ clamping_strategy: ClampingStrategy,
23
+ end_provider: Callable[[], CursorValueType],
24
+ granularity: timedelta,
25
+ ) -> None:
26
+ self._clamping_strategy = clamping_strategy
27
+ self._end_provider = end_provider
28
+ self._granularity = granularity
29
+
30
+ def __call__(self) -> CursorValueType:
31
+ return self._clamping_strategy.clamp(self._end_provider()) - self._granularity
32
+
33
+
34
+ class DayClampingStrategy(ClampingStrategy):
35
+ def __init__(self, is_ceiling: bool = True) -> None:
36
+ self._is_ceiling = is_ceiling
37
+
38
+ def clamp(self, value: datetime) -> datetime: # type: ignore # datetime implements method from CursorValueType
39
+ return_value = value.replace(hour=0, minute=0, second=0, microsecond=0)
40
+ if self._is_ceiling:
41
+ return return_value + timedelta(days=1)
42
+ return return_value
43
+
44
+
45
+ class MonthClampingStrategy(ClampingStrategy):
46
+ def __init__(self, is_ceiling: bool = True) -> None:
47
+ self._is_ceiling = is_ceiling
48
+
49
+ def clamp(self, value: datetime) -> datetime: # type: ignore # datetime implements method from CursorValueType
50
+ return_value = value.replace(hour=0, minute=0, second=0, microsecond=0)
51
+ needs_to_round = value.day != 1
52
+ if not needs_to_round:
53
+ return return_value
54
+
55
+ return self._ceil(return_value) if self._is_ceiling else return_value.replace(day=1)
56
+
57
+ def _ceil(self, value: datetime) -> datetime:
58
+ return value.replace(
59
+ year=value.year + 1 if value.month == 12 else value.year,
60
+ month=(value.month % 12) + 1,
61
+ day=1,
62
+ hour=0,
63
+ minute=0,
64
+ second=0,
65
+ microsecond=0,
66
+ )
67
+
68
+
69
+ class Weekday(Enum):
70
+ """
71
+ These integer values map to the same ones used by the Datetime.date.weekday() implementation
72
+ """
73
+
74
+ MONDAY = 0
75
+ TUESDAY = 1
76
+ WEDNESDAY = 2
77
+ THURSDAY = 3
78
+ FRIDAY = 4
79
+ SATURDAY = 5
80
+ SUNDAY = 6
81
+
82
+
83
+ class WeekClampingStrategy(ClampingStrategy):
84
+ def __init__(self, day_of_week: Weekday, is_ceiling: bool = True) -> None:
85
+ self._day_of_week = day_of_week.value
86
+ self._is_ceiling = is_ceiling
87
+
88
+ def clamp(self, value: datetime) -> datetime: # type: ignore # datetime implements method from CursorValueType
89
+ days_diff_to_ceiling = (
90
+ 7 - (value.weekday() - self._day_of_week)
91
+ if value.weekday() > self._day_of_week
92
+ else abs(value.weekday() - self._day_of_week)
93
+ )
94
+ delta = (
95
+ timedelta(days_diff_to_ceiling)
96
+ if self._is_ceiling
97
+ else timedelta(days_diff_to_ceiling - 7)
98
+ )
99
+ return value.replace(hour=0, minute=0, second=0, microsecond=0) + delta
@@ -13,7 +13,6 @@ from typing import (
13
13
  Mapping,
14
14
  MutableMapping,
15
15
  Optional,
16
- Protocol,
17
16
  Tuple,
18
17
  Union,
19
18
  )
@@ -21,6 +20,8 @@ from typing import (
21
20
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
22
21
  from airbyte_cdk.sources.message import MessageRepository
23
22
  from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
23
+ from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
24
+ from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
24
25
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
25
26
  from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
26
27
  from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
@@ -35,36 +36,6 @@ def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
35
36
  return functools.reduce(lambda a, b: a[b], path, mapping)
36
37
 
37
38
 
38
- class GapType(Protocol):
39
- """
40
- This is the representation of gaps between two cursor values. Examples:
41
- * if cursor values are datetimes, GapType is timedelta
42
- * if cursor values are integer, GapType will also be integer
43
- """
44
-
45
- pass
46
-
47
-
48
- class CursorValueType(Protocol):
49
- """Protocol for annotating comparable types."""
50
-
51
- @abstractmethod
52
- def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
53
- pass
54
-
55
- @abstractmethod
56
- def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
57
- pass
58
-
59
- @abstractmethod
60
- def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
61
- pass
62
-
63
- @abstractmethod
64
- def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
65
- pass
66
-
67
-
68
39
  class CursorField:
69
40
  def __init__(self, cursor_field_key: str) -> None:
70
41
  self.cursor_field_key = cursor_field_key
@@ -172,6 +143,7 @@ class ConcurrentCursor(Cursor):
172
143
  lookback_window: Optional[GapType] = None,
173
144
  slice_range: Optional[GapType] = None,
174
145
  cursor_granularity: Optional[GapType] = None,
146
+ clamping_strategy: ClampingStrategy = NoClamping(),
175
147
  ) -> None:
176
148
  self._stream_name = stream_name
177
149
  self._stream_namespace = stream_namespace
@@ -193,10 +165,13 @@ class ConcurrentCursor(Cursor):
193
165
  self._cursor_granularity = cursor_granularity
194
166
  # Flag to track if the logger has been triggered (per stream)
195
167
  self._should_be_synced_logger_triggered = False
168
+ self._clamping_strategy = clamping_strategy
196
169
 
197
170
  @property
198
171
  def state(self) -> MutableMapping[str, Any]:
199
- return self._concurrent_state
172
+ return self._connector_state_converter.convert_to_state_message(
173
+ self.cursor_field, self._concurrent_state
174
+ )
200
175
 
201
176
  @property
202
177
  def cursor_field(self) -> CursorField:
@@ -241,10 +216,10 @@ class ConcurrentCursor(Cursor):
241
216
  return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
242
217
 
243
218
  def close_partition(self, partition: Partition) -> None:
244
- slice_count_before = len(self.state.get("slices", []))
219
+ slice_count_before = len(self._concurrent_state.get("slices", []))
245
220
  self._add_slice_to_state(partition)
246
221
  if slice_count_before < len(
247
- self.state["slices"]
222
+ self._concurrent_state["slices"]
248
223
  ): # only emit if at least one slice has been processed
249
224
  self._merge_partitions()
250
225
  self._emit_state_message()
@@ -256,11 +231,11 @@ class ConcurrentCursor(Cursor):
256
231
  )
257
232
 
258
233
  if self._slice_boundary_fields:
259
- if "slices" not in self.state:
234
+ if "slices" not in self._concurrent_state:
260
235
  raise RuntimeError(
261
236
  f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
262
237
  )
263
- self.state["slices"].append(
238
+ self._concurrent_state["slices"].append(
264
239
  {
265
240
  self._connector_state_converter.START_KEY: self._extract_from_slice(
266
241
  partition, self._slice_boundary_fields[self._START_BOUNDARY]
@@ -288,7 +263,7 @@ class ConcurrentCursor(Cursor):
288
263
  "expected. Please contact the Airbyte team."
289
264
  )
290
265
 
291
- self.state["slices"].append(
266
+ self._concurrent_state["slices"].append(
292
267
  {
293
268
  self._connector_state_converter.START_KEY: self.start,
294
269
  self._connector_state_converter.END_KEY: most_recent_cursor_value,
@@ -300,9 +275,7 @@ class ConcurrentCursor(Cursor):
300
275
  self._connector_state_manager.update_state_for_stream(
301
276
  self._stream_name,
302
277
  self._stream_namespace,
303
- self._connector_state_converter.convert_to_state_message(
304
- self._cursor_field, self.state
305
- ),
278
+ self.state,
306
279
  )
307
280
  state_message = self._connector_state_manager.create_state_message(
308
281
  self._stream_name, self._stream_namespace
@@ -310,7 +283,9 @@ class ConcurrentCursor(Cursor):
310
283
  self._message_repository.emit_message(state_message)
311
284
 
312
285
  def _merge_partitions(self) -> None:
313
- self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
286
+ self._concurrent_state["slices"] = self._connector_state_converter.merge_intervals(
287
+ self._concurrent_state["slices"]
288
+ )
314
289
 
315
290
  def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
316
291
  try:
@@ -347,36 +322,42 @@ class ConcurrentCursor(Cursor):
347
322
  if self._start is not None and self._is_start_before_first_slice():
348
323
  yield from self._split_per_slice_range(
349
324
  self._start,
350
- self.state["slices"][0][self._connector_state_converter.START_KEY],
325
+ self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY],
351
326
  False,
352
327
  )
353
328
 
354
- if len(self.state["slices"]) == 1:
329
+ if len(self._concurrent_state["slices"]) == 1:
355
330
  yield from self._split_per_slice_range(
356
331
  self._calculate_lower_boundary_of_last_slice(
357
- self.state["slices"][0][self._connector_state_converter.END_KEY]
332
+ self._concurrent_state["slices"][0][self._connector_state_converter.END_KEY]
358
333
  ),
359
334
  self._end_provider(),
360
335
  True,
361
336
  )
362
- elif len(self.state["slices"]) > 1:
363
- for i in range(len(self.state["slices"]) - 1):
337
+ elif len(self._concurrent_state["slices"]) > 1:
338
+ for i in range(len(self._concurrent_state["slices"]) - 1):
364
339
  if self._cursor_granularity:
365
340
  yield from self._split_per_slice_range(
366
- self.state["slices"][i][self._connector_state_converter.END_KEY]
341
+ self._concurrent_state["slices"][i][self._connector_state_converter.END_KEY]
367
342
  + self._cursor_granularity,
368
- self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
343
+ self._concurrent_state["slices"][i + 1][
344
+ self._connector_state_converter.START_KEY
345
+ ],
369
346
  False,
370
347
  )
371
348
  else:
372
349
  yield from self._split_per_slice_range(
373
- self.state["slices"][i][self._connector_state_converter.END_KEY],
374
- self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
350
+ self._concurrent_state["slices"][i][
351
+ self._connector_state_converter.END_KEY
352
+ ],
353
+ self._concurrent_state["slices"][i + 1][
354
+ self._connector_state_converter.START_KEY
355
+ ],
375
356
  False,
376
357
  )
377
358
  yield from self._split_per_slice_range(
378
359
  self._calculate_lower_boundary_of_last_slice(
379
- self.state["slices"][-1][self._connector_state_converter.END_KEY]
360
+ self._concurrent_state["slices"][-1][self._connector_state_converter.END_KEY]
380
361
  ),
381
362
  self._end_provider(),
382
363
  True,
@@ -387,7 +368,8 @@ class ConcurrentCursor(Cursor):
387
368
  def _is_start_before_first_slice(self) -> bool:
388
369
  return (
389
370
  self._start is not None
390
- and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
371
+ and self._start
372
+ < self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
391
373
  )
392
374
 
393
375
  def _calculate_lower_boundary_of_last_slice(
@@ -408,10 +390,12 @@ class ConcurrentCursor(Cursor):
408
390
 
409
391
  lower = max(lower, self._start) if self._start else lower
410
392
  if not self._slice_range or self._evaluate_upper_safely(lower, self._slice_range) >= upper:
393
+ clamped_lower = self._clamping_strategy.clamp(lower)
394
+ clamped_upper = self._clamping_strategy.clamp(upper)
411
395
  start_value, end_value = (
412
- (lower, upper - self._cursor_granularity)
396
+ (clamped_lower, clamped_upper - self._cursor_granularity)
413
397
  if self._cursor_granularity and not upper_is_end
414
- else (lower, upper)
398
+ else (clamped_lower, clamped_upper)
415
399
  )
416
400
  yield StreamSlice(
417
401
  partition={},
@@ -433,11 +417,21 @@ class ConcurrentCursor(Cursor):
433
417
  )
434
418
  has_reached_upper_boundary = current_upper_boundary >= upper
435
419
 
420
+ clamped_upper = (
421
+ self._clamping_strategy.clamp(current_upper_boundary)
422
+ if current_upper_boundary != upper
423
+ else current_upper_boundary
424
+ )
425
+ clamped_lower = self._clamping_strategy.clamp(current_lower_boundary)
426
+ if clamped_lower >= clamped_upper:
427
+ # clamping collapsed both values which means that it is time to stop processing
428
+ # FIXME should this be replace by proper end_provider
429
+ break
436
430
  start_value, end_value = (
437
- (current_lower_boundary, current_upper_boundary - self._cursor_granularity)
431
+ (clamped_lower, clamped_upper - self._cursor_granularity)
438
432
  if self._cursor_granularity
439
433
  and (not upper_is_end or not has_reached_upper_boundary)
440
- else (current_lower_boundary, current_upper_boundary)
434
+ else (clamped_lower, clamped_upper)
441
435
  )
442
436
  yield StreamSlice(
443
437
  partition={},
@@ -450,7 +444,7 @@ class ConcurrentCursor(Cursor):
450
444
  ]: self._connector_state_converter.output_format(end_value),
451
445
  },
452
446
  )
453
- current_lower_boundary = current_upper_boundary
447
+ current_lower_boundary = clamped_upper
454
448
  if current_upper_boundary >= upper:
455
449
  stop_processing = True
456
450
 
@@ -0,0 +1,32 @@
1
+ from abc import abstractmethod
2
+ from typing import Protocol
3
+
4
+
5
+ class GapType(Protocol):
6
+ """
7
+ This is the representation of gaps between two cursor values. Examples:
8
+ * if cursor values are datetimes, GapType is timedelta
9
+ * if cursor values are integer, GapType will also be integer
10
+ """
11
+
12
+ pass
13
+
14
+
15
+ class CursorValueType(Protocol):
16
+ """Protocol for annotating comparable types."""
17
+
18
+ @abstractmethod
19
+ def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
20
+ pass
21
+
22
+ @abstractmethod
23
+ def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
24
+ pass
25
+
26
+ @abstractmethod
27
+ def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
28
+ pass
29
+
30
+ @abstractmethod
31
+ def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
32
+ pass
@@ -6,9 +6,6 @@ from abc import abstractmethod
6
6
  from datetime import datetime, timedelta, timezone
7
7
  from typing import Any, Callable, List, MutableMapping, Optional, Tuple
8
8
 
9
- import pendulum
10
- from pendulum.datetime import DateTime
11
-
12
9
  # FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and
13
10
  # the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest.
14
11
  from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
@@ -17,6 +14,7 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_sta
17
14
  AbstractStreamStateConverter,
18
15
  ConcurrencyCompatibleStateType,
19
16
  )
17
+ from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
20
18
 
21
19
 
22
20
  class DateTimeStreamStateConverter(AbstractStreamStateConverter):
@@ -36,7 +34,7 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
36
34
 
37
35
  @classmethod
38
36
  def get_end_provider(cls) -> Callable[[], datetime]:
39
- return lambda: datetime.now(timezone.utc)
37
+ return ab_datetime_now
40
38
 
41
39
  @abstractmethod
42
40
  def increment(self, timestamp: datetime) -> datetime: ...
@@ -136,10 +134,10 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
136
134
  return int(timestamp.timestamp())
137
135
 
138
136
  def parse_timestamp(self, timestamp: int) -> datetime:
139
- dt_object = pendulum.from_timestamp(timestamp)
140
- if not isinstance(dt_object, DateTime):
137
+ dt_object = AirbyteDateTime.fromtimestamp(timestamp, timezone.utc)
138
+ if not isinstance(dt_object, AirbyteDateTime):
141
139
  raise ValueError(
142
- f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
140
+ f"AirbyteDateTime object was expected but got {type(dt_object)} from AirbyteDateTime.fromtimestamp({timestamp})"
143
141
  )
144
142
  return dt_object
145
143
 
@@ -169,14 +167,25 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
169
167
  def increment(self, timestamp: datetime) -> datetime:
170
168
  return timestamp + self._cursor_granularity
171
169
 
172
- def output_format(self, timestamp: datetime) -> Any:
173
- return timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
170
+ def output_format(self, timestamp: datetime) -> str:
171
+ """Format datetime with milliseconds always included.
172
+
173
+ Args:
174
+ timestamp: The datetime to format.
175
+
176
+ Returns:
177
+ str: ISO8601/RFC3339 formatted string with milliseconds.
178
+ """
179
+ dt = AirbyteDateTime.from_datetime(timestamp)
180
+ # Always include milliseconds, even if zero
181
+ millis = dt.microsecond // 1000 if dt.microsecond else 0
182
+ return f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d}T{dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}.{millis:03d}Z"
174
183
 
175
184
  def parse_timestamp(self, timestamp: str) -> datetime:
176
- dt_object = pendulum.parse(timestamp)
177
- if not isinstance(dt_object, DateTime):
185
+ dt_object = ab_datetime_parse(timestamp)
186
+ if not isinstance(dt_object, AirbyteDateTime):
178
187
  raise ValueError(
179
- f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
188
+ f"AirbyteDateTime object was expected but got {type(dt_object)} from parse({timestamp})"
180
189
  )
181
190
  return dt_object
182
191
 
@@ -184,7 +193,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
184
193
  class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
185
194
  """
186
195
  Datetime State converter that emits state according to the supplied datetime format. The converter supports reading
187
- incoming state in any valid datetime format via Pendulum.
196
+ incoming state in any valid datetime format using AirbyteDateTime parsing utilities.
188
197
  """
189
198
 
190
199
  def __init__(
@@ -223,17 +223,17 @@ class Stream(ABC):
223
223
  record_counter += 1
224
224
 
225
225
  checkpoint_interval = self.state_checkpoint_interval
226
- checkpoint = checkpoint_reader.get_checkpoint()
227
226
  if (
228
227
  should_checkpoint
229
228
  and checkpoint_interval
230
229
  and record_counter % checkpoint_interval == 0
231
- and checkpoint is not None
232
230
  ):
233
- airbyte_state_message = self._checkpoint_state(
234
- checkpoint, state_manager=state_manager
235
- )
236
- yield airbyte_state_message
231
+ checkpoint = checkpoint_reader.get_checkpoint()
232
+ if checkpoint:
233
+ airbyte_state_message = self._checkpoint_state(
234
+ checkpoint, state_manager=state_manager
235
+ )
236
+ yield airbyte_state_message
237
237
 
238
238
  if internal_config.is_limit_reached(record_counter):
239
239
  break
@@ -423,8 +423,6 @@ class HttpStream(Stream, CheckpointMixin, ABC):
423
423
  stream_slice: Optional[Mapping[str, Any]] = None,
424
424
  stream_state: Optional[Mapping[str, Any]] = None,
425
425
  ) -> Iterable[StreamData]:
426
- partition, _, _ = self._extract_slice_fields(stream_slice=stream_slice)
427
-
428
426
  stream_state = stream_state or {}
429
427
  pagination_complete = False
430
428
  next_page_token = None
@@ -438,6 +436,7 @@ class HttpStream(Stream, CheckpointMixin, ABC):
438
436
 
439
437
  cursor = self.get_cursor()
440
438
  if cursor and isinstance(cursor, SubstreamResumableFullRefreshCursor):
439
+ partition, _, _ = self._extract_slice_fields(stream_slice=stream_slice)
441
440
  # Substreams checkpoint state by marking an entire parent partition as completed so that on the subsequent attempt
442
441
  # after a failure, completed parents are skipped and the sync can make progress
443
442
  cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition))