airbyte-cdk 6.31.1__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
- airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +62 -231
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +88 -171
- airbyte_cdk/sources/types.py +2 -4
- airbyte_cdk/sources/utils/transform.py +2 -23
- airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
- airbyte_cdk/utils/mapping_helpers.py +86 -27
- airbyte_cdk/utils/slice_hasher.py +1 -8
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +48 -54
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
- airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
- airbyte_cdk/utils/datetime_helpers.py +0 -499
- airbyte_cdk-6.31.1.dist-info/LICENSE_SHORT +0 -1
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.1.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -13,6 +13,7 @@ from typing import (
|
|
13
13
|
Mapping,
|
14
14
|
MutableMapping,
|
15
15
|
Optional,
|
16
|
+
Protocol,
|
16
17
|
Tuple,
|
17
18
|
Union,
|
18
19
|
)
|
@@ -20,8 +21,6 @@ from typing import (
|
|
20
21
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
21
22
|
from airbyte_cdk.sources.message import MessageRepository
|
22
23
|
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
|
23
|
-
from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
|
24
|
-
from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
|
25
24
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
26
25
|
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
27
26
|
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
|
@@ -36,6 +35,36 @@ def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
|
|
36
35
|
return functools.reduce(lambda a, b: a[b], path, mapping)
|
37
36
|
|
38
37
|
|
38
|
+
class GapType(Protocol):
|
39
|
+
"""
|
40
|
+
This is the representation of gaps between two cursor values. Examples:
|
41
|
+
* if cursor values are datetimes, GapType is timedelta
|
42
|
+
* if cursor values are integer, GapType will also be integer
|
43
|
+
"""
|
44
|
+
|
45
|
+
pass
|
46
|
+
|
47
|
+
|
48
|
+
class CursorValueType(Protocol):
|
49
|
+
"""Protocol for annotating comparable types."""
|
50
|
+
|
51
|
+
@abstractmethod
|
52
|
+
def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
|
53
|
+
pass
|
54
|
+
|
55
|
+
@abstractmethod
|
56
|
+
def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
|
57
|
+
pass
|
58
|
+
|
59
|
+
@abstractmethod
|
60
|
+
def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
|
61
|
+
pass
|
62
|
+
|
63
|
+
@abstractmethod
|
64
|
+
def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
|
65
|
+
pass
|
66
|
+
|
67
|
+
|
39
68
|
class CursorField:
|
40
69
|
def __init__(self, cursor_field_key: str) -> None:
|
41
70
|
self.cursor_field_key = cursor_field_key
|
@@ -143,7 +172,6 @@ class ConcurrentCursor(Cursor):
|
|
143
172
|
lookback_window: Optional[GapType] = None,
|
144
173
|
slice_range: Optional[GapType] = None,
|
145
174
|
cursor_granularity: Optional[GapType] = None,
|
146
|
-
clamping_strategy: ClampingStrategy = NoClamping(),
|
147
175
|
) -> None:
|
148
176
|
self._stream_name = stream_name
|
149
177
|
self._stream_namespace = stream_namespace
|
@@ -165,13 +193,10 @@ class ConcurrentCursor(Cursor):
|
|
165
193
|
self._cursor_granularity = cursor_granularity
|
166
194
|
# Flag to track if the logger has been triggered (per stream)
|
167
195
|
self._should_be_synced_logger_triggered = False
|
168
|
-
self._clamping_strategy = clamping_strategy
|
169
196
|
|
170
197
|
@property
|
171
198
|
def state(self) -> MutableMapping[str, Any]:
|
172
|
-
return self.
|
173
|
-
self.cursor_field, self._concurrent_state
|
174
|
-
)
|
199
|
+
return self._concurrent_state
|
175
200
|
|
176
201
|
@property
|
177
202
|
def cursor_field(self) -> CursorField:
|
@@ -216,10 +241,10 @@ class ConcurrentCursor(Cursor):
|
|
216
241
|
return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
|
217
242
|
|
218
243
|
def close_partition(self, partition: Partition) -> None:
|
219
|
-
slice_count_before = len(self.
|
244
|
+
slice_count_before = len(self.state.get("slices", []))
|
220
245
|
self._add_slice_to_state(partition)
|
221
246
|
if slice_count_before < len(
|
222
|
-
self.
|
247
|
+
self.state["slices"]
|
223
248
|
): # only emit if at least one slice has been processed
|
224
249
|
self._merge_partitions()
|
225
250
|
self._emit_state_message()
|
@@ -231,11 +256,11 @@ class ConcurrentCursor(Cursor):
|
|
231
256
|
)
|
232
257
|
|
233
258
|
if self._slice_boundary_fields:
|
234
|
-
if "slices" not in self.
|
259
|
+
if "slices" not in self.state:
|
235
260
|
raise RuntimeError(
|
236
261
|
f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
|
237
262
|
)
|
238
|
-
self.
|
263
|
+
self.state["slices"].append(
|
239
264
|
{
|
240
265
|
self._connector_state_converter.START_KEY: self._extract_from_slice(
|
241
266
|
partition, self._slice_boundary_fields[self._START_BOUNDARY]
|
@@ -263,7 +288,7 @@ class ConcurrentCursor(Cursor):
|
|
263
288
|
"expected. Please contact the Airbyte team."
|
264
289
|
)
|
265
290
|
|
266
|
-
self.
|
291
|
+
self.state["slices"].append(
|
267
292
|
{
|
268
293
|
self._connector_state_converter.START_KEY: self.start,
|
269
294
|
self._connector_state_converter.END_KEY: most_recent_cursor_value,
|
@@ -275,7 +300,9 @@ class ConcurrentCursor(Cursor):
|
|
275
300
|
self._connector_state_manager.update_state_for_stream(
|
276
301
|
self._stream_name,
|
277
302
|
self._stream_namespace,
|
278
|
-
self.
|
303
|
+
self._connector_state_converter.convert_to_state_message(
|
304
|
+
self._cursor_field, self.state
|
305
|
+
),
|
279
306
|
)
|
280
307
|
state_message = self._connector_state_manager.create_state_message(
|
281
308
|
self._stream_name, self._stream_namespace
|
@@ -283,9 +310,7 @@ class ConcurrentCursor(Cursor):
|
|
283
310
|
self._message_repository.emit_message(state_message)
|
284
311
|
|
285
312
|
def _merge_partitions(self) -> None:
|
286
|
-
self.
|
287
|
-
self._concurrent_state["slices"]
|
288
|
-
)
|
313
|
+
self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
|
289
314
|
|
290
315
|
def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
|
291
316
|
try:
|
@@ -322,42 +347,36 @@ class ConcurrentCursor(Cursor):
|
|
322
347
|
if self._start is not None and self._is_start_before_first_slice():
|
323
348
|
yield from self._split_per_slice_range(
|
324
349
|
self._start,
|
325
|
-
self.
|
350
|
+
self.state["slices"][0][self._connector_state_converter.START_KEY],
|
326
351
|
False,
|
327
352
|
)
|
328
353
|
|
329
|
-
if len(self.
|
354
|
+
if len(self.state["slices"]) == 1:
|
330
355
|
yield from self._split_per_slice_range(
|
331
356
|
self._calculate_lower_boundary_of_last_slice(
|
332
|
-
self.
|
357
|
+
self.state["slices"][0][self._connector_state_converter.END_KEY]
|
333
358
|
),
|
334
359
|
self._end_provider(),
|
335
360
|
True,
|
336
361
|
)
|
337
|
-
elif len(self.
|
338
|
-
for i in range(len(self.
|
362
|
+
elif len(self.state["slices"]) > 1:
|
363
|
+
for i in range(len(self.state["slices"]) - 1):
|
339
364
|
if self._cursor_granularity:
|
340
365
|
yield from self._split_per_slice_range(
|
341
|
-
self.
|
366
|
+
self.state["slices"][i][self._connector_state_converter.END_KEY]
|
342
367
|
+ self._cursor_granularity,
|
343
|
-
self.
|
344
|
-
self._connector_state_converter.START_KEY
|
345
|
-
],
|
368
|
+
self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
|
346
369
|
False,
|
347
370
|
)
|
348
371
|
else:
|
349
372
|
yield from self._split_per_slice_range(
|
350
|
-
self.
|
351
|
-
|
352
|
-
],
|
353
|
-
self._concurrent_state["slices"][i + 1][
|
354
|
-
self._connector_state_converter.START_KEY
|
355
|
-
],
|
373
|
+
self.state["slices"][i][self._connector_state_converter.END_KEY],
|
374
|
+
self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
|
356
375
|
False,
|
357
376
|
)
|
358
377
|
yield from self._split_per_slice_range(
|
359
378
|
self._calculate_lower_boundary_of_last_slice(
|
360
|
-
self.
|
379
|
+
self.state["slices"][-1][self._connector_state_converter.END_KEY]
|
361
380
|
),
|
362
381
|
self._end_provider(),
|
363
382
|
True,
|
@@ -368,8 +387,7 @@ class ConcurrentCursor(Cursor):
|
|
368
387
|
def _is_start_before_first_slice(self) -> bool:
|
369
388
|
return (
|
370
389
|
self._start is not None
|
371
|
-
and self._start
|
372
|
-
< self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
|
390
|
+
and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
|
373
391
|
)
|
374
392
|
|
375
393
|
def _calculate_lower_boundary_of_last_slice(
|
@@ -390,12 +408,10 @@ class ConcurrentCursor(Cursor):
|
|
390
408
|
|
391
409
|
lower = max(lower, self._start) if self._start else lower
|
392
410
|
if not self._slice_range or self._evaluate_upper_safely(lower, self._slice_range) >= upper:
|
393
|
-
clamped_lower = self._clamping_strategy.clamp(lower)
|
394
|
-
clamped_upper = self._clamping_strategy.clamp(upper)
|
395
411
|
start_value, end_value = (
|
396
|
-
(
|
412
|
+
(lower, upper - self._cursor_granularity)
|
397
413
|
if self._cursor_granularity and not upper_is_end
|
398
|
-
else (
|
414
|
+
else (lower, upper)
|
399
415
|
)
|
400
416
|
yield StreamSlice(
|
401
417
|
partition={},
|
@@ -417,21 +433,11 @@ class ConcurrentCursor(Cursor):
|
|
417
433
|
)
|
418
434
|
has_reached_upper_boundary = current_upper_boundary >= upper
|
419
435
|
|
420
|
-
clamped_upper = (
|
421
|
-
self._clamping_strategy.clamp(current_upper_boundary)
|
422
|
-
if current_upper_boundary != upper
|
423
|
-
else current_upper_boundary
|
424
|
-
)
|
425
|
-
clamped_lower = self._clamping_strategy.clamp(current_lower_boundary)
|
426
|
-
if clamped_lower >= clamped_upper:
|
427
|
-
# clamping collapsed both values which means that it is time to stop processing
|
428
|
-
# FIXME should this be replace by proper end_provider
|
429
|
-
break
|
430
436
|
start_value, end_value = (
|
431
|
-
(
|
437
|
+
(current_lower_boundary, current_upper_boundary - self._cursor_granularity)
|
432
438
|
if self._cursor_granularity
|
433
439
|
and (not upper_is_end or not has_reached_upper_boundary)
|
434
|
-
else (
|
440
|
+
else (current_lower_boundary, current_upper_boundary)
|
435
441
|
)
|
436
442
|
yield StreamSlice(
|
437
443
|
partition={},
|
@@ -444,7 +450,7 @@ class ConcurrentCursor(Cursor):
|
|
444
450
|
]: self._connector_state_converter.output_format(end_value),
|
445
451
|
},
|
446
452
|
)
|
447
|
-
current_lower_boundary =
|
453
|
+
current_lower_boundary = current_upper_boundary
|
448
454
|
if current_upper_boundary >= upper:
|
449
455
|
stop_processing = True
|
450
456
|
|
@@ -6,6 +6,9 @@ from abc import abstractmethod
|
|
6
6
|
from datetime import datetime, timedelta, timezone
|
7
7
|
from typing import Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
|
+
import pendulum
|
10
|
+
from pendulum.datetime import DateTime
|
11
|
+
|
9
12
|
# FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and
|
10
13
|
# the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest.
|
11
14
|
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
|
@@ -14,7 +17,6 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_sta
|
|
14
17
|
AbstractStreamStateConverter,
|
15
18
|
ConcurrencyCompatibleStateType,
|
16
19
|
)
|
17
|
-
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
|
18
20
|
|
19
21
|
|
20
22
|
class DateTimeStreamStateConverter(AbstractStreamStateConverter):
|
@@ -34,7 +36,7 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
|
|
34
36
|
|
35
37
|
@classmethod
|
36
38
|
def get_end_provider(cls) -> Callable[[], datetime]:
|
37
|
-
return
|
39
|
+
return lambda: datetime.now(timezone.utc)
|
38
40
|
|
39
41
|
@abstractmethod
|
40
42
|
def increment(self, timestamp: datetime) -> datetime: ...
|
@@ -134,10 +136,10 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
134
136
|
return int(timestamp.timestamp())
|
135
137
|
|
136
138
|
def parse_timestamp(self, timestamp: int) -> datetime:
|
137
|
-
dt_object =
|
138
|
-
if not isinstance(dt_object,
|
139
|
+
dt_object = pendulum.from_timestamp(timestamp)
|
140
|
+
if not isinstance(dt_object, DateTime):
|
139
141
|
raise ValueError(
|
140
|
-
f"
|
142
|
+
f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
|
141
143
|
)
|
142
144
|
return dt_object
|
143
145
|
|
@@ -167,25 +169,14 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
167
169
|
def increment(self, timestamp: datetime) -> datetime:
|
168
170
|
return timestamp + self._cursor_granularity
|
169
171
|
|
170
|
-
def output_format(self, timestamp: datetime) ->
|
171
|
-
""
|
172
|
-
|
173
|
-
Args:
|
174
|
-
timestamp: The datetime to format.
|
175
|
-
|
176
|
-
Returns:
|
177
|
-
str: ISO8601/RFC3339 formatted string with milliseconds.
|
178
|
-
"""
|
179
|
-
dt = AirbyteDateTime.from_datetime(timestamp)
|
180
|
-
# Always include milliseconds, even if zero
|
181
|
-
millis = dt.microsecond // 1000 if dt.microsecond else 0
|
182
|
-
return f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d}T{dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}.{millis:03d}Z"
|
172
|
+
def output_format(self, timestamp: datetime) -> Any:
|
173
|
+
return timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
183
174
|
|
184
175
|
def parse_timestamp(self, timestamp: str) -> datetime:
|
185
|
-
dt_object =
|
186
|
-
if not isinstance(dt_object,
|
176
|
+
dt_object = pendulum.parse(timestamp)
|
177
|
+
if not isinstance(dt_object, DateTime):
|
187
178
|
raise ValueError(
|
188
|
-
f"
|
179
|
+
f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
|
189
180
|
)
|
190
181
|
return dt_object
|
191
182
|
|
@@ -193,7 +184,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
193
184
|
class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
|
194
185
|
"""
|
195
186
|
Datetime State converter that emits state according to the supplied datetime format. The converter supports reading
|
196
|
-
incoming state in any valid datetime format
|
187
|
+
incoming state in any valid datetime format via Pendulum.
|
197
188
|
"""
|
198
189
|
|
199
190
|
def __init__(
|
@@ -223,17 +223,17 @@ class Stream(ABC):
|
|
223
223
|
record_counter += 1
|
224
224
|
|
225
225
|
checkpoint_interval = self.state_checkpoint_interval
|
226
|
+
checkpoint = checkpoint_reader.get_checkpoint()
|
226
227
|
if (
|
227
228
|
should_checkpoint
|
228
229
|
and checkpoint_interval
|
229
230
|
and record_counter % checkpoint_interval == 0
|
231
|
+
and checkpoint is not None
|
230
232
|
):
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
)
|
236
|
-
yield airbyte_state_message
|
233
|
+
airbyte_state_message = self._checkpoint_state(
|
234
|
+
checkpoint, state_manager=state_manager
|
235
|
+
)
|
236
|
+
yield airbyte_state_message
|
237
237
|
|
238
238
|
if internal_config.is_limit_reached(record_counter):
|
239
239
|
break
|