airbyte-cdk 6.31.1.dev0__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
- airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
- airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +6 -17
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +31 -43
- airbyte_cdk/sources/types.py +2 -4
- airbyte_cdk/sources/utils/transform.py +2 -23
- airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
- airbyte_cdk/utils/mapping_helpers.py +86 -27
- airbyte_cdk/utils/slice_hasher.py +1 -8
- {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +49 -55
- {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
- airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
- airbyte_cdk/utils/datetime_helpers.py +0 -499
- airbyte_cdk-6.31.1.dev0.dist-info/LICENSE_SHORT +0 -1
- {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -13,6 +13,7 @@ from typing import (
|
|
13
13
|
Mapping,
|
14
14
|
MutableMapping,
|
15
15
|
Optional,
|
16
|
+
Protocol,
|
16
17
|
Tuple,
|
17
18
|
Union,
|
18
19
|
)
|
@@ -20,8 +21,6 @@ from typing import (
|
|
20
21
|
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
21
22
|
from airbyte_cdk.sources.message import MessageRepository
|
22
23
|
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
|
23
|
-
from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
|
24
|
-
from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
|
25
24
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
26
25
|
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
27
26
|
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
|
@@ -36,6 +35,36 @@ def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
|
|
36
35
|
return functools.reduce(lambda a, b: a[b], path, mapping)
|
37
36
|
|
38
37
|
|
38
|
+
class GapType(Protocol):
|
39
|
+
"""
|
40
|
+
This is the representation of gaps between two cursor values. Examples:
|
41
|
+
* if cursor values are datetimes, GapType is timedelta
|
42
|
+
* if cursor values are integers, GapType will also be an integer
|
43
|
+
"""
|
44
|
+
|
45
|
+
pass
|
46
|
+
|
47
|
+
|
48
|
+
class CursorValueType(Protocol):
|
49
|
+
"""Protocol for annotating comparable types."""
|
50
|
+
|
51
|
+
@abstractmethod
|
52
|
+
def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
|
53
|
+
pass
|
54
|
+
|
55
|
+
@abstractmethod
|
56
|
+
def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
|
57
|
+
pass
|
58
|
+
|
59
|
+
@abstractmethod
|
60
|
+
def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
|
61
|
+
pass
|
62
|
+
|
63
|
+
@abstractmethod
|
64
|
+
def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
|
65
|
+
pass
|
66
|
+
|
67
|
+
|
39
68
|
class CursorField:
|
40
69
|
def __init__(self, cursor_field_key: str) -> None:
|
41
70
|
self.cursor_field_key = cursor_field_key
|
@@ -143,7 +172,6 @@ class ConcurrentCursor(Cursor):
|
|
143
172
|
lookback_window: Optional[GapType] = None,
|
144
173
|
slice_range: Optional[GapType] = None,
|
145
174
|
cursor_granularity: Optional[GapType] = None,
|
146
|
-
clamping_strategy: ClampingStrategy = NoClamping(),
|
147
175
|
) -> None:
|
148
176
|
self._stream_name = stream_name
|
149
177
|
self._stream_namespace = stream_namespace
|
@@ -165,13 +193,10 @@ class ConcurrentCursor(Cursor):
|
|
165
193
|
self._cursor_granularity = cursor_granularity
|
166
194
|
# Flag to track if the logger has been triggered (per stream)
|
167
195
|
self._should_be_synced_logger_triggered = False
|
168
|
-
self._clamping_strategy = clamping_strategy
|
169
196
|
|
170
197
|
@property
|
171
198
|
def state(self) -> MutableMapping[str, Any]:
|
172
|
-
return self.
|
173
|
-
self.cursor_field, self._concurrent_state
|
174
|
-
)
|
199
|
+
return self._concurrent_state
|
175
200
|
|
176
201
|
@property
|
177
202
|
def cursor_field(self) -> CursorField:
|
@@ -216,10 +241,10 @@ class ConcurrentCursor(Cursor):
|
|
216
241
|
return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
|
217
242
|
|
218
243
|
def close_partition(self, partition: Partition) -> None:
|
219
|
-
slice_count_before = len(self.
|
244
|
+
slice_count_before = len(self.state.get("slices", []))
|
220
245
|
self._add_slice_to_state(partition)
|
221
246
|
if slice_count_before < len(
|
222
|
-
self.
|
247
|
+
self.state["slices"]
|
223
248
|
): # only emit if at least one slice has been processed
|
224
249
|
self._merge_partitions()
|
225
250
|
self._emit_state_message()
|
@@ -231,11 +256,11 @@ class ConcurrentCursor(Cursor):
|
|
231
256
|
)
|
232
257
|
|
233
258
|
if self._slice_boundary_fields:
|
234
|
-
if "slices" not in self.
|
259
|
+
if "slices" not in self.state:
|
235
260
|
raise RuntimeError(
|
236
261
|
f"The state for stream {self._stream_name} should have at least one slice to delineate the sync start time, but no slices are present. This is unexpected. Please contact Support."
|
237
262
|
)
|
238
|
-
self.
|
263
|
+
self.state["slices"].append(
|
239
264
|
{
|
240
265
|
self._connector_state_converter.START_KEY: self._extract_from_slice(
|
241
266
|
partition, self._slice_boundary_fields[self._START_BOUNDARY]
|
@@ -263,7 +288,7 @@ class ConcurrentCursor(Cursor):
|
|
263
288
|
"expected. Please contact the Airbyte team."
|
264
289
|
)
|
265
290
|
|
266
|
-
self.
|
291
|
+
self.state["slices"].append(
|
267
292
|
{
|
268
293
|
self._connector_state_converter.START_KEY: self.start,
|
269
294
|
self._connector_state_converter.END_KEY: most_recent_cursor_value,
|
@@ -275,7 +300,9 @@ class ConcurrentCursor(Cursor):
|
|
275
300
|
self._connector_state_manager.update_state_for_stream(
|
276
301
|
self._stream_name,
|
277
302
|
self._stream_namespace,
|
278
|
-
self.
|
303
|
+
self._connector_state_converter.convert_to_state_message(
|
304
|
+
self._cursor_field, self.state
|
305
|
+
),
|
279
306
|
)
|
280
307
|
state_message = self._connector_state_manager.create_state_message(
|
281
308
|
self._stream_name, self._stream_namespace
|
@@ -283,9 +310,7 @@ class ConcurrentCursor(Cursor):
|
|
283
310
|
self._message_repository.emit_message(state_message)
|
284
311
|
|
285
312
|
def _merge_partitions(self) -> None:
|
286
|
-
self.
|
287
|
-
self._concurrent_state["slices"]
|
288
|
-
)
|
313
|
+
self.state["slices"] = self._connector_state_converter.merge_intervals(self.state["slices"])
|
289
314
|
|
290
315
|
def _extract_from_slice(self, partition: Partition, key: str) -> CursorValueType:
|
291
316
|
try:
|
@@ -322,42 +347,36 @@ class ConcurrentCursor(Cursor):
|
|
322
347
|
if self._start is not None and self._is_start_before_first_slice():
|
323
348
|
yield from self._split_per_slice_range(
|
324
349
|
self._start,
|
325
|
-
self.
|
350
|
+
self.state["slices"][0][self._connector_state_converter.START_KEY],
|
326
351
|
False,
|
327
352
|
)
|
328
353
|
|
329
|
-
if len(self.
|
354
|
+
if len(self.state["slices"]) == 1:
|
330
355
|
yield from self._split_per_slice_range(
|
331
356
|
self._calculate_lower_boundary_of_last_slice(
|
332
|
-
self.
|
357
|
+
self.state["slices"][0][self._connector_state_converter.END_KEY]
|
333
358
|
),
|
334
359
|
self._end_provider(),
|
335
360
|
True,
|
336
361
|
)
|
337
|
-
elif len(self.
|
338
|
-
for i in range(len(self.
|
362
|
+
elif len(self.state["slices"]) > 1:
|
363
|
+
for i in range(len(self.state["slices"]) - 1):
|
339
364
|
if self._cursor_granularity:
|
340
365
|
yield from self._split_per_slice_range(
|
341
|
-
self.
|
366
|
+
self.state["slices"][i][self._connector_state_converter.END_KEY]
|
342
367
|
+ self._cursor_granularity,
|
343
|
-
self.
|
344
|
-
self._connector_state_converter.START_KEY
|
345
|
-
],
|
368
|
+
self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
|
346
369
|
False,
|
347
370
|
)
|
348
371
|
else:
|
349
372
|
yield from self._split_per_slice_range(
|
350
|
-
self.
|
351
|
-
|
352
|
-
],
|
353
|
-
self._concurrent_state["slices"][i + 1][
|
354
|
-
self._connector_state_converter.START_KEY
|
355
|
-
],
|
373
|
+
self.state["slices"][i][self._connector_state_converter.END_KEY],
|
374
|
+
self.state["slices"][i + 1][self._connector_state_converter.START_KEY],
|
356
375
|
False,
|
357
376
|
)
|
358
377
|
yield from self._split_per_slice_range(
|
359
378
|
self._calculate_lower_boundary_of_last_slice(
|
360
|
-
self.
|
379
|
+
self.state["slices"][-1][self._connector_state_converter.END_KEY]
|
361
380
|
),
|
362
381
|
self._end_provider(),
|
363
382
|
True,
|
@@ -368,8 +387,7 @@ class ConcurrentCursor(Cursor):
|
|
368
387
|
def _is_start_before_first_slice(self) -> bool:
|
369
388
|
return (
|
370
389
|
self._start is not None
|
371
|
-
and self._start
|
372
|
-
< self._concurrent_state["slices"][0][self._connector_state_converter.START_KEY]
|
390
|
+
and self._start < self.state["slices"][0][self._connector_state_converter.START_KEY]
|
373
391
|
)
|
374
392
|
|
375
393
|
def _calculate_lower_boundary_of_last_slice(
|
@@ -390,12 +408,10 @@ class ConcurrentCursor(Cursor):
|
|
390
408
|
|
391
409
|
lower = max(lower, self._start) if self._start else lower
|
392
410
|
if not self._slice_range or self._evaluate_upper_safely(lower, self._slice_range) >= upper:
|
393
|
-
clamped_lower = self._clamping_strategy.clamp(lower)
|
394
|
-
clamped_upper = self._clamping_strategy.clamp(upper)
|
395
411
|
start_value, end_value = (
|
396
|
-
(
|
412
|
+
(lower, upper - self._cursor_granularity)
|
397
413
|
if self._cursor_granularity and not upper_is_end
|
398
|
-
else (
|
414
|
+
else (lower, upper)
|
399
415
|
)
|
400
416
|
yield StreamSlice(
|
401
417
|
partition={},
|
@@ -417,21 +433,11 @@ class ConcurrentCursor(Cursor):
|
|
417
433
|
)
|
418
434
|
has_reached_upper_boundary = current_upper_boundary >= upper
|
419
435
|
|
420
|
-
clamped_upper = (
|
421
|
-
self._clamping_strategy.clamp(current_upper_boundary)
|
422
|
-
if current_upper_boundary != upper
|
423
|
-
else current_upper_boundary
|
424
|
-
)
|
425
|
-
clamped_lower = self._clamping_strategy.clamp(current_lower_boundary)
|
426
|
-
if clamped_lower >= clamped_upper:
|
427
|
-
# clamping collapsed both values which means that it is time to stop processing
|
428
|
-
# FIXME should this be replace by proper end_provider
|
429
|
-
break
|
430
436
|
start_value, end_value = (
|
431
|
-
(
|
437
|
+
(current_lower_boundary, current_upper_boundary - self._cursor_granularity)
|
432
438
|
if self._cursor_granularity
|
433
439
|
and (not upper_is_end or not has_reached_upper_boundary)
|
434
|
-
else (
|
440
|
+
else (current_lower_boundary, current_upper_boundary)
|
435
441
|
)
|
436
442
|
yield StreamSlice(
|
437
443
|
partition={},
|
@@ -444,7 +450,7 @@ class ConcurrentCursor(Cursor):
|
|
444
450
|
]: self._connector_state_converter.output_format(end_value),
|
445
451
|
},
|
446
452
|
)
|
447
|
-
current_lower_boundary =
|
453
|
+
current_lower_boundary = current_upper_boundary
|
448
454
|
if current_upper_boundary >= upper:
|
449
455
|
stop_processing = True
|
450
456
|
|
@@ -6,6 +6,9 @@ from abc import abstractmethod
|
|
6
6
|
from datetime import datetime, timedelta, timezone
|
7
7
|
from typing import Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
|
+
import pendulum
|
10
|
+
from pendulum.datetime import DateTime
|
11
|
+
|
9
12
|
# FIXME We would eventually like the Concurrent package to be agnostic of the declarative package. However, this is a breaking change and
|
10
13
|
# the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest.
|
11
14
|
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
|
@@ -14,7 +17,6 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_sta
|
|
14
17
|
AbstractStreamStateConverter,
|
15
18
|
ConcurrencyCompatibleStateType,
|
16
19
|
)
|
17
|
-
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
|
18
20
|
|
19
21
|
|
20
22
|
class DateTimeStreamStateConverter(AbstractStreamStateConverter):
|
@@ -34,7 +36,7 @@ class DateTimeStreamStateConverter(AbstractStreamStateConverter):
|
|
34
36
|
|
35
37
|
@classmethod
|
36
38
|
def get_end_provider(cls) -> Callable[[], datetime]:
|
37
|
-
return
|
39
|
+
return lambda: datetime.now(timezone.utc)
|
38
40
|
|
39
41
|
@abstractmethod
|
40
42
|
def increment(self, timestamp: datetime) -> datetime: ...
|
@@ -134,10 +136,10 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
134
136
|
return int(timestamp.timestamp())
|
135
137
|
|
136
138
|
def parse_timestamp(self, timestamp: int) -> datetime:
|
137
|
-
dt_object =
|
138
|
-
if not isinstance(dt_object,
|
139
|
+
dt_object = pendulum.from_timestamp(timestamp)
|
140
|
+
if not isinstance(dt_object, DateTime):
|
139
141
|
raise ValueError(
|
140
|
-
f"
|
142
|
+
f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
|
141
143
|
)
|
142
144
|
return dt_object
|
143
145
|
|
@@ -167,25 +169,14 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
167
169
|
def increment(self, timestamp: datetime) -> datetime:
|
168
170
|
return timestamp + self._cursor_granularity
|
169
171
|
|
170
|
-
def output_format(self, timestamp: datetime) ->
|
171
|
-
""
|
172
|
-
|
173
|
-
Args:
|
174
|
-
timestamp: The datetime to format.
|
175
|
-
|
176
|
-
Returns:
|
177
|
-
str: ISO8601/RFC3339 formatted string with milliseconds.
|
178
|
-
"""
|
179
|
-
dt = AirbyteDateTime.from_datetime(timestamp)
|
180
|
-
# Always include milliseconds, even if zero
|
181
|
-
millis = dt.microsecond // 1000 if dt.microsecond else 0
|
182
|
-
return f"{dt.year:04d}-{dt.month:02d}-{dt.day:02d}T{dt.hour:02d}:{dt.minute:02d}:{dt.second:02d}.{millis:03d}Z"
|
172
|
+
def output_format(self, timestamp: datetime) -> Any:
|
173
|
+
return timestamp.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"
|
183
174
|
|
184
175
|
def parse_timestamp(self, timestamp: str) -> datetime:
|
185
|
-
dt_object =
|
186
|
-
if not isinstance(dt_object,
|
176
|
+
dt_object = pendulum.parse(timestamp)
|
177
|
+
if not isinstance(dt_object, DateTime):
|
187
178
|
raise ValueError(
|
188
|
-
f"
|
179
|
+
f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
|
189
180
|
)
|
190
181
|
return dt_object
|
191
182
|
|
@@ -193,7 +184,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
193
184
|
class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
|
194
185
|
"""
|
195
186
|
Datetime State converter that emits state according to the supplied datetime format. The converter supports reading
|
196
|
-
incoming state in any valid datetime format
|
187
|
+
incoming state in any valid datetime format via Pendulum.
|
197
188
|
"""
|
198
189
|
|
199
190
|
def __init__(
|
@@ -223,17 +223,17 @@ class Stream(ABC):
|
|
223
223
|
record_counter += 1
|
224
224
|
|
225
225
|
checkpoint_interval = self.state_checkpoint_interval
|
226
|
+
checkpoint = checkpoint_reader.get_checkpoint()
|
226
227
|
if (
|
227
228
|
should_checkpoint
|
228
229
|
and checkpoint_interval
|
229
230
|
and record_counter % checkpoint_interval == 0
|
231
|
+
and checkpoint is not None
|
230
232
|
):
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
)
|
236
|
-
yield airbyte_state_message
|
233
|
+
airbyte_state_message = self._checkpoint_state(
|
234
|
+
checkpoint, state_manager=state_manager
|
235
|
+
)
|
236
|
+
yield airbyte_state_message
|
237
237
|
|
238
238
|
if internal_config.is_limit_reached(record_counter):
|
239
239
|
break
|
@@ -423,6 +423,8 @@ class HttpStream(Stream, CheckpointMixin, ABC):
|
|
423
423
|
stream_slice: Optional[Mapping[str, Any]] = None,
|
424
424
|
stream_state: Optional[Mapping[str, Any]] = None,
|
425
425
|
) -> Iterable[StreamData]:
|
426
|
+
partition, _, _ = self._extract_slice_fields(stream_slice=stream_slice)
|
427
|
+
|
426
428
|
stream_state = stream_state or {}
|
427
429
|
pagination_complete = False
|
428
430
|
next_page_token = None
|
@@ -436,7 +438,6 @@ class HttpStream(Stream, CheckpointMixin, ABC):
|
|
436
438
|
|
437
439
|
cursor = self.get_cursor()
|
438
440
|
if cursor and isinstance(cursor, SubstreamResumableFullRefreshCursor):
|
439
|
-
partition, _, _ = self._extract_slice_fields(stream_slice=stream_slice)
|
440
441
|
# Substreams checkpoint state by marking an entire parent partition as completed so that on the subsequent attempt
|
441
442
|
# after a failure, completed parents are skipped and the sync can make progress
|
442
443
|
cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition))
|
@@ -4,11 +4,11 @@
|
|
4
4
|
|
5
5
|
import logging
|
6
6
|
from abc import abstractmethod
|
7
|
-
from datetime import timedelta
|
8
7
|
from json import JSONDecodeError
|
9
8
|
from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
|
10
9
|
|
11
10
|
import backoff
|
11
|
+
import pendulum
|
12
12
|
import requests
|
13
13
|
from requests.auth import AuthBase
|
14
14
|
|
@@ -17,7 +17,6 @@ from airbyte_cdk.sources.http_logger import format_http_message
|
|
17
17
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
18
18
|
from airbyte_cdk.utils import AirbyteTracedException
|
19
19
|
from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets
|
20
|
-
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
|
21
20
|
|
22
21
|
from ..exceptions import DefaultBackoffException
|
23
22
|
|
@@ -73,7 +72,7 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
73
72
|
|
74
73
|
def token_has_expired(self) -> bool:
|
75
74
|
"""Returns True if the token is expired"""
|
76
|
-
return
|
75
|
+
return pendulum.now() > self.get_token_expiry_date() # type: ignore # this is always a bool despite what mypy thinks
|
77
76
|
|
78
77
|
def build_refresh_request_body(self) -> Mapping[str, Any]:
|
79
78
|
"""
|
@@ -180,7 +179,7 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
180
179
|
self.get_expires_in_name()
|
181
180
|
]
|
182
181
|
|
183
|
-
def _parse_token_expiration_date(self, value: Union[str, int]) ->
|
182
|
+
def _parse_token_expiration_date(self, value: Union[str, int]) -> pendulum.DateTime:
|
184
183
|
"""
|
185
184
|
Return the expiration datetime of the refresh token
|
186
185
|
|
@@ -192,19 +191,9 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
192
191
|
raise ValueError(
|
193
192
|
f"Invalid token expiry date format {self.token_expiry_date_format}; a string representing the format is required."
|
194
193
|
)
|
195
|
-
|
196
|
-
return ab_datetime_parse(str(value))
|
197
|
-
except ValueError as e:
|
198
|
-
raise ValueError(f"Invalid token expiry date format: {e}")
|
194
|
+
return pendulum.from_format(str(value), self.token_expiry_date_format)
|
199
195
|
else:
|
200
|
-
|
201
|
-
# Only accept numeric values (as int/float/string) when no format specified
|
202
|
-
seconds = int(float(str(value)))
|
203
|
-
return ab_datetime_now() + timedelta(seconds=seconds)
|
204
|
-
except (ValueError, TypeError):
|
205
|
-
raise ValueError(
|
206
|
-
f"Invalid expires_in value: {value}. Expected number of seconds when no format specified."
|
207
|
-
)
|
196
|
+
return pendulum.now().add(seconds=int(float(value)))
|
208
197
|
|
209
198
|
@property
|
210
199
|
def token_expiry_is_time_of_expiration(self) -> bool:
|
@@ -255,7 +244,7 @@ class AbstractOauth2Authenticator(AuthBase):
|
|
255
244
|
"""List of requested scopes"""
|
256
245
|
|
257
246
|
@abstractmethod
|
258
|
-
def get_token_expiry_date(self) ->
|
247
|
+
def get_token_expiry_date(self) -> pendulum.DateTime:
|
259
248
|
"""Expiration date of the access token"""
|
260
249
|
|
261
250
|
@abstractmethod
|
@@ -2,10 +2,10 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from datetime import timedelta
|
6
5
|
from typing import Any, List, Mapping, Optional, Sequence, Tuple, Union
|
7
6
|
|
8
7
|
import dpath
|
8
|
+
import pendulum
|
9
9
|
|
10
10
|
from airbyte_cdk.config_observation import (
|
11
11
|
create_connector_config_control_message,
|
@@ -15,11 +15,6 @@ from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
|
15
15
|
from airbyte_cdk.sources.streams.http.requests_native_auth.abstract_oauth import (
|
16
16
|
AbstractOauth2Authenticator,
|
17
17
|
)
|
18
|
-
from airbyte_cdk.utils.datetime_helpers import (
|
19
|
-
AirbyteDateTime,
|
20
|
-
ab_datetime_now,
|
21
|
-
ab_datetime_parse,
|
22
|
-
)
|
23
18
|
|
24
19
|
|
25
20
|
class Oauth2Authenticator(AbstractOauth2Authenticator):
|
@@ -39,7 +34,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
39
34
|
client_secret_name: str = "client_secret",
|
40
35
|
refresh_token_name: str = "refresh_token",
|
41
36
|
scopes: List[str] | None = None,
|
42
|
-
token_expiry_date:
|
37
|
+
token_expiry_date: pendulum.DateTime | None = None,
|
43
38
|
token_expiry_date_format: str | None = None,
|
44
39
|
access_token_name: str = "access_token",
|
45
40
|
expires_in_name: str = "expires_in",
|
@@ -67,7 +62,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
67
62
|
self._grant_type_name = grant_type_name
|
68
63
|
self._grant_type = grant_type
|
69
64
|
|
70
|
-
self._token_expiry_date = token_expiry_date or (
|
65
|
+
self._token_expiry_date = token_expiry_date or pendulum.now().subtract(days=1) # type: ignore [no-untyped-call]
|
71
66
|
self._token_expiry_date_format = token_expiry_date_format
|
72
67
|
self._token_expiry_is_time_of_expiration = token_expiry_is_time_of_expiration
|
73
68
|
self._access_token = None
|
@@ -100,16 +95,16 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
100
95
|
return self._access_token_name
|
101
96
|
|
102
97
|
def get_scopes(self) -> list[str]:
|
103
|
-
return self._scopes # type: ignore[return-value]
|
98
|
+
return self._scopes # type: ignore [return-value]
|
104
99
|
|
105
100
|
def get_expires_in_name(self) -> str:
|
106
101
|
return self._expires_in_name
|
107
102
|
|
108
103
|
def get_refresh_request_body(self) -> Mapping[str, Any]:
|
109
|
-
return self._refresh_request_body # type: ignore[return-value]
|
104
|
+
return self._refresh_request_body # type: ignore [return-value]
|
110
105
|
|
111
106
|
def get_refresh_request_headers(self) -> Mapping[str, Any]:
|
112
|
-
return self._refresh_request_headers # type: ignore[return-value]
|
107
|
+
return self._refresh_request_headers # type: ignore [return-value]
|
113
108
|
|
114
109
|
def get_grant_type_name(self) -> str:
|
115
110
|
return self._grant_type_name
|
@@ -117,7 +112,7 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
117
112
|
def get_grant_type(self) -> str:
|
118
113
|
return self._grant_type
|
119
114
|
|
120
|
-
def get_token_expiry_date(self) ->
|
115
|
+
def get_token_expiry_date(self) -> pendulum.DateTime:
|
121
116
|
return self._token_expiry_date
|
122
117
|
|
123
118
|
def set_token_expiry_date(self, value: Union[str, int]) -> None:
|
@@ -133,11 +128,11 @@ class Oauth2Authenticator(AbstractOauth2Authenticator):
|
|
133
128
|
|
134
129
|
@property
|
135
130
|
def access_token(self) -> str:
|
136
|
-
return self._access_token # type: ignore[return-value]
|
131
|
+
return self._access_token # type: ignore [return-value]
|
137
132
|
|
138
133
|
@access_token.setter
|
139
134
|
def access_token(self, value: str) -> None:
|
140
|
-
self._access_token = value # type: ignore[assignment] # Incorrect type for assignment
|
135
|
+
self._access_token = value # type: ignore [assignment] # Incorrect type for assignment
|
141
136
|
|
142
137
|
|
143
138
|
class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
@@ -197,15 +192,15 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
197
192
|
message_repository (MessageRepository): the message repository used to emit logs on HTTP requests and control message on config update
|
198
193
|
"""
|
199
194
|
self._client_id = (
|
200
|
-
client_id # type: ignore[assignment] # Incorrect type for assignment
|
195
|
+
client_id # type: ignore [assignment] # Incorrect type for assignment
|
201
196
|
if client_id is not None
|
202
|
-
else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore[arg-type]
|
197
|
+
else dpath.get(connector_config, ("credentials", "client_id")) # type: ignore [arg-type]
|
203
198
|
)
|
204
199
|
self._client_secret = (
|
205
|
-
client_secret # type: ignore[assignment] # Incorrect type for assignment
|
200
|
+
client_secret # type: ignore [assignment] # Incorrect type for assignment
|
206
201
|
if client_secret is not None
|
207
202
|
else dpath.get(
|
208
|
-
connector_config, # type: ignore[arg-type]
|
203
|
+
connector_config, # type: ignore [arg-type]
|
209
204
|
("credentials", "client_secret"),
|
210
205
|
)
|
211
206
|
)
|
@@ -253,8 +248,8 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
253
248
|
|
254
249
|
@property
|
255
250
|
def access_token(self) -> str:
|
256
|
-
return dpath.get( # type: ignore[return-value]
|
257
|
-
self._connector_config, # type: ignore[arg-type]
|
251
|
+
return dpath.get( # type: ignore [return-value]
|
252
|
+
self._connector_config, # type: ignore [arg-type]
|
258
253
|
self._access_token_config_path,
|
259
254
|
default="",
|
260
255
|
)
|
@@ -262,63 +257,56 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
262
257
|
@access_token.setter
|
263
258
|
def access_token(self, new_access_token: str) -> None:
|
264
259
|
dpath.new(
|
265
|
-
self._connector_config, # type: ignore[arg-type]
|
260
|
+
self._connector_config, # type: ignore [arg-type]
|
266
261
|
self._access_token_config_path,
|
267
262
|
new_access_token,
|
268
263
|
)
|
269
264
|
|
270
265
|
def get_refresh_token(self) -> str:
|
271
|
-
return dpath.get( # type: ignore[return-value]
|
272
|
-
self._connector_config, # type: ignore[arg-type]
|
266
|
+
return dpath.get( # type: ignore [return-value]
|
267
|
+
self._connector_config, # type: ignore [arg-type]
|
273
268
|
self._refresh_token_config_path,
|
274
269
|
default="",
|
275
270
|
)
|
276
271
|
|
277
272
|
def set_refresh_token(self, new_refresh_token: str) -> None:
|
278
273
|
dpath.new(
|
279
|
-
self._connector_config, # type: ignore[arg-type]
|
274
|
+
self._connector_config, # type: ignore [arg-type]
|
280
275
|
self._refresh_token_config_path,
|
281
276
|
new_refresh_token,
|
282
277
|
)
|
283
278
|
|
284
|
-
def get_token_expiry_date(self) ->
|
279
|
+
def get_token_expiry_date(self) -> pendulum.DateTime:
|
285
280
|
expiry_date = dpath.get(
|
286
|
-
self._connector_config, # type: ignore[arg-type]
|
281
|
+
self._connector_config, # type: ignore [arg-type]
|
287
282
|
self._token_expiry_date_config_path,
|
288
283
|
default="",
|
289
284
|
)
|
290
|
-
|
291
|
-
ab_datetime_now() - timedelta(days=1)
|
292
|
-
if expiry_date == ""
|
293
|
-
else ab_datetime_parse(str(expiry_date))
|
294
|
-
)
|
295
|
-
if isinstance(result, AirbyteDateTime):
|
296
|
-
return result
|
297
|
-
raise TypeError("Invalid datetime conversion")
|
285
|
+
return pendulum.now().subtract(days=1) if expiry_date == "" else pendulum.parse(expiry_date) # type: ignore [arg-type, return-value, no-untyped-call]
|
298
286
|
|
299
287
|
def set_token_expiry_date( # type: ignore[override]
|
300
288
|
self,
|
301
|
-
new_token_expiry_date:
|
289
|
+
new_token_expiry_date: pendulum.DateTime,
|
302
290
|
) -> None:
|
303
291
|
dpath.new(
|
304
|
-
self._connector_config, # type: ignore[arg-type]
|
292
|
+
self._connector_config, # type: ignore [arg-type]
|
305
293
|
self._token_expiry_date_config_path,
|
306
294
|
str(new_token_expiry_date),
|
307
295
|
)
|
308
296
|
|
309
297
|
def token_has_expired(self) -> bool:
|
310
298
|
"""Returns True if the token is expired"""
|
311
|
-
return
|
299
|
+
return pendulum.now("UTC") > self.get_token_expiry_date()
|
312
300
|
|
313
301
|
@staticmethod
|
314
302
|
def get_new_token_expiry_date(
|
315
303
|
access_token_expires_in: str,
|
316
304
|
token_expiry_date_format: str | None = None,
|
317
|
-
) ->
|
305
|
+
) -> pendulum.DateTime:
|
318
306
|
if token_expiry_date_format:
|
319
|
-
return
|
307
|
+
return pendulum.from_format(access_token_expires_in, token_expiry_date_format)
|
320
308
|
else:
|
321
|
-
return
|
309
|
+
return pendulum.now("UTC").add(seconds=int(access_token_expires_in))
|
322
310
|
|
323
311
|
def get_access_token(self) -> str:
|
324
312
|
"""Retrieve new access and refresh token if the access token has expired.
|
@@ -330,7 +318,7 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
330
318
|
new_access_token, access_token_expires_in, new_refresh_token = (
|
331
319
|
self.refresh_access_token()
|
332
320
|
)
|
333
|
-
new_token_expiry_date:
|
321
|
+
new_token_expiry_date: pendulum.DateTime = self.get_new_token_expiry_date(
|
334
322
|
access_token_expires_in, self._token_expiry_date_format
|
335
323
|
)
|
336
324
|
self.access_token = new_access_token
|
@@ -341,10 +329,10 @@ class SingleUseRefreshTokenOauth2Authenticator(Oauth2Authenticator):
|
|
341
329
|
# message directly in the console, this is needed
|
342
330
|
if not isinstance(self._message_repository, NoopMessageRepository):
|
343
331
|
self._message_repository.emit_message(
|
344
|
-
create_connector_config_control_message(self._connector_config) # type: ignore[arg-type]
|
332
|
+
create_connector_config_control_message(self._connector_config) # type: ignore [arg-type]
|
345
333
|
)
|
346
334
|
else:
|
347
|
-
emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore[arg-type]
|
335
|
+
emit_configuration_as_airbyte_control_message(self._connector_config) # type: ignore [arg-type]
|
348
336
|
return self.access_token
|
349
337
|
|
350
338
|
def refresh_access_token( # type: ignore[override] # Signature doesn't match base class
|
airbyte_cdk/sources/types.py
CHANGED
@@ -6,7 +6,7 @@ from __future__ import annotations
|
|
6
6
|
|
7
7
|
from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
|
8
8
|
|
9
|
-
|
9
|
+
import orjson
|
10
10
|
|
11
11
|
# A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
|
12
12
|
# "hello"}] returns "hello"
|
@@ -151,9 +151,7 @@ class StreamSlice(Mapping[str, Any]):
|
|
151
151
|
return self._stream_slice
|
152
152
|
|
153
153
|
def __hash__(self) -> int:
|
154
|
-
return
|
155
|
-
stream_slice=self._stream_slice
|
156
|
-
) # no need to provide stream_name here as this is used for slicing the cursor
|
154
|
+
return hash(orjson.dumps(self._stream_slice, option=orjson.OPT_SORT_KEYS))
|
157
155
|
|
158
156
|
def __bool__(self) -> bool:
|
159
157
|
return bool(self._stream_slice) or bool(self._extra_fields)
|