airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
- airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
- airbyte_cdk/sources/types.py +4 -2
- airbyte_cdk/sources/utils/transform.py +23 -2
- airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/mapping_helpers.py +27 -86
- airbyte_cdk/utils/slice_hasher.py +8 -1
- airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,499 @@
|
|
1
|
+
"""Provides consistent datetime handling across Airbyte with ISO8601/RFC3339 compliance.
|
2
|
+
|
3
|
+
Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
4
|
+
|
5
|
+
This module provides a custom datetime class (AirbyteDateTime) and helper functions that ensure
|
6
|
+
consistent datetime handling across Airbyte. All datetime strings are formatted according to
|
7
|
+
ISO8601/RFC3339 standards with 'T' delimiter and '+00:00' for UTC timezone.
|
8
|
+
|
9
|
+
Key Features:
|
10
|
+
- Timezone-aware datetime objects (defaults to UTC)
|
11
|
+
- ISO8601/RFC3339 compliant string formatting
|
12
|
+
- Consistent parsing of various datetime formats
|
13
|
+
- Support for Unix timestamps and milliseconds
|
14
|
+
- Type-safe datetime arithmetic with timedelta
|
15
|
+
|
16
|
+
## Basic Usage
|
17
|
+
|
18
|
+
```python
|
19
|
+
from airbyte_cdk.utils.datetime_helpers import AirbyteDateTime, ab_datetime_now, ab_datetime_parse
|
20
|
+
from datetime import timedelta, timezone
|
21
|
+
|
22
|
+
## Current time in UTC
|
23
|
+
now = ab_datetime_now()
|
24
|
+
print(now) # 2023-03-14T15:09:26.535897+00:00
|
25
|
+
|
26
|
+
# Parse various datetime formats
|
27
|
+
dt = ab_datetime_parse("2023-03-14T15:09:26Z") # ISO8601/RFC3339
|
28
|
+
dt = ab_datetime_parse("2023-03-14") # Date only (assumes midnight UTC)
|
29
|
+
dt = ab_datetime_parse(1678806566) # Unix timestamp
|
30
|
+
|
31
|
+
## Create with explicit timezone
|
32
|
+
dt = AirbyteDateTime(2023, 3, 14, 15, 9, 26, tzinfo=timezone.utc)
|
33
|
+
print(dt) # 2023-03-14T15:09:26+00:00
|
34
|
+
|
35
|
+
# Datetime arithmetic with timedelta
|
36
|
+
tomorrow = dt + timedelta(days=1)
|
37
|
+
yesterday = dt - timedelta(days=1)
|
38
|
+
time_diff = tomorrow - yesterday # timedelta object
|
39
|
+
```
|
40
|
+
|
41
|
+
## Millisecond Timestamp Handling
|
42
|
+
|
43
|
+
```python
|
44
|
+
# Convert to millisecond timestamp
|
45
|
+
dt = ab_datetime_parse("2023-03-14T15:09:26Z")
|
46
|
+
ms = dt.to_epoch_millis() # 1678806566000
|
47
|
+
|
48
|
+
# Create from millisecond timestamp
|
49
|
+
dt = AirbyteDateTime.from_epoch_millis(1678806566000)
|
50
|
+
print(dt) # 2023-03-14T15:09:26+00:00
|
51
|
+
```
|
52
|
+
|
53
|
+
## Timezone Handling
|
54
|
+
|
55
|
+
```python
|
56
|
+
# Create with non-UTC timezone
|
57
|
+
tz = timezone(timedelta(hours=-4)) # EDT
|
58
|
+
dt = AirbyteDateTime(2023, 3, 14, 15, 9, 26, tzinfo=tz)
|
59
|
+
print(dt) # 2023-03-14T15:09:26-04:00
|
60
|
+
|
61
|
+
## Parse with timezone
|
62
|
+
dt = ab_datetime_parse("2023-03-14T15:09:26-04:00")
|
63
|
+
print(dt) # 2023-03-14T15:09:26-04:00
|
64
|
+
|
65
|
+
## Naive datetimes are automatically converted to UTC
|
66
|
+
dt = ab_datetime_parse("2023-03-14T15:09:26")
|
67
|
+
print(dt) # 2023-03-14T15:09:26+00:00
|
68
|
+
```
|
69
|
+
|
70
|
+
## Format Validation
|
71
|
+
|
72
|
+
```python
|
73
|
+
from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse
|
74
|
+
|
75
|
+
# Validate ISO8601/RFC3339 format
|
76
|
+
assert ab_datetime_try_parse("2023-03-14T15:09:26Z") # Basic UTC format
|
77
|
+
assert ab_datetime_try_parse("2023-03-14T15:09:26-04:00") # With timezone offset
|
78
|
+
assert ab_datetime_try_parse("2023-03-14T15:09:26+00:00") # With explicit UTC offset
|
79
|
+
assert ab_datetime_try_parse("2023-03-14 15:09:26Z") # Missing T delimiter but still parsable
|
80
|
+
assert not ab_datetime_try_parse("foo") # Invalid: not parsable, returns `None`
|
81
|
+
```
|
82
|
+
"""
|
83
|
+
|
84
|
+
from datetime import datetime, timedelta, timezone
|
85
|
+
from typing import Any, Optional, Union, overload
|
86
|
+
|
87
|
+
from dateutil import parser
|
88
|
+
from typing_extensions import Never
|
89
|
+
from whenever import Instant, LocalDateTime, ZonedDateTime
|
90
|
+
|
91
|
+
|
92
|
+
class AirbyteDateTime(datetime):
    """A timezone-aware datetime class with ISO8601/RFC3339 string representation and operator overloading.

    This class extends the standard datetime class to provide consistent timezone handling
    (defaulting to UTC) and ISO8601/RFC3339 compliant string formatting. It also supports
    operator overloading for datetime arithmetic with timedelta objects.

    Both `str()` and `repr()` return the ISO8601 string, so interactive output is unquoted.

    Example:
        >>> dt = AirbyteDateTime(2023, 3, 14, 15, 9, 26, tzinfo=timezone.utc)
        >>> str(dt)
        '2023-03-14T15:09:26+00:00'
        >>> dt + timedelta(hours=1)
        2023-03-14T16:09:26+00:00
    """

    def __new__(cls, *args: Any, **kwargs: Any) -> "AirbyteDateTime":
        """Creates a new timezone-aware AirbyteDateTime instance.

        Accepts the same arguments as `datetime`. Ensures all instances are timezone-aware
        by defaulting to UTC if no timezone is provided.

        Returns:
            AirbyteDateTime: A new timezone-aware datetime instance.
        """
        self = super().__new__(cls, *args, **kwargs)
        if self.tzinfo is None:
            return self.replace(tzinfo=timezone.utc)
        return self

    @classmethod
    def from_datetime(cls, dt: datetime) -> "AirbyteDateTime":
        """Converts a standard datetime to AirbyteDateTime.

        All fields are copied, including `fold`, so that wall-clock times that are ambiguous
        in the source timezone keep their disambiguation. Naive inputs are treated as UTC.

        Args:
            dt: A standard datetime object to convert.

        Returns:
            AirbyteDateTime: A new timezone-aware AirbyteDateTime instance.
        """
        return cls(
            dt.year,
            dt.month,
            dt.day,
            dt.hour,
            dt.minute,
            dt.second,
            dt.microsecond,
            dt.tzinfo or timezone.utc,
            fold=dt.fold,
        )

    def to_datetime(self) -> datetime:
        """Converts this AirbyteDateTime to a standard datetime object.

        Today, this just returns `self` because AirbyteDateTime is a subclass of `datetime`.
        In the future, we may modify our internal representation to use a different base class.
        """
        return self

    def __str__(self) -> str:
        """Returns the datetime in ISO8601/RFC3339 format with 'T' delimiter.

        Ensures consistent string representation with timezone, using '+00:00' for UTC.
        Preserves full microsecond precision when present, omits when zero.

        Returns:
            str: ISO8601/RFC3339 formatted string.
        """
        aware_self = self if self.tzinfo else self.replace(tzinfo=timezone.utc)
        return aware_self.isoformat(sep="T", timespec="auto")

    def __repr__(self) -> str:
        """Returns the same string representation as __str__ for consistency.

        Returns:
            str: ISO8601/RFC3339 formatted string.
        """
        return self.__str__()

    def add(self, delta: timedelta) -> "AirbyteDateTime":
        """Add a timedelta interval to this datetime.

        This method provides a more explicit alternative to the + operator
        for adding time intervals to datetimes.

        Args:
            delta: The timedelta interval to add.

        Returns:
            AirbyteDateTime: A new datetime with the interval added.

        Example:
            >>> dt = AirbyteDateTime(2023, 3, 14, tzinfo=timezone.utc)
            >>> dt.add(timedelta(hours=1))
            2023-03-14T01:00:00+00:00
        """
        return self + delta

    def subtract(self, delta: timedelta) -> "AirbyteDateTime":
        """Subtract a timedelta interval from this datetime.

        This method provides a more explicit alternative to the - operator
        for subtracting time intervals from datetimes.

        Args:
            delta: The timedelta interval to subtract.

        Returns:
            AirbyteDateTime: A new datetime with the interval subtracted.

        Raises:
            TypeError: If the subtraction does not yield a datetime (for example when
                `delta` is itself a datetime rather than a timedelta).

        Example:
            >>> dt = AirbyteDateTime(2023, 3, 14, tzinfo=timezone.utc)
            >>> dt.subtract(timedelta(hours=1))
            2023-03-13T23:00:00+00:00
        """
        result = super().__sub__(delta)
        if isinstance(result, datetime):
            return AirbyteDateTime.from_datetime(result)
        raise TypeError("Invalid operation")

    def __add__(self, other: timedelta) -> "AirbyteDateTime":
        """Adds a timedelta to this datetime.

        Args:
            other: A timedelta object to add.

        Returns:
            AirbyteDateTime: A new datetime with the timedelta added.

        Raises:
            TypeError: If other is not a timedelta.
        """
        result = super().__add__(other)
        if isinstance(result, datetime):
            return AirbyteDateTime.from_datetime(result)
        raise TypeError("Invalid operation")

    def __radd__(self, other: timedelta) -> "AirbyteDateTime":
        """Supports timedelta + AirbyteDateTime operation.

        Args:
            other: A timedelta object to add.

        Returns:
            AirbyteDateTime: A new datetime with the timedelta added.

        Raises:
            TypeError: If other is not a timedelta.
        """
        return self.__add__(other)

    @overload  # type: ignore[override]
    def __sub__(self, other: timedelta) -> "AirbyteDateTime": ...

    @overload  # type: ignore[override]
    def __sub__(self, other: Union[datetime, "AirbyteDateTime"]) -> timedelta: ...

    def __sub__(
        self, other: Union[datetime, "AirbyteDateTime", timedelta]
    ) -> Union[timedelta, "AirbyteDateTime"]:  # type: ignore[override]
        """Subtracts a datetime, AirbyteDateTime, or timedelta from this datetime.

        Args:
            other: A datetime, AirbyteDateTime, or timedelta object to subtract.

        Returns:
            Union[timedelta, AirbyteDateTime]: A timedelta if subtracting datetime/AirbyteDateTime,
                or a new datetime if subtracting timedelta.

        Raises:
            TypeError: If other is not a datetime, AirbyteDateTime, or timedelta.
        """
        if isinstance(other, timedelta):
            result = super().__sub__(other)  # type: ignore[call-overload]
            if isinstance(result, datetime):
                return AirbyteDateTime.from_datetime(result)
        elif isinstance(other, (datetime, AirbyteDateTime)):
            result = super().__sub__(other)  # type: ignore[call-overload]
            if isinstance(result, timedelta):
                return result
        raise TypeError(
            f"unsupported operand type(s) for -: '{type(self).__name__}' and '{type(other).__name__}'"
        )

    def __rsub__(self, other: datetime) -> timedelta:
        """Supports datetime - AirbyteDateTime operation.

        Args:
            other: A datetime object.

        Returns:
            timedelta: The time difference between the datetimes.

        Raises:
            TypeError: If the subtraction does not produce a timedelta.
        """
        if not isinstance(other, datetime):
            return NotImplemented
        # Subtract a plain `datetime` copy of this value rather than `self`.
        # NOTE(review): presumably so the operation is resolved by the base class instead of
        # dispatching back into this reflected method.
        result = other - datetime(
            self.year,
            self.month,
            self.day,
            self.hour,
            self.minute,
            self.second,
            self.microsecond,
            self.tzinfo,
        )
        if isinstance(result, timedelta):
            return result
        raise TypeError("Invalid operation")

    def to_epoch_millis(self) -> int:
        """Return the Unix timestamp in milliseconds for this datetime.

        The value is computed with integer arithmetic so that no precision is lost to
        floating point; any sub-millisecond remainder is truncated toward zero.

        Returns:
            int: Number of milliseconds since Unix epoch (January 1, 1970).

        Example:
            >>> dt = AirbyteDateTime(2023, 3, 14, 15, 9, 26, tzinfo=timezone.utc)
            >>> dt.to_epoch_millis()
            1678806566000
        """
        if self.utcoffset() is None:
            # A tzinfo that reports no offset makes this value effectively naive, so it
            # cannot be subtracted from the aware epoch; keep the float-based conversion.
            return int(self.timestamp() * 1000)

        elapsed = self - datetime(1970, 1, 1, tzinfo=timezone.utc)
        micros = (elapsed.days * 86_400 + elapsed.seconds) * 1_000_000 + elapsed.microseconds
        # Truncate toward zero (not floor) to match `int()` semantics for pre-epoch values.
        millis = abs(micros) // 1000
        return millis if micros >= 0 else -millis

    @classmethod
    def from_epoch_millis(cls, milliseconds: int) -> "AirbyteDateTime":
        """Create an AirbyteDateTime from Unix timestamp in milliseconds.

        Args:
            milliseconds: Number of milliseconds since Unix epoch (January 1, 1970).

        Returns:
            AirbyteDateTime: A new timezone-aware datetime instance (UTC).

        Example:
            >>> dt = AirbyteDateTime.from_epoch_millis(1678806566000)
            >>> str(dt)
            '2023-03-14T15:09:26+00:00'
        """
        return cls.fromtimestamp(milliseconds / 1000.0, timezone.utc)

    @classmethod
    def from_str(cls, dt_str: str) -> "AirbyteDateTime":
        """Thin convenience wrapper around `ab_datetime_parse()`.

        This method attempts to create a new `AirbyteDateTime` using all available parsing
        strategies.

        Raises:
            ValueError: If the value cannot be parsed into a valid datetime object.
        """
        return ab_datetime_parse(dt_str)
|
343
|
+
|
344
|
+
|
345
|
+
def ab_datetime_now() -> AirbyteDateTime:
    """Return the current moment as an `AirbyteDateTime` in the UTC timezone.

    Previously named: now()

    Returns:
        AirbyteDateTime: Current UTC time.

    Example:
        >>> dt = ab_datetime_now()
        >>> str(dt)  # Current time in ISO8601/RFC3339, e.g.
        '2023-03-14T15:09:26.535897+00:00'
    """
    current_utc = datetime.now(timezone.utc)
    return AirbyteDateTime.from_datetime(current_utc)
|
359
|
+
|
360
|
+
|
361
|
+
def ab_datetime_parse(dt_str: str | int) -> AirbyteDateTime:
    """Parses a datetime string or timestamp into an AirbyteDateTime with timezone awareness.

    This implementation is as flexible as possible to handle various datetime formats.
    Always returns a timezone-aware datetime (defaults to UTC if no timezone specified).

    Handles:
        - ISO8601/RFC3339 format strings (with ' ' or 'T' delimiter)
        - Unix timestamps in seconds (as integers or all-digit strings, at most 10 digits)
        - Date-only strings (YYYY-MM-DD)
        - Timezone-aware formats (+00:00 for UTC, or ±HH:MM offset)
        - Other strings that `dateutil.parser.parse()` accepts

    Strings that contain ":" but have fewer than two "-" and fewer than two "/" separators
    are treated as lacking a date part and are rejected before `dateutil` is consulted.

    Args:
        dt_str: A datetime string in ISO8601/RFC3339 format, Unix timestamp (int/str),
            or other recognizable datetime format.

    Returns:
        AirbyteDateTime: A timezone-aware datetime object.

    Raises:
        ValueError: If the input cannot be parsed as a valid datetime. The message starts
            with "Invalid date format:", "Timestamp cannot be negative",
            "Timestamp value too large", or "Could not parse datetime string:".

    Example:
        >>> ab_datetime_parse("2023-03-14T15:09:26+00:00")
        2023-03-14T15:09:26+00:00
        >>> ab_datetime_parse(1678806000)  # Unix timestamp
        2023-03-14T15:00:00+00:00
        >>> ab_datetime_parse("2023-03-14")  # Date-only
        2023-03-14T00:00:00+00:00
    """
    try:
        # Handle numeric values as Unix timestamps (UTC)
        if isinstance(dt_str, int) or (
            isinstance(dt_str, str)
            and (dt_str.isdigit() or (dt_str.startswith("-") and dt_str[1:].isdigit()))
        ):
            timestamp = int(dt_str)
            if timestamp < 0:
                raise ValueError("Timestamp cannot be negative")
            # More than 10 digits cannot be a seconds-resolution timestamp in a sane range.
            if len(str(abs(timestamp))) > 10:
                raise ValueError("Timestamp value too large")
            instant = Instant.from_timestamp(timestamp)
            return AirbyteDateTime.from_datetime(instant.py_datetime())

        if not isinstance(dt_str, str):
            raise ValueError(
                f"Could not parse datetime string: expected string or integer, got {type(dt_str)}"
            )

        # Handle date-only format first
        if ":" not in dt_str and dt_str.count("-") == 2 and "/" not in dt_str:
            try:
                year, month, day = map(int, dt_str.split("-"))
                if not (1 <= month <= 12 and 1 <= day <= 31):
                    raise ValueError(f"Invalid date format: {dt_str}")
                instant = Instant.from_utc(year, month, day, 0, 0, 0)
                return AirbyteDateTime.from_datetime(instant.py_datetime())
            except (ValueError, TypeError) as err:
                raise ValueError(f"Invalid date format: {dt_str}") from err

        # Reject time-only strings without date
        if ":" in dt_str and dt_str.count("-") < 2 and dt_str.count("/") < 2:
            raise ValueError(f"Missing date part in datetime string: {dt_str}")

        # Try parsing with dateutil for timezone handling
        try:
            parsed = parser.parse(dt_str)
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=timezone.utc)

            return AirbyteDateTime.from_datetime(parsed)
        except (ValueError, TypeError, OverflowError) as err:
            # dateutil documents OverflowError for values exceeding the platform's C integer
            # range; surface it as the ValueError this function promises.
            raise ValueError(f"Could not parse datetime string: {dt_str}") from err
    except ValueError as e:
        # These specific messages are passed through unchanged; every other failure
        # (including the wrong-type and missing-date-part messages raised above) is
        # replaced by the generic "Could not parse" message.
        if "Invalid date format:" in str(e):
            raise
        if "Timestamp cannot be negative" in str(e):
            raise
        if "Timestamp value too large" in str(e):
            raise
        raise ValueError(f"Could not parse datetime string: {dt_str}") from e
|
443
|
+
|
444
|
+
|
445
|
+
def ab_datetime_try_parse(dt_str: str) -> AirbyteDateTime | None:
    """Try to parse the input as a datetime, failing gracefully instead of raising an exception.

    This is a thin wrapper around `ab_datetime_parse()` that catches parsing errors and
    returns `None` instead of raising an exception.
    The implementation is as flexible as possible to handle various datetime formats.
    When parsing succeeds, the result is always timezone-aware (defaults to `UTC` if no
    timezone specified).

    Args:
        dt_str: The value to parse; see `ab_datetime_parse()` for accepted formats.

    Returns:
        AirbyteDateTime | None: The parsed datetime, or `None` if the input is not parsable.

    Example:
        >>> ab_datetime_try_parse("2023-03-14T15:09:26Z")  # Returns AirbyteDateTime
        >>> ab_datetime_try_parse("2023-03-14 15:09:26Z")  # Missing 'T' delimiter still parsable
        >>> ab_datetime_try_parse("2023-03-14")  # Returns midnight UTC time
        >>> ab_datetime_try_parse("foo")  # Returns None
    """
    try:
        return ab_datetime_parse(dt_str)
    except (ValueError, TypeError, OverflowError):
        # OverflowError is included because dateutil's parser documents raising it for
        # out-of-range values; "try parse" must not leak it to the caller.
        return None
|
462
|
+
|
463
|
+
|
464
|
+
def ab_datetime_format(
    dt: Union[datetime, AirbyteDateTime],
    format: str | None = None,
) -> str:
    """Formats a datetime object as an ISO8601/RFC3339 string with 'T' delimiter and timezone.

    Previously named: format()

    Converts any datetime object to a string with 'T' delimiter and proper timezone.
    If the datetime is naive (no timezone), UTC is assumed.
    Uses '+00:00' for UTC timezone, otherwise keeps the original timezone offset.

    Args:
        dt: Any datetime object to format.
        format: Optional format string. If provided, calls `strftime()` with this format,
            for plain `datetime` and `AirbyteDateTime` inputs alike.
            Otherwise, uses the default ISO8601/RFC3339 format, adapted for available precision.

    Returns:
        str: The `strftime()` result when `format` is given, otherwise an ISO8601/RFC3339
            formatted datetime string.

    Example:
        >>> dt = datetime(2023, 3, 14, 15, 9, 26, tzinfo=timezone.utc)
        >>> ab_datetime_format(dt)
        '2023-03-14T15:09:26+00:00'
        >>> ab_datetime_format(dt, "%Y-%m-%d")
        '2023-03-14'
    """
    if dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone.utc)

    # An explicit format takes precedence for every input type; checking it before the
    # AirbyteDateTime shortcut keeps the argument from being silently ignored.
    if format:
        return dt.strftime(format)

    if isinstance(dt, AirbyteDateTime):
        return str(dt)

    # Format with consistent timezone representation and "T" delimiter
    return dt.isoformat(sep="T", timespec="auto")
|
@@ -3,102 +3,43 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
|
6
|
-
import
|
7
|
-
from typing import Any, Dict, List, Mapping, Optional, Union
|
8
|
-
|
9
|
-
|
10
|
-
def _merge_mappings(
|
11
|
-
target: Dict[str, Any],
|
12
|
-
source: Mapping[str, Any],
|
13
|
-
path: Optional[List[str]] = None,
|
14
|
-
allow_same_value_merge: bool = False,
|
15
|
-
) -> None:
|
16
|
-
"""
|
17
|
-
Recursively merge two dictionaries, raising an error if there are any conflicts.
|
18
|
-
For body_json requests (allow_same_value_merge=True), a conflict occurs only when the same path has different values.
|
19
|
-
For other request types (allow_same_value_merge=False), any duplicate key is a conflict, regardless of value.
|
20
|
-
|
21
|
-
Args:
|
22
|
-
target: The dictionary to merge into
|
23
|
-
source: The dictionary to merge from
|
24
|
-
path: The current path in the nested structure (for error messages)
|
25
|
-
allow_same_value_merge: Whether to allow merging the same value into the same key. Set to false by default, should only be true for body_json injections
|
26
|
-
"""
|
27
|
-
path = path or []
|
28
|
-
for key, source_value in source.items():
|
29
|
-
current_path = path + [str(key)]
|
30
|
-
|
31
|
-
if key in target:
|
32
|
-
target_value = target[key]
|
33
|
-
if isinstance(target_value, dict) and isinstance(source_value, dict):
|
34
|
-
# Only body_json supports nested_structures
|
35
|
-
if not allow_same_value_merge:
|
36
|
-
raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}")
|
37
|
-
# If both are dictionaries, recursively merge them
|
38
|
-
_merge_mappings(target_value, source_value, current_path, allow_same_value_merge)
|
39
|
-
|
40
|
-
elif not allow_same_value_merge or target_value != source_value:
|
41
|
-
# If same key has different values, that's a conflict
|
42
|
-
raise ValueError(f"Duplicate keys found: {'.'.join(current_path)}")
|
43
|
-
else:
|
44
|
-
# No conflict, just copy the value (using deepcopy for nested structures)
|
45
|
-
target[key] = copy.deepcopy(source_value)
|
6
|
+
from typing import Any, List, Mapping, Optional, Set, Union
|
46
7
|
|
47
8
|
|
48
9
|
def combine_mappings(
    mappings: List[Optional[Union[Mapping[str, Any], str]]],
) -> Union[Mapping[str, Any], str]:
    """
    Combine multiple mappings into a single mapping. If any of the mappings are a string, return
    that string. `None` entries are ignored. Raise errors in the following cases:
    * If there are duplicate keys across mappings
    * If there are multiple string mappings
    * If one mapping is a string and any other mapping contains keys

    Args:
        mappings: The mappings (or at most one string) to combine; entries may be `None`.

    Returns:
        The single string option if one was given, otherwise a new dict holding every
        key/value pair from the non-empty mappings.

    Raises:
        ValueError: On any of the conflicts listed above. For duplicate keys the message
            names only the keys that occur in more than one mapping.
    """
    all_keys: List[Set[str]] = []
    for part in mappings:
        if part is None:
            continue
        keys = set(part.keys()) if not isinstance(part, str) else set()
        all_keys.append(keys)

    string_options = sum(isinstance(mapping, str) for mapping in mappings)
    # If more than one mapping is a string, raise a ValueError
    if string_options > 1:
        raise ValueError("Cannot combine multiple string options")

    if string_options == 1 and sum(len(keys) for keys in all_keys) > 0:
        raise ValueError("Cannot combine multiple options if one is a string")

    # If any mapping is a string, return it
    for mapping in mappings:
        if isinstance(mapping, str):
            return mapping

    # If there are duplicate keys across mappings, raise a ValueError that names only the
    # keys that actually collide (not every key that was seen).
    seen: Set[str] = set()
    duplicates: Set[str] = set()
    for keys in all_keys:
        duplicates |= seen & keys
        seen |= keys
    if duplicates:
        raise ValueError(f"Duplicate keys found: {duplicates}")

    # Return the combined mappings
    return {key: value for mapping in mappings if mapping for key, value in mapping.items()}  # type: ignore # mapping can't be string here
|
@@ -16,7 +16,14 @@ class SliceHasher:
|
|
16
16
|
_ENCODING: Final = "utf-8"
|
17
17
|
|
18
18
|
@classmethod
|
19
|
-
def hash(
|
19
|
+
def hash(
|
20
|
+
cls,
|
21
|
+
stream_name: str = "<stream name not provided>",
|
22
|
+
stream_slice: Optional[Mapping[str, Any]] = None,
|
23
|
+
) -> int:
|
24
|
+
"""
|
25
|
+
Note that streams partition with the same slicing value but with different names might collapse if stream name is not provided
|
26
|
+
"""
|
20
27
|
if stream_slice:
|
21
28
|
try:
|
22
29
|
s = json.dumps(stream_slice, sort_keys=True, cls=SliceEncoder)
|
@@ -0,0 +1 @@
|
|
1
|
+
Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|