airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +75 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +213 -100
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +18 -3
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
- airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/__init__.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/filters.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +14 -1
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
- airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
- airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/file_based_source.py +70 -37
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
- airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
- airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/call_rate.py +185 -47
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/test/mock_http/mocker.py +9 -1
- airbyte_cdk/test/mock_http/response.py +6 -3
- airbyte_cdk/utils/datetime_helpers.py +48 -66
- airbyte_cdk/utils/mapping_helpers.py +126 -26
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/RECORD +68 -59
- airbyte_cdk/connector_builder/message_grouper.py +0 -448
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/extractors/record_selector.py

@@ -41,6 +41,7 @@ class RecordSelector(HttpSelector):
     _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
     record_filter: Optional[RecordFilter] = None
     transformations: List[RecordTransformation] = field(default_factory=lambda: [])
+    transform_before_filtering: bool = False
 
     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters

@@ -104,9 +105,17 @@ class RecordSelector(HttpSelector):
         Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could
         share the logic of doing transformations on a set of records.
         """
-        filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
-        transformed_data = self._transform(filtered_data, stream_state, stream_slice)
-        normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
+        if self.transform_before_filtering:
+            transformed_data = self._transform(all_data, stream_state, stream_slice)
+            transformed_filtered_data = self._filter(
+                transformed_data, stream_state, stream_slice, next_page_token
+            )
+        else:
+            filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
+            transformed_filtered_data = self._transform(filtered_data, stream_state, stream_slice)
+        normalized_data = self._normalize_by_schema(
+            transformed_filtered_data, schema=records_schema
+        )
        for data in normalized_data:
             yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
 
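The new flag matters when a filter condition refers to a field that only exists after transformation. A standalone sketch of the two orderings (toy functions, not the CDK classes):

records = [{"id": 1}, {"id": 2}]

def transform(rows):  # stand-in for e.g. an AddFields transformation
    return [{**row, "kept": row["id"] > 1} for row in rows]

def keep(rows):  # stand-in for a RecordFilter testing the added field
    return [row for row in rows if row.get("kept")]

# transform_before_filtering=True: the filter sees the added "kept" field.
assert keep(transform(records)) == [{"id": 2, "kept": True}]

# Default ordering: the filter runs first, so "kept" does not exist yet.
assert transform(keep(records)) == []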
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -5,6 +5,7 @@
 import copy
 import logging
 import threading
+import time
 from collections import OrderedDict
 from copy import deepcopy
 from datetime import timedelta
@@ -58,7 +59,8 @@ class ConcurrentPerPartitionCursor(Cursor):
     CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
     """
 
-    DEFAULT_MAX_PARTITIONS_NUMBER =
+    DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
+    SWITCH_TO_GLOBAL_LIMIT = 10_000
     _NO_STATE: Mapping[str, Any] = {}
     _NO_CURSOR_STATE: Mapping[str, Any] = {}
     _GLOBAL_STATE_KEY = "state"
@@ -99,9 +101,11 @@ class ConcurrentPerPartitionCursor(Cursor):
         self._new_global_cursor: Optional[StreamState] = None
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
-        self._over_limit: int = 0
+        self._number_of_partitions: int = 0
         self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
+        # Track the last time a state message was emitted
+        self._last_emission_time: float = 0.0
 
         self._set_initial_state(stream_state)
 
@@ -141,21 +145,16 @@ class ConcurrentPerPartitionCursor(Cursor):
             raise ValueError("stream_slice cannot be None")
 
         partition_key = self._to_partition_key(stream_slice.partition)
-        self._cursor_per_partition[partition_key].close_partition(partition=partition)
         with self._lock:
             self._semaphore_per_partition[partition_key].acquire()
-            cursor = self._cursor_per_partition[partition_key]
-            if (
-                partition_key in self._finished_partitions
-                and self._semaphore_per_partition[partition_key]._value == 0
-            ):
+            if not self._use_global_cursor:
+                self._cursor_per_partition[partition_key].close_partition(partition=partition)
+                cursor = self._cursor_per_partition[partition_key]
                 if (
-                    self._new_global_cursor is None
-                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
-                    < cursor.state[self.cursor_field.cursor_field_key]
+                    partition_key in self._finished_partitions
+                    and self._semaphore_per_partition[partition_key]._value == 0
                 ):
-                    self._new_global_cursor = copy.deepcopy(cursor.state)
-                if not self._use_global_cursor:
-                    self._emit_state_message()
+                    self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
             self._emit_state_message()
@@ -169,9 +168,23 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._global_cursor = self._new_global_cursor
             self._lookback_window = self._timer.finish()
             self._parent_state = self._partition_router.get_stream_state()
-        self._emit_state_message()
+        self._emit_state_message(throttle=False)
 
-    def _emit_state_message(self) -> None:
+    def _throttle_state_message(self) -> Optional[float]:
+        """
+        Throttles the state message emission to once every 60 seconds.
+        """
+        current_time = time.time()
+        if current_time - self._last_emission_time <= 60:
+            return None
+        return current_time
+
+    def _emit_state_message(self, throttle: bool = True) -> None:
+        if throttle:
+            current_time = self._throttle_state_message()
+            if current_time is None:
+                return
+            self._last_emission_time = current_time
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
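The effect of the new throttling, restated as a self-contained sketch (toy class, not a CDK API): ordinary state messages are dropped if one went out in the last 60 seconds, while the final checkpoint from ensure_at_least_one_state_emitted passes throttle=False to bypass the window.

import time

class ThrottledEmitter:
    """Toy illustration of the throttle above; not the CDK class."""

    def __init__(self) -> None:
        self._last_emission_time = 0.0

    def emit(self, throttle: bool = True) -> bool:
        if throttle:
            now = time.time()
            if now - self._last_emission_time <= 60:
                return False  # suppressed: a message went out less than 60s ago
            self._last_emission_time = now
        return True  # emitted

emitter = ThrottledEmitter()
assert emitter.emit() is True                 # first message always goes out
assert emitter.emit() is False                # inside the 60s window: suppressed
assert emitter.emit(throttle=False) is True   # final checkpoint bypasses the window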
@@ -202,6 +215,7 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._lookback_window if self._global_cursor else 0,
         )
         with self._lock:
+            self._number_of_partitions += 1
             self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
             self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
                 threading.Semaphore(0)
@@ -232,9 +246,15 @@ class ConcurrentPerPartitionCursor(Cursor):
         - Logs a warning each time a partition is removed, indicating whether it was finished
           or removed due to being the oldest.
         """
+        if not self._use_global_cursor and self.limit_reached():
+            logger.info(
+                f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
+                f"Switching to global cursor for {self._stream_name}."
+            )
+            self._use_global_cursor = True
+
         with self._lock:
             while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
-                self._over_limit += 1
                 # Try removing finished partitions first
                 for partition_key in list(self._cursor_per_partition.keys()):
                     if (
@@ -245,7 +265,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                             partition_key
                         )  # Remove the oldest partition
                         logger.warning(
-                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
+                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
                         )
                         break
                 else:
@@ -254,7 +274,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                     1
                 ]  # Remove the oldest partition
                 logger.warning(
-                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
                 )
 
     def _set_initial_state(self, stream_state: StreamState) -> None:
@@ -314,6 +334,7 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._lookback_window = int(stream_state.get("lookback_window", 0))
 
             for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
+                self._number_of_partitions += 1
                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                     self._create_cursor(state["cursor"])
                 )
@@ -354,16 +375,26 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._new_global_cursor = deepcopy(fixed_global_state)
 
     def observe(self, record: Record) -> None:
-        if not self._use_global_cursor and self.limit_reached():
-            self._use_global_cursor = True
-
         if not record.associated_slice:
             raise ValueError(
                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
             )
-        self._cursor_per_partition[
-            self._to_partition_key(record.associated_slice.partition)
-        ].observe(record)
+
+        record_cursor = self._connector_state_converter.output_format(
+            self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
+        )
+        self._update_global_cursor(record_cursor)
+        if not self._use_global_cursor:
+            self._cursor_per_partition[
+                self._to_partition_key(record.associated_slice.partition)
+            ].observe(record)
+
+    def _update_global_cursor(self, value: Any) -> None:
+        if (
+            self._new_global_cursor is None
+            or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
+        ):
+            self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
 
     def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
         return self._partition_serializer.to_partition_key(partition)
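Restated outside the class, _update_global_cursor is a monotonic max-tracker over observed cursor values, so out-of-order records cannot move the global cursor backwards (stand-alone sketch):

import copy
from typing import Any, Optional

def update_global_cursor(current: Optional[dict], key: str, value: Any) -> Optional[dict]:
    # Same comparison as the method above: replace only on a strictly greater value.
    if current is None or current[key] < value:
        return {key: copy.deepcopy(value)}
    return current

state: Optional[dict] = None
for seen in ("2024-01-01", "2024-03-01", "2024-02-01"):
    state = update_global_cursor(state, "updated_at", seen)
assert state == {"updated_at": "2024-03-01"}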
@@ -397,4 +428,4 @@ class ConcurrentPerPartitionCursor(Cursor):
         return cursor
 
     def limit_reached(self) -> bool:
-        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
+        return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
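Taken together, the renamed counter and the two constants give the cursor a two-stage safety valve. A sketch of the relationship, with values copied from the hunks above:

SWITCH_TO_GLOBAL_LIMIT = 10_000          # past this, fall back to a single global cursor
DEFAULT_MAX_PARTITIONS_NUMBER = 25_000   # past this, evict the oldest per-partition cursor

def limit_reached(number_of_partitions: int) -> bool:
    # Mirrors the new method body: the counter tracks every partition created
    # during the sync, not just those currently held in memory, so once the
    # switch to the global cursor happens it is never undone.
    return number_of_partitions > SWITCH_TO_GLOBAL_LIMIT

assert not limit_reached(10_000)
assert limit_reached(10_001)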
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py

@@ -21,6 +21,7 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
 )
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
+from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths
 
 
 @dataclass
@@ -122,6 +123,10 @@ class DatetimeBasedCursor(DeclarativeCursor):
         if not self.cursor_datetime_formats:
             self.cursor_datetime_formats = [self.datetime_format]
 
+        _validate_component_request_option_paths(
+            self.config, self.start_time_option, self.end_time_option
+        )
+
     def get_stream_state(self) -> StreamState:
         return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {}  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
 
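The helper's exact rules live in airbyte_cdk/utils/mapping_helpers.py (+126 lines this release); the plausible intent, given the new field_path support on RequestOption, is to reject start/end options that would write to the same request location. A toy check in that spirit (illustrative names, not the real helper):

from typing import Optional, Tuple

RequestTarget = Tuple[str, str]  # (inject_into, field name), a simplified stand-in

def validate_request_option_paths(*targets: Optional[RequestTarget]) -> None:
    seen = set()
    for target in targets:
        if target is None:
            continue  # option not configured
        if target in seen:
            raise ValueError(f"Multiple request options would be written to {target}")
        seen.add(target)

validate_request_option_paths(("request_parameter", "since"), ("request_parameter", "until"))  # fine
# validate_request_option_paths(("request_parameter", "t"), ("request_parameter", "t"))  # would raise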
@@ -365,14 +370,15 @@ class DatetimeBasedCursor(DeclarativeCursor):
         options: MutableMapping[str, Any] = {}
         if not stream_slice:
             return options
+
         if self.start_time_option and self.start_time_option.inject_into == option_type:
-            options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore # field_name is always casted to an interpolated string
-                self._partition_field_start.eval(self.config)
-            )
+            start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
+            self.start_time_option.inject_into_request(options, start_time_value, self.config)
+
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
-                self._partition_field_end.eval(self.config)
-            )
+            end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
+            self.end_time_option.inject_into_request(options, end_time_value, self.config)
+
         return options
 
     def should_be_synced(self, record: Record) -> bool:
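Both branches now delegate the write to RequestOption.inject_into_request (its signature is visible in the added lines). Combined with the field_path support added in request_option.py (+83 lines), an option can target nested body fields, not just top-level keys. A simplified stand-in for the nested write:

from typing import Any, List, MutableMapping, Optional

def inject_value(
    options: MutableMapping[str, Any],
    value: Any,
    field_name: Optional[str] = None,
    field_path: Optional[List[str]] = None,
) -> None:
    # Hypothetical simplified version of the injection: either a flat key
    # or a path of nested dictionaries created on demand.
    if field_path:
        target = options
        for key in field_path[:-1]:
            target = target.setdefault(key, {})
        target[field_path[-1]] = value
    elif field_name:
        options[field_name] = value

body: dict = {}
inject_value(body, "2024-01-01", field_path=["filter", "updated_at", "gte"])
assert body == {"filter": {"updated_at": {"gte": "2024-01-01"}}}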
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py

@@ -115,7 +115,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
         * Yield the last slice. At that point, once there are as many slices yielded as closes, the global slice will be closed too
         """
         slice_generator = (
-            StreamSlice(partition=partition, cursor_slice=cursor_slice)
+            StreamSlice(
+                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+            )
             for partition in self._partition_router.stream_slices()
             for cursor_slice in self._stream_cursor.stream_slices()
         )
@@ -131,7 +133,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
 
     def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
         slice_generator = (
-            StreamSlice(partition=partition, cursor_slice=cursor_slice)
+            StreamSlice(
+                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+            )
             for cursor_slice in self._stream_cursor.stream_slices()
         )
 
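Both generators now forward the parent partition's extra_fields when re-wrapping slices, so values extracted from parent records remain visible to downstream consumers of the slice. Minimal illustration (field values invented for the example):

from airbyte_cdk.sources.types import StreamSlice

parent = StreamSlice(
    partition={"parent_id": "42"},
    cursor_slice={},
    extra_fields={"parent_name": "accounts"},  # pulled from the parent record
)
child = StreamSlice(
    partition=parent,
    cursor_slice={"start_time": "2024-01-01", "end_time": "2024-02-01"},
    extra_fields=parent.extra_fields,  # previously dropped when re-wrapping
)
assert child.extra_fields == {"parent_name": "accounts"}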
airbyte_cdk/sources/declarative/interpolation/jinja.py

@@ -1,5 +1,5 @@
 #
-# Copyright (c)
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 #
 
 import ast
@@ -11,10 +11,12 @@ from jinja2.environment import Template
 from jinja2.exceptions import UndefinedError
 from jinja2.sandbox import SandboxedEnvironment
 
+from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.declarative.interpolation.filters import filters
 from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation
 from airbyte_cdk.sources.declarative.interpolation.macros import macros
 from airbyte_cdk.sources.types import Config
+from airbyte_cdk.utils import AirbyteTracedException
 
 
 class StreamPartitionAccessEnvironment(SandboxedEnvironment):
@@ -36,6 +38,10 @@ _ALIASES = {
     "stream_partition": "stream_slice",  # Use stream_partition to access partition router's values
 }
 
+_UNSUPPORTED_INTERPOLATION_VARIABLES: Mapping[str, str] = {
+    "stream_state": "`stream_state` is no longer supported for interpolation. We recommend using `stream_interval` instead. Please reference the CDK Migration Guide for more information.",
+}
+
 # These extensions are not installed so they're not currently a problem,
 # but we're still explicitly removing them from the jinja context.
 # At worst, this is documentation that we do NOT want to include these extensions because of the potential security risks
@@ -95,6 +101,13 @@ class JinjaInterpolation(Interpolation):
             elif equivalent in context:
                 context[alias] = context[equivalent]
 
+        for variable_name in _UNSUPPORTED_INTERPOLATION_VARIABLES:
+            if variable_name in input_str:
+                raise AirbyteTracedException(
+                    message=_UNSUPPORTED_INTERPOLATION_VARIABLES[variable_name],
+                    internal_message=_UNSUPPORTED_INTERPOLATION_VARIABLES[variable_name],
+                    failure_type=FailureType.config_error,
+                )
         try:
             if isinstance(input_str, str):
                 result = self._eval(input_str, context)
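For manifest authors the check is a hard break: any interpolated string that mentions stream_state now fails with a config error before evaluation. The migration is a string-level substitution; the interval keys below are the DatetimeBasedCursor defaults and may differ per connector:

# Raises AirbyteTracedException (failure_type=config_error) on interpolation:
old_expression = "{{ stream_state['updated_at'] }}"

# Supported replacement: stream_interval exposes the current slice's bounds.
new_expression = "{{ stream_interval['start_time'] }}"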
airbyte_cdk/sources/declarative/interpolation/macros.py

@@ -1,5 +1,5 @@
 #
-# Copyright (c)
+# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 #
 
 import builtins
@@ -63,10 +63,24 @@ def timestamp(dt: Union[float, str]) -> Union[int, float]:
     if isinstance(dt, (int, float)):
         return int(dt)
     else:
-        return _str_to_datetime(dt).astimezone(pytz.utc).timestamp()
+        return str_to_datetime(dt).astimezone(pytz.utc).timestamp()
 
 
-def _str_to_datetime(s: str) -> datetime.datetime:
+def str_to_datetime(s: str) -> datetime.datetime:
+    """
+    Converts a string to a datetime object with UTC timezone
+
+    If the input string does not contain timezone information, UTC is assumed.
+    Supports both basic date strings like "2022-01-14" and datetime strings with optional timezone
+    like "2022-01-01T13:45:30+00:00".
+
+    Usage:
+        `"{{ str_to_datetime('2022-01-14') }}"`
+
+    :param s: string to parse as datetime
+    :return: datetime object in UTC timezone
+    """
+
     parsed_date = parser.isoparse(s)
     if not parsed_date.tzinfo:
         # Assume UTC if the input does not contain a timezone
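Because the renamed helper is appended to _macros_list (last hunk of this file), it is now callable from manifest interpolation as well as from Python. A quick check of the documented behavior; the equalities hold regardless of whether the returned tzinfo is pytz.utc or datetime.timezone.utc, since aware datetimes compare by instant:

import datetime
from airbyte_cdk.sources.declarative.interpolation.macros import str_to_datetime

# Date-only input: parsed and assumed UTC.
assert str_to_datetime("2022-01-14") == datetime.datetime(
    2022, 1, 14, tzinfo=datetime.timezone.utc
)
# Offset-aware input: the instant is preserved.
assert str_to_datetime("2022-01-01T13:45:30+02:00") == datetime.datetime(
    2022, 1, 1, 11, 45, 30, tzinfo=datetime.timezone.utc
)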
@@ -155,7 +169,7 @@ def format_datetime(
     if isinstance(dt, datetime.datetime):
         return dt.strftime(format)
     dt_datetime = (
-        datetime.datetime.strptime(dt, input_format) if input_format else _str_to_datetime(dt)
+        datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
     )
     if format == "%s":
         return str(int(dt_datetime.timestamp()))
@@ -172,5 +186,6 @@ _macros_list = [
     duration,
     format_datetime,
     today_with_timezone,
+    str_to_datetime,
 ]
 macros = {f.__name__: f for f in _macros_list}
airbyte_cdk/sources/declarative/manifest_declarative_source.py

@@ -137,6 +137,10 @@ class ManifestDeclarativeSource(DeclarativeSource):
             self._source_config, config
         )
 
+        api_budget_model = self._source_config.get("api_budget")
+        if api_budget_model:
+            self._constructor.set_api_budget(api_budget_model, config)
+
         source_streams = [
             self._constructor.create_component(
                 DeclarativeStreamModel,
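This wires a new top-level api_budget manifest key (backed by the HTTPAPIBudget component added to declarative_component_schema.yaml and the expanded call_rate.py) into the factory before any stream is constructed, so every requester shares one budget. A sketch of where the key sits, shown as the dict this code reads from; the component's fields are elided rather than guessed:

# Hypothetical manifest fragment; only the top-level key is asserted here.
source_config = {
    "version": "6.35.0",
    "api_budget": {
        "type": "HTTPAPIBudget",
        # policy/matcher fields per declarative_component_schema.yaml
    },
    "streams": [],
}

api_budget_model = source_config.get("api_budget")  # mirrors the hunk above
assert api_budget_model is not None  # present, so set_api_budget would run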
@@ -365,6 +369,11 @@ class ManifestDeclarativeSource(DeclarativeSource):
             # Ensure that each stream is created with a unique name
             name = dynamic_stream.get("name")
 
+            if not isinstance(name, str):
+                raise ValueError(
+                    f"Expected stream name {name} to be a string, got {type(name)}."
+                )
+
             if name in seen_dynamic_streams:
                 error_message = f"Dynamic streams list contains a duplicate name: {name}. Please contact Airbyte Support."
                 failure_type = FailureType.system_error