airbyte_cdk-6.34.0.dev2-py3-none-any.whl → airbyte_cdk-6.34.1.dev0-py3-none-any.whl
This diff compares the publicly available contents of the two package versions as released to their public registry. It is provided for informational purposes only and reflects the changes between the versions as published.
- airbyte_cdk/connector_builder/connector_builder_handler.py +12 -16
- airbyte_cdk/connector_builder/message_grouper.py +448 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -6
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +19 -30
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +85 -203
- airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
- airbyte_cdk/sources/declarative/decoders/__init__.py +4 -0
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -7
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +58 -12
- airbyte_cdk/sources/declarative/extractors/record_selector.py +3 -12
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +38 -122
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -12
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +0 -9
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -150
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +84 -234
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +18 -26
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -8
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -16
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +37 -70
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +12 -107
- airbyte_cdk/sources/file_based/stream/__init__.py +1 -10
- airbyte_cdk/sources/streams/call_rate.py +47 -185
- airbyte_cdk/sources/streams/http/http.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +56 -217
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +73 -144
- airbyte_cdk/test/mock_http/mocker.py +1 -9
- airbyte_cdk/test/mock_http/response.py +3 -6
- airbyte_cdk/utils/datetime_helpers.py +66 -48
- airbyte_cdk/utils/mapping_helpers.py +26 -126
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/RECORD +45 -54
- airbyte_cdk/connector_builder/test_reader/__init__.py +0 -7
- airbyte_cdk/connector_builder/test_reader/helpers.py +0 -591
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +0 -160
- airbyte_cdk/connector_builder/test_reader/reader.py +0 -441
- airbyte_cdk/connector_builder/test_reader/types.py +0 -75
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +0 -81
- airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -47
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +0 -85
- airbyte_cdk/sources/specs/transfer_modes.py +0 -26
- airbyte_cdk/sources/streams/permissions/identities_stream.py +0 -75
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.0.dev2.dist-info → airbyte_cdk-6.34.1.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/extractors/record_selector.py

@@ -41,7 +41,6 @@ class RecordSelector(HttpSelector):
     _name: Union[InterpolatedString, str] = field(init=False, repr=False, default="")
     record_filter: Optional[RecordFilter] = None
     transformations: List[RecordTransformation] = field(default_factory=lambda: [])
-    transform_before_filtering: bool = False

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._parameters = parameters
@@ -105,17 +104,9 @@ class RecordSelector(HttpSelector):
         Until we decide to move this logic away from the selector, we made this method public so that users like AsyncJobRetriever could
         share the logic of doing transformations on a set of records.
         """
-        if self.transform_before_filtering:
-            transformed_data = self._transform(all_data, stream_state, stream_slice)
-            transformed_filtered_data = self._filter(
-                transformed_data, stream_state, stream_slice, next_page_token
-            )
-        else:
-            filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
-            transformed_filtered_data = self._transform(filtered_data, stream_state, stream_slice)
-        normalized_data = self._normalize_by_schema(
-            transformed_filtered_data, schema=records_schema
-        )
+        filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
+        transformed_data = self._transform(filtered_data, stream_state, stream_slice)
+        normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
         for data in normalized_data:
             yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)

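Note: the net effect of the two hunks above is that the `transform_before_filtering` toggle is dropped and records always flow filter → transform → normalize. A standalone sketch of that ordering (simplified signatures; the real methods also receive stream state, slice, and pagination context):

    from typing import Any, Callable, Iterable, List, Mapping

    Record = Mapping[str, Any]

    def select_records(
        all_data: Iterable[Record],
        record_filter: Callable[[Record], bool],
        transformations: List[Callable[[Record], Record]],
    ) -> Iterable[Record]:
        # Restored order: filter first, then transform, then normalize/yield.
        filtered_data = (record for record in all_data if record_filter(record))
        for record in filtered_data:
            for transformation in transformations:
                record = transformation(record)
            yield record  # schema normalization would happen at this point

    records = [{"name": "a", "active": True}, {"name": "b", "active": False}]
    print(list(select_records(records, lambda r: r["active"], [lambda r: {**r, "name": r["name"].upper()}])))
    # [{'name': 'A', 'active': True}]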
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -5,7 +5,6 @@
 import copy
 import logging
 import threading
-import time
 from collections import OrderedDict
 from copy import deepcopy
 from datetime import timedelta
@@ -59,8 +58,7 @@ class ConcurrentPerPartitionCursor(Cursor):
     CurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
     """

-    DEFAULT_MAX_PARTITIONS_NUMBER = 25_000
-    SWITCH_TO_GLOBAL_LIMIT = 10_000
+    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
     _NO_STATE: Mapping[str, Any] = {}
     _NO_CURSOR_STATE: Mapping[str, Any] = {}
     _GLOBAL_STATE_KEY = "state"
@@ -95,21 +93,15 @@ class ConcurrentPerPartitionCursor(Cursor):
         # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
         self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
         self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
-
-        # Parent-state tracking: store each partition's parent state in creation order
-        self._partition_parent_state_map: OrderedDict[str, Mapping[str, Any]] = OrderedDict()
-
         self._finished_partitions: set[str] = set()
         self._lock = threading.Lock()
         self._timer = Timer()
         self._new_global_cursor: Optional[StreamState] = None
         self._lookback_window: int = 0
         self._parent_state: Optional[StreamState] = None
-        self._number_of_partitions: int = 0
+        self._over_limit: int = 0
         self._use_global_cursor: bool = False
         self._partition_serializer = PerPartitionKeySerializer()
-        # Track the last time a state message was emitted
-        self._last_emission_time: float = 0.0

         self._set_initial_state(stream_state)

@@ -149,55 +141,22 @@ class ConcurrentPerPartitionCursor(Cursor):
             raise ValueError("stream_slice cannot be None")

         partition_key = self._to_partition_key(stream_slice.partition)
+        self._cursor_per_partition[partition_key].close_partition(partition=partition)
         with self._lock:
             self._semaphore_per_partition[partition_key].acquire()
-            if not self._use_global_cursor:
-                self._cursor_per_partition[partition_key].close_partition(partition=partition)
-                cursor = self._cursor_per_partition[partition_key]
+            cursor = self._cursor_per_partition[partition_key]
+            if (
+                partition_key in self._finished_partitions
+                and self._semaphore_per_partition[partition_key]._value == 0
+            ):
                 if (
-                    partition_key in self._finished_partitions
-                    and self._semaphore_per_partition[partition_key]._value == 0
+                    self._new_global_cursor is None
+                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
+                    < cursor.state[self.cursor_field.cursor_field_key]
                 ):
-                    self._update_global_cursor(cursor.state[self.cursor_field.cursor_field_key])
-
-            self._check_and_update_parent_state()
-
-            self._emit_state_message()
-
-    def _check_and_update_parent_state(self) -> None:
-        """
-        Pop the leftmost partition state from _partition_parent_state_map only if
-        *all partitions* up to (and including) that partition key in _semaphore_per_partition
-        are fully finished (i.e. in _finished_partitions and semaphore._value == 0).
-        """
-        last_closed_state = None
-
-        while self._partition_parent_state_map:
-            # Look at the earliest partition key in creation order
-            earliest_key = next(iter(self._partition_parent_state_map))
-
-            # Verify ALL partitions from the left up to earliest_key are finished
-            all_left_finished = True
-            for p_key, sem in self._semaphore_per_partition.items():
-                # If any earlier partition is still not finished, we must stop
-                if p_key not in self._finished_partitions or sem._value != 0:
-                    all_left_finished = False
-                    break
-                # Once we've reached earliest_key in the semaphore order, we can stop checking
-                if p_key == earliest_key:
-                    break
-
-            # If the partitions up to earliest_key are not all finished, break the while-loop
-            if not all_left_finished:
-                break
-
-            # Otherwise, pop the leftmost entry from parent-state map
-            _, closed_parent_state = self._partition_parent_state_map.popitem(last=False)
-            last_closed_state = closed_parent_state
-
-        # Update _parent_state if we actually popped at least one partition
-        if last_closed_state is not None:
-            self._parent_state = last_closed_state
+                    self._new_global_cursor = copy.deepcopy(cursor.state)
+        if not self._use_global_cursor:
+            self._emit_state_message()

     def ensure_at_least_one_state_emitted(self) -> None:
         """
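Note: with the parent-state map gone, close_partition only has to drain the partition's semaphore and, once the partition is fully finished, promote its cursor state into the running global maximum. A standalone sketch of that bookkeeping (names mirror the diff but are illustrative; `CURSOR_FIELD` stands in for `cursor_field.cursor_field_key`):

    import copy
    import threading
    from typing import Any, Dict, Optional, Set

    CURSOR_FIELD = "updated_at"  # stand-in for cursor_field.cursor_field_key

    def on_partition_closed(
        partition_key: str,
        partition_cursor_state: Dict[str, Any],
        finished_partitions: Set[str],
        semaphore_per_partition: Dict[str, threading.Semaphore],
        new_global_cursor: Optional[Dict[str, Any]],
    ) -> Optional[Dict[str, Any]]:
        # One slice of this partition has closed; consume one release().
        semaphore_per_partition[partition_key].acquire()
        # Once the partition is marked finished and its semaphore is drained
        # (_value == 0, the same private check the diff uses), promote its state
        # if it is ahead of the current global cursor.
        if (
            partition_key in finished_partitions
            and semaphore_per_partition[partition_key]._value == 0
            and (
                new_global_cursor is None
                or new_global_cursor[CURSOR_FIELD] < partition_cursor_state[CURSOR_FIELD]
            )
        ):
            return copy.deepcopy(partition_cursor_state)
        return new_global_cursor

    # Example: one slice generated (release) and one closed (acquire).
    sem = {"p1": threading.Semaphore(0)}
    sem["p1"].release()
    print(on_partition_closed("p1", {"updated_at": 5}, {"p1"}, sem, None))  # {'updated_at': 5}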
@@ -210,23 +169,9 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._global_cursor = self._new_global_cursor
             self._lookback_window = self._timer.finish()
             self._parent_state = self._partition_router.get_stream_state()
-        self._emit_state_message(throttle=False)
+        self._emit_state_message()

-    def _throttle_state_message(self) -> Optional[float]:
-        """
-        Throttles the state message emission to once every 60 seconds.
-        """
-        current_time = time.time()
-        if current_time - self._last_emission_time <= 60:
-            return None
-        return current_time
-
-    def _emit_state_message(self, throttle: bool = True) -> None:
-        if throttle:
-            current_time = self._throttle_state_message()
-            if current_time is None:
-                return
-            self._last_emission_time = current_time
+    def _emit_state_message(self) -> None:
         self._connector_state_manager.update_state_for_stream(
             self._stream_name,
             self._stream_namespace,
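Note: the deleted helper gated state emission to at most once per 60 seconds. For reference, the removed pattern as standalone code (the logic is taken directly from the deleted lines; the class name is hypothetical):

    import time
    from typing import Optional

    class StateEmitter:
        """Standalone sketch of the throttling pattern deleted above."""

        def __init__(self) -> None:
            self._last_emission_time: float = 0.0

        def _throttle_state_message(self) -> Optional[float]:
            # Allow at most one emission per 60 seconds.
            current_time = time.time()
            if current_time - self._last_emission_time <= 60:
                return None
            return current_time

        def emit(self, throttle: bool = True) -> None:
            if throttle:
                current_time = self._throttle_state_message()
                if current_time is None:
                    return  # suppressed: a message was emitted too recently
                self._last_emission_time = current_time
            print("state message emitted")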
@@ -243,19 +188,13 @@ class ConcurrentPerPartitionCursor(Cursor):

         slices = self._partition_router.stream_slices()
         self._timer.start()
-        for partition, _, parent_state in iterate_with_last_flag_and_state(
-            slices, self._partition_router.get_stream_state
-        ):
-            yield from self._generate_slices_from_partition(partition, parent_state)
+        for partition in slices:
+            yield from self._generate_slices_from_partition(partition)

-    def _generate_slices_from_partition(
-        self, partition: StreamSlice, parent_state: Mapping[str, Any]
-    ) -> Iterable[StreamSlice]:
+    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
         # Ensure the maximum number of partitions is not exceeded
         self._ensure_partition_limit()

-        partition_key = self._to_partition_key(partition.partition)
-
         cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
         if not cursor:
             cursor = self._create_cursor(
@@ -263,27 +202,18 @@ class ConcurrentPerPartitionCursor(Cursor):
                 self._lookback_window if self._global_cursor else 0,
             )
             with self._lock:
-                self._number_of_partitions += 1
-                self._cursor_per_partition[partition_key] = cursor
-                self._semaphore_per_partition[partition_key] = threading.Semaphore(0)
-
-        with self._lock:
-            if (
-                len(self._partition_parent_state_map) == 0
-                or self._partition_parent_state_map[
-                    next(reversed(self._partition_parent_state_map))
-                ]
-                != parent_state
-            ):
-                self._partition_parent_state_map[partition_key] = deepcopy(parent_state)
+                self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+                self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
+                    threading.Semaphore(0)
+                )

         for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
             cursor.stream_slices(),
             lambda: None,
         ):
-            self._semaphore_per_partition[partition_key].release()
+            self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
             if is_last_slice:
-                self._finished_partitions.add(partition_key)
+                self._finished_partitions.add(self._to_partition_key(partition.partition))
             yield StreamSlice(
                 partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
             )
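Note: both hunks above key the per-partition bookkeeping on a `threading.Semaphore(0)`. A standalone sketch of why that works as a slice counter (it inspects the same private `_value` attribute the diff does):

    import threading

    # Slice generation release()s once per generated slice; close_partition
    # acquire()s once per closed slice, so a drained semaphore means every
    # generated slice has been closed.
    slice_counter = threading.Semaphore(0)
    for _ in range(3):
        slice_counter.release()   # three slices generated
    for _ in range(3):
        slice_counter.acquire()   # three slices closed
    print(slice_counter._value)   # 0 -> the partition is fully processed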
@@ -302,15 +232,9 @@ class ConcurrentPerPartitionCursor(Cursor):
         - Logs a warning each time a partition is removed, indicating whether it was finished
           or removed due to being the oldest.
         """
-        if not self._use_global_cursor and self.limit_reached():
-            logger.info(
-                f"Exceeded the 'SWITCH_TO_GLOBAL_LIMIT' of {self.SWITCH_TO_GLOBAL_LIMIT}. "
-                f"Switching to global cursor for {self._stream_name}."
-            )
-            self._use_global_cursor = True
-
         with self._lock:
             while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+                self._over_limit += 1
                 # Try removing finished partitions first
                 for partition_key in list(self._cursor_per_partition.keys()):
                     if (
@@ -321,7 +245,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                            partition_key
                        )  # Remove the oldest partition
                        logger.warning(
-                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
+                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
                        )
                        break
                 else:
@@ -330,7 +254,7 @@ class ConcurrentPerPartitionCursor(Cursor):
                        1
                    ]  # Remove the oldest partition
                    logger.warning(
-                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._number_of_partitions - self.DEFAULT_MAX_PARTITIONS_NUMBER}."
+                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
                    )

     def _set_initial_state(self, stream_state: StreamState) -> None:
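Note: the loop above now counts each eviction in `_over_limit` instead of tracking a separate partition counter. A standalone sketch of the eviction policy, ignoring locking and the semaphore check (names are illustrative):

    from collections import OrderedDict
    from typing import Set

    MAX_PARTITIONS = 3

    def evict_one(cursors: OrderedDict, finished: Set[str]) -> str:
        # Try removing a finished partition first.
        for partition_key in list(cursors.keys()):
            if partition_key in finished:
                cursors.pop(partition_key)
                return partition_key
        oldest_key, _ = cursors.popitem(last=False)  # else drop the oldest entry
        return oldest_key

    cursors = OrderedDict([("p1", 1), ("p2", 2), ("p3", 3), ("p4", 4)])
    while len(cursors) > MAX_PARTITIONS - 1:
        print("evicted:", evict_one(cursors, finished={"p2"}))  # evicts p2, then p1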
@@ -390,10 +314,12 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._lookback_window = int(stream_state.get("lookback_window", 0))

             for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
-                self._number_of_partitions += 1
                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
                     self._create_cursor(state["cursor"])
                 )
+                self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
+                    threading.Semaphore(0)
+                )

             # set default state for missing partitions if it is per partition with fallback to global
             if self._GLOBAL_STATE_KEY in stream_state:
@@ -428,26 +354,16 @@ class ConcurrentPerPartitionCursor(Cursor):
             self._new_global_cursor = deepcopy(fixed_global_state)

     def observe(self, record: Record) -> None:
+        if not self._use_global_cursor and self.limit_reached():
+            self._use_global_cursor = True
+
         if not record.associated_slice:
             raise ValueError(
                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
             )
-
-        record_cursor = self._connector_state_converter.output_format(
-            self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
-        )
-        self._update_global_cursor(record_cursor)
-        if not self._use_global_cursor:
-            self._cursor_per_partition[
-                self._to_partition_key(record.associated_slice.partition)
-            ].observe(record)
-
-    def _update_global_cursor(self, value: Any) -> None:
-        if (
-            self._new_global_cursor is None
-            or self._new_global_cursor[self.cursor_field.cursor_field_key] < value
-        ):
-            self._new_global_cursor = {self.cursor_field.cursor_field_key: copy.deepcopy(value)}
+        self._cursor_per_partition[
+            self._to_partition_key(record.associated_slice.partition)
+        ].observe(record)

     def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
         return self._partition_serializer.to_partition_key(partition)
@@ -481,4 +397,4 @@ class ConcurrentPerPartitionCursor(Cursor):
         return cursor

     def limit_reached(self) -> bool:
-        return self._number_of_partitions > self.SWITCH_TO_GLOBAL_LIMIT
+        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py

@@ -21,7 +21,6 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
 )
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
-from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths


 @dataclass
@@ -123,10 +122,6 @@ class DatetimeBasedCursor(DeclarativeCursor):
         if not self.cursor_datetime_formats:
             self.cursor_datetime_formats = [self.datetime_format]

-        _validate_component_request_option_paths(
-            self.config, self.start_time_option, self.end_time_option
-        )
-
     def get_stream_state(self) -> StreamState:
         return {self.cursor_field.eval(self.config): self._cursor} if self._cursor else {}  # type: ignore  # cursor_field is converted to an InterpolatedString in __post_init__

@@ -370,15 +365,14 @@ class DatetimeBasedCursor(DeclarativeCursor):
         options: MutableMapping[str, Any] = {}
         if not stream_slice:
             return options
-
         if self.start_time_option and self.start_time_option.inject_into == option_type:
-            start_time_value = stream_slice.get(self._partition_field_start.eval(self.config))
-            self.start_time_option.inject_into_request(options, start_time_value, self.config)
-
+            options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore # field_name is always casted to an interpolated string
+                self._partition_field_start.eval(self.config)
+            )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            end_time_value = stream_slice.get(self._partition_field_end.eval(self.config))
-            self.end_time_option.inject_into_request(options, end_time_value, self.config)
-
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
+                self._partition_field_end.eval(self.config)
+            )
         return options

     def should_be_synced(self, record: Record) -> bool:
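Note: the restored logic copies the slice boundary values directly into the request options under the configured field names. A standalone sketch (the slice keys "start_time"/"end_time" are the cursor's default partition field names; real configs may override them):

    from typing import Any, MutableMapping, Optional

    def get_request_options(
        stream_slice: Optional[MutableMapping[str, Any]],
        start_field: Optional[str],  # e.g. "created_after", from start_time_option.field_name
        end_field: Optional[str],    # e.g. "created_before", from end_time_option.field_name
    ) -> MutableMapping[str, Any]:
        options: MutableMapping[str, Any] = {}
        if not stream_slice:
            return options
        if start_field:
            options[start_field] = stream_slice.get("start_time")
        if end_field:
            options[end_field] = stream_slice.get("end_time")
        return options

    print(get_request_options({"start_time": "2024-01-01", "end_time": "2024-01-31"},
                              "created_after", "created_before"))
    # {'created_after': '2024-01-01', 'created_before': '2024-01-31'}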
airbyte_cdk/sources/declarative/manifest_declarative_source.py

@@ -137,10 +137,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
             self._source_config, config
         )

-        api_budget_model = self._source_config.get("api_budget")
-        if api_budget_model:
-            self._constructor.set_api_budget(api_budget_model, config)
-
         source_streams = [
             self._constructor.create_component(
                 DeclarativeStreamModel,
@@ -369,11 +365,6 @@ class ManifestDeclarativeSource(DeclarativeSource):
             # Ensure that each stream is created with a unique name
             name = dynamic_stream.get("name")

-            if not isinstance(name, str):
-                raise ValueError(
-                    f"Expected stream name {name} to be a string, got {type(name)}."
-                )
-
             if name in seen_dynamic_streams:
                 error_message = f"Dynamic streams list contains a duplicate name: {name}. Please contact Airbyte Support."
                 failure_type = FailureType.system_error
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -642,48 +642,6 @@ class OAuthAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class Rate(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    limit: int = Field(
-        ...,
-        description="The maximum number of calls allowed within the interval.",
-        title="Limit",
-    )
-    interval: str = Field(
-        ...,
-        description="The time interval for the rate limit.",
-        examples=["PT1H", "P1D"],
-        title="Interval",
-    )
-
-
-class HttpRequestRegexMatcher(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    method: Optional[str] = Field(
-        None, description="The HTTP method to match (e.g., GET, POST).", title="Method"
-    )
-    url_base: Optional[str] = Field(
-        None,
-        description='The base URL (scheme and host, e.g. "https://api.example.com") to match.',
-        title="URL Base",
-    )
-    url_path_pattern: Optional[str] = Field(
-        None,
-        description="A regular expression pattern to match the URL path.",
-        title="URL Path Pattern",
-    )
-    params: Optional[Dict[str, Any]] = Field(
-        None, description="The query parameters to match.", title="Parameters"
-    )
-    headers: Optional[Dict[str, Any]] = Field(
-        None, description="The headers to match.", title="Headers"
-    )
-
-
 class DpathExtractor(BaseModel):
     type: Literal["DpathExtractor"]
     field_path: List[str] = Field(
@@ -929,6 +887,15 @@ class CustomDecoder(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+class GzipJsonDecoder(BaseModel):
+    class Config:
+        extra = Extra.allow
+
+    type: Literal["GzipJsonDecoder"]
+    encoding: Optional[str] = "utf-8"
+    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
 class MinMaxDatetime(BaseModel):
     type: Literal["MinMaxDatetime"]
     datetime: str = Field(
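Note: `GzipJsonDecoder` returns to the generated model set. A minimal usage sketch, assuming the 6.34.1.dev0 wheel is installed (the generated models are plain pydantic classes and can be instantiated directly):

    from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
        GzipJsonDecoder,
    )

    decoder = GzipJsonDecoder(type="GzipJsonDecoder")
    print(decoder.encoding)  # "utf-8", the declared default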
@@ -1233,17 +1200,11 @@ class InjectInto(Enum):

 class RequestOption(BaseModel):
     type: Literal["RequestOption"]
-    field_name: Optional[str] = Field(
-        None,
-        description="Configures which key should be used in the location that the descriptor is being injected into. We hope to eventually deprecate this field in favor of `field_path` for all request_options, but must currently maintain it for backwards compatibility in the Builder.",
+    field_name: str = Field(
+        ...,
+        description="Configures which key should be used in the location that the descriptor is being injected into",
         examples=["segment_id"],
-        title="Field Name",
-    )
-    field_path: Optional[List[str]] = Field(
-        None,
-        description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)",
-        examples=[["data", "viewer", "id"]],
-        title="Field Path",
+        title="Request Option",
     )
     inject_into: InjectInto = Field(
         ...,
@@ -1307,8 +1268,18 @@ class LegacySessionTokenAuthenticator(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class CsvDecoder(BaseModel):
-    type: Literal["CsvDecoder"]
+class JsonParser(BaseModel):
+    type: Literal["JsonParser"]
+    encoding: Optional[str] = "utf-8"
+
+
+class JsonLineParser(BaseModel):
+    type: Literal["JsonLineParser"]
+    encoding: Optional[str] = "utf-8"
+
+
+class CsvParser(BaseModel):
+    type: Literal["CsvParser"]
     encoding: Optional[str] = "utf-8"
     delimiter: Optional[str] = ","

@@ -1607,55 +1578,6 @@ class DatetimeBasedCursor(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class FixedWindowCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["FixedWindowCallRatePolicy"]
-    period: str = Field(
-        ..., description="The time interval for the rate limit window.", title="Period"
-    )
-    call_limit: int = Field(
-        ...,
-        description="The maximum number of calls allowed within the period.",
-        title="Call Limit",
-    )
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
-class MovingWindowCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["MovingWindowCallRatePolicy"]
-    rates: List[Rate] = Field(
-        ...,
-        description="List of rates that define the call limits for different time intervals.",
-        title="Rates",
-    )
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
-class UnlimitedCallRatePolicy(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["UnlimitedCallRatePolicy"]
-    matchers: List[HttpRequestRegexMatcher] = Field(
-        ...,
-        description="List of matchers that define which requests this policy applies to.",
-        title="Matchers",
-    )
-
-
 class DefaultErrorHandler(BaseModel):
     type: Literal["DefaultErrorHandler"]
     backoff_strategies: Optional[
@@ -1752,9 +1674,9 @@ class RecordSelector(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class GzipDecoder(BaseModel):
-    type: Literal["GzipDecoder"]
-    decoder: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder]
+class GzipParser(BaseModel):
+    type: Literal["GzipParser"]
+    inner_parser: Union[JsonLineParser, CsvParser, JsonParser]


 class Spec(BaseModel):
@@ -1787,51 +1709,23 @@ class CompositeErrorHandler(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


-class HTTPAPIBudget(BaseModel):
-    class Config:
-        extra = Extra.allow
-
-    type: Literal["HTTPAPIBudget"]
-    policies: List[
-        Union[
-            FixedWindowCallRatePolicy,
-            MovingWindowCallRatePolicy,
-            UnlimitedCallRatePolicy,
-        ]
-    ] = Field(
-        ...,
-        description="List of call rate policies that define how many calls are allowed.",
-        title="Policies",
-    )
-    ratelimit_reset_header: Optional[str] = Field(
-        "ratelimit-reset",
-        description="The HTTP response header name that indicates when the rate limit resets.",
-        title="Rate Limit Reset Header",
-    )
-    ratelimit_remaining_header: Optional[str] = Field(
-        "ratelimit-remaining",
-        description="The HTTP response header name that indicates the number of remaining allowed calls.",
-        title="Rate Limit Remaining Header",
-    )
-    status_codes_for_ratelimit_hit: Optional[List[int]] = Field(
-        [429],
-        description="List of HTTP status codes that indicate a rate limit has been hit.",
-        title="Status Codes for Rate Limit Hit",
-    )
-
-
 class ZipfileDecoder(BaseModel):
     class Config:
         extra = Extra.allow

     type: Literal["ZipfileDecoder"]
-    parser: Union[CsvDecoder, GzipDecoder, JsonDecoder, JsonlDecoder] = Field(
+    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser] = Field(
         ...,
         description="Parser to parse the decompressed data from the zipfile(s).",
         title="Parser",
     )


+class CompositeRawDecoder(BaseModel):
+    type: Literal["CompositeRawDecoder"]
+    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
+
+
 class DeclarativeSource1(BaseModel):
     class Config:
         extra = Extra.forbid
@@ -1848,7 +1742,6 @@ class DeclarativeSource1(BaseModel):
     definitions: Optional[Dict[str, Any]] = None
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
-    api_budget: Optional[HTTPAPIBudget] = None
     metadata: Optional[Dict[str, Any]] = Field(
         None,
         description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -1875,7 +1768,6 @@ class DeclarativeSource2(BaseModel):
     definitions: Optional[Dict[str, Any]] = None
     spec: Optional[Spec] = None
     concurrency_level: Optional[ConcurrencyLevel] = None
-    api_budget: Optional[HTTPAPIBudget] = None
     metadata: Optional[Dict[str, Any]] = Field(
         None,
         description="For internal Airbyte use only - DO NOT modify manually. Used by consumers of declarative manifests for storing related metadata.",
@@ -2035,7 +1927,7 @@ class SessionTokenAuthenticator(BaseModel):
         description="Authentication method to use for requests sent to the API, specifying how to inject the session token.",
         title="Data Request Authentication",
     )
-    decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(
+    decoder: Optional[Union[JsonDecoder, XmlDecoder, CompositeRawDecoder]] = Field(
         None, description="Component used to decode the response.", title="Decoder"
     )
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -2235,12 +2127,12 @@ class SimpleRetriever(BaseModel):
     decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2313,12 +2205,12 @@ class AsyncRetriever(BaseModel):
     decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2329,12 +2221,12 @@ class AsyncRetriever(BaseModel):
     download_decoder: Optional[
         Union[
             CustomDecoder,
-            CsvDecoder,
-            GzipDecoder,
             JsonDecoder,
             JsonlDecoder,
             IterableDecoder,
             XmlDecoder,
+            GzipJsonDecoder,
+            CompositeRawDecoder,
             ZipfileDecoder,
         ]
     ] = Field(
@@ -2379,7 +2271,6 @@ class DynamicDeclarativeStream(BaseModel):


 ComplexFieldType.update_forward_refs()
-GzipDecoder.update_forward_refs()
 CompositeErrorHandler.update_forward_refs()
 DeclarativeSource1.update_forward_refs()
 DeclarativeSource2.update_forward_refs()