airbyte-cdk 6.20.1__py3-none-any.whl → 6.20.2.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/auth/oauth.py +0 -34
- airbyte_cdk/sources/declarative/checks/__init__.py +2 -18
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +80 -16
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +21 -97
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +0 -43
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +331 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +1 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +26 -97
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +106 -116
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -33
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +3 -13
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -11
- airbyte_cdk/sources/file_based/exceptions.py +0 -34
- airbyte_cdk/sources/file_based/file_based_source.py +5 -28
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +4 -18
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +2 -25
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -30
- airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +4 -33
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -42
- airbyte_cdk/sources/types.py +0 -3
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/RECORD +31 -32
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +0 -51
- airbyte_cdk/sources/declarative/requesters/README.md +0 -56
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,331 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import copy
|
6
|
+
import logging
|
7
|
+
import threading
|
8
|
+
from collections import OrderedDict
|
9
|
+
from copy import deepcopy
|
10
|
+
from datetime import timedelta
|
11
|
+
from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
|
12
|
+
|
13
|
+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
14
|
+
from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
|
15
|
+
Timer,
|
16
|
+
iterate_with_last_flag_and_state,
|
17
|
+
)
|
18
|
+
from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
|
19
|
+
from airbyte_cdk.sources.message import MessageRepository
|
20
|
+
from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
|
21
|
+
PerPartitionKeySerializer,
|
22
|
+
)
|
23
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
|
24
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
25
|
+
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
26
|
+
|
27
|
+
logger = logging.getLogger("airbyte")
|
28
|
+
|
29
|
+
|
30
|
+
class ConcurrentCursorFactory:
    """Wrap a callable that builds ``ConcurrentCursor`` instances.

    The wrapped callable is always invoked with the keyword arguments
    ``stream_state`` and ``runtime_lookback_window``.
    """

    def __init__(self, create_function: Callable[..., ConcurrentCursor]):
        # Kept as-is; invoked on every `create` call.
        self._create_function = create_function

    def create(
        self, stream_state: Mapping[str, Any], runtime_lookback_window: Any
    ) -> ConcurrentCursor:
        """Build a cursor by forwarding both arguments, by keyword, to the wrapped callable.

        Args:
            stream_state: State handed to the wrapped callable as ``stream_state``.
            runtime_lookback_window: Value handed to the wrapped callable as
                ``runtime_lookback_window``.

        Returns:
            Whatever the wrapped callable returns.
        """
        factory_kwargs = {
            "stream_state": stream_state,
            "runtime_lookback_window": runtime_lookback_window,
        }
        new_cursor = self._create_function(**factory_kwargs)
        return new_cursor
|
40
|
+
|
41
|
+
|
42
|
+
class ConcurrentPerPartitionCursor(Cursor):
    """
    Manages state per partition when a stream has many partitions, preventing data loss or duplication.

    Attributes:
        DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).

    - **Partition Limitation Logic**
      Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse.
      When the limit is reached, the oldest fully processed partition is removed first; if none is fully
      processed, the oldest partition is removed.

    - **Global Cursor Fallback**
      New partitions use global state as the initial state to progress the state for deleted or new partitions.
      The history data added after the initial sync will be missing.
    """

    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
    _NO_STATE: Mapping[str, Any] = {}
    _NO_CURSOR_STATE: Mapping[str, Any] = {}
    _GLOBAL_STATE_KEY = "state"
    _PERPARTITION_STATE_KEY = "states"
    _KEY = 0
    _VALUE = 1

    def __init__(
        self,
        cursor_factory: ConcurrentCursorFactory,
        partition_router: PartitionRouter,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: Any,
        message_repository: MessageRepository,
        connector_state_manager: ConnectorStateManager,
        cursor_field: CursorField,
    ) -> None:
        """Store the collaborators and seed the cursor from ``stream_state``.

        Args:
            cursor_factory: Builds one ``ConcurrentCursor`` per partition.
            partition_router: Source of the partitions; also receives the initial state.
            stream_name: Stream name used when updating and emitting state.
            stream_namespace: Stream namespace used when updating and emitting state.
            stream_state: Incoming state; see ``_set_initial_state`` for the accepted formats.
            message_repository: Receives the state messages this cursor emits.
            connector_state_manager: Holds the stream state and builds state messages.
            cursor_field: Cursor field whose key is used to compare cursor states.
        """
        self._global_cursor: Optional[StreamState] = {}
        self._stream_name = stream_name
        self._stream_namespace = stream_namespace
        self._message_repository = message_repository
        self._connector_state_manager = connector_state_manager
        self._cursor_field = cursor_field

        self._cursor_factory = cursor_factory
        self._partition_router = partition_router

        # The dict is ordered to ensure that once the maximum number of partitions is reached,
        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
        self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
        # One semaphore per partition: released once per generated slice, acquired once per closed slice.
        self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
        # Keys of partitions whose last slice has been generated.
        self._finished_partitions: set[str] = set()
        self._lock = threading.Lock()
        self._timer = Timer()
        self._new_global_cursor: Optional[StreamState] = None
        self._lookback_window: int = 0
        self._parent_state: Optional[StreamState] = None
        self._over_limit: int = 0
        self._partition_serializer = PerPartitionKeySerializer()

        self._set_initial_state(stream_state)

    @property
    def cursor_field(self) -> CursorField:
        """Return the cursor field given at construction time."""
        return self._cursor_field

    @property
    def state(self) -> MutableMapping[str, Any]:
        """Assemble the stream state from the per-partition cursors and the global values.

        Returns:
            A dict holding ``"states"`` (one ``{"partition", "cursor"}`` entry per partition whose
            cursor has a non-empty state), plus ``"state"``, ``"lookback_window"`` and
            ``"parent_state"`` when the corresponding values are set.
        """
        states = []
        for partition_key, cursor in self._cursor_per_partition.items():
            if cursor.state:
                states.append(
                    {
                        "partition": self._to_dict(partition_key),
                        "cursor": copy.deepcopy(cursor.state),
                    }
                )
        state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}

        if self._global_cursor:
            state[self._GLOBAL_STATE_KEY] = self._global_cursor
        if self._lookback_window is not None:
            state["lookback_window"] = self._lookback_window
        if self._parent_state is not None:
            state["parent_state"] = self._parent_state
        return state

    def close_partition(self, partition: Partition) -> None:
        """Close one slice of a partition and, once the partition is done, advance the pending global cursor.

        Args:
            partition: The partition (slice) that finished processing.

        Raises:
            ValueError: If the partition has no stream slice.
            KeyError: If no cursor is tracked for the partition (e.g. it was dropped by the
                partition limit while still in flight).
        """
        # Attempt to retrieve the stream slice
        stream_slice: Optional[StreamSlice] = partition.to_slice()  # type: ignore[assignment]

        # Ensure stream_slice is not None
        if stream_slice is None:
            raise ValueError("stream_slice cannot be None")

        partition_key = self._to_partition_key(stream_slice.partition)
        cursor = self._cursor_per_partition[partition_key]
        cursor.close_partition(partition=partition)
        with self._lock:
            # Balance the release() performed when this slice was generated.
            self._semaphore_per_partition[partition_key].acquire()
            if self._is_fully_processed(partition_key):
                cursor_key = self.cursor_field.cursor_field_key
                if (
                    self._new_global_cursor is None
                    or self._new_global_cursor[cursor_key] < cursor.state[cursor_key]
                ):
                    self._new_global_cursor = copy.deepcopy(cursor.state)

    def _is_fully_processed(self, partition_key: str) -> bool:
        """Tell whether every generated slice of the partition has been closed.

        Callers are expected to hold ``self._lock``.
        """
        semaphore = self._semaphore_per_partition.get(partition_key)
        # NOTE: `_value` is a private attribute of threading.Semaphore (the current counter).
        return (
            partition_key in self._finished_partitions
            and semaphore is not None
            and semaphore._value == 0
        )

    def ensure_at_least_one_state_emitted(self) -> None:
        """
        The platform expects to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
        called.

        The global cursor, lookback window and parent state are only refreshed when no semaphore
        has a pending (generated but not yet closed) slice; a state message is emitted either way.
        """
        if not any(semaphore._value for semaphore in self._semaphore_per_partition.values()):
            self._global_cursor = self._new_global_cursor
            self._lookback_window = self._timer.finish()
            self._parent_state = self._partition_router.get_stream_state()
        self._emit_state_message()

    def _emit_state_message(self) -> None:
        """Push the current state to the state manager and emit the resulting state message."""
        self._connector_state_manager.update_state_for_stream(
            self._stream_name,
            self._stream_namespace,
            self.state,
        )
        state_message = self._connector_state_manager.create_state_message(
            self._stream_name, self._stream_namespace
        )
        self._message_repository.emit_message(state_message)

    def stream_slices(self) -> Iterable[StreamSlice]:
        """Yield the slices of every partition produced by the partition router.

        Raises:
            RuntimeError: If the timer is already running, i.e. this method was called before.
        """
        if self._timer.is_running():
            raise RuntimeError("stream_slices has been executed more than once.")

        slices = self._partition_router.stream_slices()
        self._timer.start()
        for partition in slices:
            yield from self._generate_slices_from_partition(partition)

    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
        """Yield one ``StreamSlice`` per cursor slice of ``partition``, creating its cursor if needed."""
        # Ensure the maximum number of partitions is not exceeded
        self._ensure_partition_limit()

        partition_key = self._to_partition_key(partition.partition)
        cursor = self._cursor_per_partition.get(partition_key)
        if not cursor:
            # New partitions start from the global cursor; the lookback window only applies
            # when there is a global cursor to look back from.
            cursor = self._create_cursor(
                self._global_cursor,
                self._lookback_window if self._global_cursor else None,
            )
            self._cursor_per_partition[partition_key] = cursor
            self._semaphore_per_partition[partition_key] = threading.Semaphore(0)

        for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
            cursor.stream_slices(),
            lambda: None,
        ):
            # Count the slice as pending before it is handed out.
            self._semaphore_per_partition[partition_key].release()
            if is_last_slice:
                self._finished_partitions.add(partition_key)
            yield StreamSlice(
                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
            )

    def _ensure_partition_limit(self) -> None:
        """
        Ensure the maximum number of partitions is not exceeded.

        While the limit is exceeded, the oldest fully processed partition is dropped together with
        its semaphore and finished marker. If no partition is fully processed, the oldest added
        partition is dropped instead; its semaphore is kept so that slices still in flight keep
        the global cursor from advancing.
        """
        # close_partition() updates the same bookkeeping under this lock.
        with self._lock:
            while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
                self._over_limit += 1
                finished_key = next(
                    (key for key in self._cursor_per_partition if self._is_fully_processed(key)),
                    None,
                )
                if finished_key is not None:
                    del self._cursor_per_partition[finished_key]
                    self._semaphore_per_partition.pop(finished_key, None)
                    self._finished_partitions.discard(finished_key)
                    logger.warning(
                        f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {finished_key}. Over limit: {self._over_limit}."
                    )
                    continue

                oldest_partition = self._cursor_per_partition.popitem(last=False)[
                    0
                ]  # Remove the oldest partition
                logger.warning(
                    f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
                )

    def limit_reached(self) -> bool:
        """Return True once more partitions have been dropped than the partition limit itself."""
        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER

    def _set_initial_state(self, stream_state: StreamState) -> None:
        """
        Initialize the cursor's state using the provided `stream_state`.

        This method supports global and per-partition state initialization.

        - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
          The `global state` holds a single cursor position representing the latest processed record across all partitions.

        - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
          This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.

        - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.

        - **Parent State**: (if available) Used to initialize partition routers based on parent streams.

        Args:
            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
                {
                    "states": [
                        {
                            "partition": {
                                "partition_key": "value"
                            },
                            "cursor": {
                                "last_updated": "2023-05-27T00:00:00Z"
                            }
                        }
                    ],
                    "state": {
                        "last_updated": "2023-05-27T00:00:00Z"
                    },
                    "lookback_window": 10,
                    "parent_state": {
                        "parent_stream_name": {
                            "last_updated": "2023-05-27T00:00:00Z"
                        }
                    }
                }
        """
        if not stream_state:
            return

        if self._PERPARTITION_STATE_KEY not in stream_state:
            # We assume that `stream_state` is in a global format that can be applied to all partitions.
            # Example: {"global_state_format_key": "global_state_format_value"}
            self._global_cursor = deepcopy(stream_state)
            self._new_global_cursor = deepcopy(stream_state)

        else:
            self._lookback_window = int(stream_state.get("lookback_window", 0))

            for state in stream_state[self._PERPARTITION_STATE_KEY]:
                partition_key = self._to_partition_key(state["partition"])
                self._cursor_per_partition[partition_key] = self._create_cursor(state["cursor"])
                self._semaphore_per_partition[partition_key] = threading.Semaphore(0)

            # set default state for missing partitions if it is per partition with fallback to global
            if self._GLOBAL_STATE_KEY in stream_state:
                self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
                self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])

        # Set parent state for partition routers based on parent streams
        self._partition_router.set_initial_state(stream_state)

    def observe(self, record: Record) -> None:
        """Forward ``record`` to the cursor of the partition it belongs to.

        Raises:
            ValueError: If the record has no associated slice.
            KeyError: If no cursor is tracked for the record's partition.
        """
        if not record.associated_slice:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        self._cursor_per_partition[
            self._to_partition_key(record.associated_slice.partition)
        ].observe(record)

    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
        """Serialize a partition mapping into the string key used by the internal dicts."""
        return self._partition_serializer.to_partition_key(partition)

    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
        """Turn a partition key back into its partition mapping."""
        return self._partition_serializer.to_partition(partition_key)

    def _create_cursor(
        self, cursor_state: Any, runtime_lookback_window: Any = None
    ) -> ConcurrentCursor:
        """Create a partition cursor from a copy of ``cursor_state``.

        Args:
            cursor_state: Initial state for the new cursor; deep-copied before use.
            runtime_lookback_window: Lookback window in seconds. Any falsy value (``None``, ``0``,
                an empty mapping) means "no runtime lookback" and is forwarded as ``None``.

        Returns:
            The cursor built by the cursor factory.
        """
        # The factory always receives either a timedelta or None, never a raw number or sentinel.
        lookback = timedelta(seconds=runtime_lookback_window) if runtime_lookback_window else None
        return self._cursor_factory.create(
            stream_state=deepcopy(cursor_state), runtime_lookback_window=lookback
        )

    def should_be_synced(self, record: Record) -> bool:
        """Delegate the decision to the cursor of the record's partition."""
        return self._get_cursor(record).should_be_synced(record)

    def _get_cursor(self, record: Record) -> ConcurrentCursor:
        """Return the cursor tracking the record's partition.

        Raises:
            ValueError: If the record has no associated slice or its partition has no cursor.
        """
        if not record.associated_slice:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        partition_key = self._to_partition_key(record.associated_slice.partition)
        if partition_key not in self._cursor_per_partition:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        return self._cursor_per_partition[partition_key]
|
@@ -303,6 +303,21 @@ class PerPartitionCursor(DeclarativeCursor):
|
|
303
303
|
raise ValueError("A partition needs to be provided in order to get request body json")
|
304
304
|
|
305
305
|
def should_be_synced(self, record: Record) -> bool:
    """Decide whether ``record`` should be synced, using its partition's cursor.

    When the record carries a slice whose partition has no cursor yet, one is
    created first, seeded from ``_state_to_migrate_from`` if that is truthy and
    from ``_NO_CURSOR_STATE`` otherwise.
    """
    record_slice = record.associated_slice
    if record_slice:
        slice_key = self._to_partition_key(record_slice.partition)
        if slice_key not in self._cursor_per_partition:
            seed_state = self._state_to_migrate_from or self._NO_CURSOR_STATE
            self._cursor_per_partition[slice_key] = self._create_cursor(seed_state)

    # Resolve the cursor before converting the record, as the original expression did.
    partition_cursor = self._get_cursor(record)
    return partition_cursor.should_be_synced(self._convert_record_to_cursor_record(record))
|
@@ -22,7 +22,6 @@ from airbyte_cdk.models import (
|
|
22
22
|
ConnectorSpecification,
|
23
23
|
FailureType,
|
24
24
|
)
|
25
|
-
from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
|
26
25
|
from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
|
27
26
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
28
27
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
@@ -108,7 +107,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
|
|
108
107
|
if "type" not in check:
|
109
108
|
check["type"] = "CheckStream"
|
110
109
|
check_stream = self._constructor.create_component(
|
111
|
-
|
110
|
+
CheckStreamModel,
|
112
111
|
check,
|
113
112
|
dict(),
|
114
113
|
emit_connector_builder_messages=self._emit_connector_builder_messages,
|
@@ -52,15 +52,6 @@ class CheckStream(BaseModel):
|
|
52
52
|
)
|
53
53
|
|
54
54
|
|
55
|
-
class CheckDynamicStream(BaseModel):
|
56
|
-
type: Literal["CheckDynamicStream"]
|
57
|
-
stream_count: int = Field(
|
58
|
-
...,
|
59
|
-
description="Numbers of the streams to try reading from when running a check operation.",
|
60
|
-
title="Stream Count",
|
61
|
-
)
|
62
|
-
|
63
|
-
|
64
55
|
class ConcurrencyLevel(BaseModel):
|
65
56
|
type: Optional[Literal["ConcurrencyLevel"]] = None
|
66
57
|
default_concurrency: Union[int, str] = Field(
|
@@ -490,24 +481,12 @@ class RefreshTokenUpdater(BaseModel):
|
|
490
481
|
|
491
482
|
class OAuthAuthenticator(BaseModel):
|
492
483
|
type: Literal["OAuthAuthenticator"]
|
493
|
-
client_id_name: Optional[str] = Field(
|
494
|
-
"client_id",
|
495
|
-
description="The name of the property to use to refresh the `access_token`.",
|
496
|
-
examples=["custom_app_id"],
|
497
|
-
title="Client ID Property Name",
|
498
|
-
)
|
499
484
|
client_id: str = Field(
|
500
485
|
...,
|
501
486
|
description="The OAuth client ID. Fill it in the user inputs.",
|
502
487
|
examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
|
503
488
|
title="Client ID",
|
504
489
|
)
|
505
|
-
client_secret_name: Optional[str] = Field(
|
506
|
-
"client_secret",
|
507
|
-
description="The name of the property to use to refresh the `access_token`.",
|
508
|
-
examples=["custom_app_secret"],
|
509
|
-
title="Client Secret Property Name",
|
510
|
-
)
|
511
490
|
client_secret: str = Field(
|
512
491
|
...,
|
513
492
|
description="The OAuth client secret. Fill it in the user inputs.",
|
@@ -517,12 +496,6 @@ class OAuthAuthenticator(BaseModel):
|
|
517
496
|
],
|
518
497
|
title="Client Secret",
|
519
498
|
)
|
520
|
-
refresh_token_name: Optional[str] = Field(
|
521
|
-
"refresh_token",
|
522
|
-
description="The name of the property to use to refresh the `access_token`.",
|
523
|
-
examples=["custom_app_refresh_value"],
|
524
|
-
title="Refresh Token Property Name",
|
525
|
-
)
|
526
499
|
refresh_token: Optional[str] = Field(
|
527
500
|
None,
|
528
501
|
description="Credential artifact used to get a new access token.",
|
@@ -556,12 +529,6 @@ class OAuthAuthenticator(BaseModel):
|
|
556
529
|
examples=["expires_in"],
|
557
530
|
title="Token Expiry Property Name",
|
558
531
|
)
|
559
|
-
grant_type_name: Optional[str] = Field(
|
560
|
-
"grant_type",
|
561
|
-
description="The name of the property to use to refresh the `access_token`.",
|
562
|
-
examples=["custom_grant_type"],
|
563
|
-
title="Grant Type Property Name",
|
564
|
-
)
|
565
532
|
grant_type: Optional[str] = Field(
|
566
533
|
"refresh_token",
|
567
534
|
description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.",
|
@@ -580,17 +547,6 @@ class OAuthAuthenticator(BaseModel):
|
|
580
547
|
],
|
581
548
|
title="Refresh Request Body",
|
582
549
|
)
|
583
|
-
refresh_request_headers: Optional[Dict[str, Any]] = Field(
|
584
|
-
None,
|
585
|
-
description="Headers of the request sent to get a new access token.",
|
586
|
-
examples=[
|
587
|
-
{
|
588
|
-
"Authorization": "<AUTH_TOKEN>",
|
589
|
-
"Content-Type": "application/x-www-form-urlencoded",
|
590
|
-
}
|
591
|
-
],
|
592
|
-
title="Refresh Request Headers",
|
593
|
-
)
|
594
550
|
scopes: Optional[List[str]] = Field(
|
595
551
|
None,
|
596
552
|
description="List of scopes that should be granted to the access token.",
|
@@ -719,7 +675,6 @@ class HttpResponseFilter(BaseModel):
|
|
719
675
|
class TypesMap(BaseModel):
|
720
676
|
target_type: Union[str, List[str]]
|
721
677
|
current_type: Union[str, List[str]]
|
722
|
-
condition: Optional[str]
|
723
678
|
|
724
679
|
|
725
680
|
class SchemaTypeIdentifier(BaseModel):
|
@@ -782,43 +737,33 @@ class KeysToSnakeCase(BaseModel):
|
|
782
737
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
783
738
|
|
784
739
|
|
785
|
-
class FlattenFields(BaseModel):
|
786
|
-
type: Literal["FlattenFields"]
|
787
|
-
flatten_lists: Optional[bool] = Field(
|
788
|
-
True,
|
789
|
-
description="Whether to flatten lists or leave it as is. Default is True.",
|
790
|
-
title="Flatten Lists",
|
791
|
-
)
|
792
|
-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
793
|
-
|
794
|
-
|
795
740
|
class KeysReplace(BaseModel):
|
796
741
|
type: Literal["KeysReplace"]
|
797
742
|
old: str = Field(
|
798
743
|
...,
|
799
744
|
description="Old value to replace.",
|
800
|
-
examples=[
|
801
|
-
" ",
|
802
|
-
"{{ record.id }}",
|
803
|
-
"{{ config['id'] }}",
|
804
|
-
"{{ stream_slice['id'] }}",
|
805
|
-
],
|
745
|
+
examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
|
806
746
|
title="Old value",
|
807
747
|
)
|
808
748
|
new: str = Field(
|
809
749
|
...,
|
810
750
|
description="New value to set.",
|
811
|
-
examples=[
|
812
|
-
"_",
|
813
|
-
"{{ record.id }}",
|
814
|
-
"{{ config['id'] }}",
|
815
|
-
"{{ stream_slice['id'] }}",
|
816
|
-
],
|
751
|
+
examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
|
817
752
|
title="New value",
|
818
753
|
)
|
819
754
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
820
755
|
|
821
756
|
|
757
|
+
class FlattenFields(BaseModel):
|
758
|
+
type: Literal["FlattenFields"]
|
759
|
+
flatten_lists: Optional[bool] = Field(
|
760
|
+
True,
|
761
|
+
description="Whether to flatten lists or leave it as is. Default is True.",
|
762
|
+
title="Flatten Lists",
|
763
|
+
)
|
764
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
765
|
+
|
766
|
+
|
822
767
|
class IterableDecoder(BaseModel):
|
823
768
|
type: Literal["IterableDecoder"]
|
824
769
|
|
@@ -904,8 +849,8 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
904
849
|
...,
|
905
850
|
description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
|
906
851
|
examples=[
|
907
|
-
"https://domain.host.com/marketing_api/auth?{
|
908
|
-
"https://endpoint.host.com/oauth2/authorize?{
|
852
|
+
"https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
|
853
|
+
"https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}",
|
909
854
|
],
|
910
855
|
title="Consent URL",
|
911
856
|
)
|
@@ -919,18 +864,14 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
919
864
|
...,
|
920
865
|
description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
|
921
866
|
examples=[
|
922
|
-
"https://auth.host.com/oauth2/token?{
|
867
|
+
"https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
|
923
868
|
],
|
924
869
|
title="Access Token URL",
|
925
870
|
)
|
926
871
|
access_token_headers: Optional[Dict[str, Any]] = Field(
|
927
872
|
None,
|
928
873
|
description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
|
929
|
-
examples=[
|
930
|
-
{
|
931
|
-
"Authorization": "Basic {{ {{ client_id_value }}:{{ client_secret_value }} | base64Encoder }}"
|
932
|
-
}
|
933
|
-
],
|
874
|
+
examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}],
|
934
875
|
title="Access Token Headers",
|
935
876
|
)
|
936
877
|
access_token_params: Optional[Dict[str, Any]] = Field(
|
@@ -938,15 +879,15 @@ class OauthConnectorInputSpecification(BaseModel):
|
|
938
879
|
description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
|
939
880
|
examples=[
|
940
881
|
{
|
941
|
-
"{
|
942
|
-
"{
|
943
|
-
"{
|
882
|
+
"{auth_code_key}": "{{auth_code_key}}",
|
883
|
+
"{client_id_key}": "{{client_id_key}}",
|
884
|
+
"{client_secret_key}": "{{client_secret_key}}",
|
944
885
|
}
|
945
886
|
],
|
946
887
|
title="Access Token Query Params (Json Encoded)",
|
947
888
|
)
|
948
|
-
extract_output:
|
949
|
-
|
889
|
+
extract_output: List[str] = Field(
|
890
|
+
...,
|
950
891
|
description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
|
951
892
|
examples=[["access_token", "refresh_token", "other_field"]],
|
952
893
|
title="Extract Output",
|
@@ -1015,7 +956,7 @@ class OAuthConfigSpecification(BaseModel):
|
|
1015
956
|
)
|
1016
957
|
oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
|
1017
958
|
None,
|
1018
|
-
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{
|
959
|
+
description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
|
1019
960
|
title="DeclarativeOAuth Connector Specification",
|
1020
961
|
)
|
1021
962
|
complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
|
@@ -1222,14 +1163,6 @@ class LegacySessionTokenAuthenticator(BaseModel):
|
|
1222
1163
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
1223
1164
|
|
1224
1165
|
|
1225
|
-
class JsonParser(BaseModel):
|
1226
|
-
class Config:
|
1227
|
-
extra = Extra.allow
|
1228
|
-
|
1229
|
-
type: Literal["JsonParser"]
|
1230
|
-
encoding: Optional[str] = "utf-8"
|
1231
|
-
|
1232
|
-
|
1233
1166
|
class JsonLineParser(BaseModel):
|
1234
1167
|
type: Literal["JsonLineParser"]
|
1235
1168
|
encoding: Optional[str] = "utf-8"
|
@@ -1628,7 +1561,7 @@ class RecordSelector(BaseModel):
|
|
1628
1561
|
|
1629
1562
|
class GzipParser(BaseModel):
|
1630
1563
|
type: Literal["GzipParser"]
|
1631
|
-
inner_parser: Union[JsonLineParser, CsvParser
|
1564
|
+
inner_parser: Union[JsonLineParser, CsvParser]
|
1632
1565
|
|
1633
1566
|
|
1634
1567
|
class Spec(BaseModel):
|
@@ -1663,7 +1596,7 @@ class CompositeErrorHandler(BaseModel):
|
|
1663
1596
|
|
1664
1597
|
class CompositeRawDecoder(BaseModel):
|
1665
1598
|
type: Literal["CompositeRawDecoder"]
|
1666
|
-
parser: Union[GzipParser,
|
1599
|
+
parser: Union[GzipParser, JsonLineParser, CsvParser]
|
1667
1600
|
|
1668
1601
|
|
1669
1602
|
class DeclarativeSource1(BaseModel):
|
@@ -1671,7 +1604,7 @@ class DeclarativeSource1(BaseModel):
|
|
1671
1604
|
extra = Extra.forbid
|
1672
1605
|
|
1673
1606
|
type: Literal["DeclarativeSource"]
|
1674
|
-
check:
|
1607
|
+
check: CheckStream
|
1675
1608
|
streams: List[DeclarativeStream]
|
1676
1609
|
dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
|
1677
1610
|
version: str = Field(
|
@@ -1697,7 +1630,7 @@ class DeclarativeSource2(BaseModel):
|
|
1697
1630
|
extra = Extra.forbid
|
1698
1631
|
|
1699
1632
|
type: Literal["DeclarativeSource"]
|
1700
|
-
check:
|
1633
|
+
check: CheckStream
|
1701
1634
|
streams: Optional[List[DeclarativeStream]] = None
|
1702
1635
|
dynamic_streams: List[DynamicDeclarativeStream]
|
1703
1636
|
version: str = Field(
|
@@ -2107,10 +2040,6 @@ class AsyncRetriever(BaseModel):
|
|
2107
2040
|
...,
|
2108
2041
|
description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
|
2109
2042
|
)
|
2110
|
-
url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
|
2111
|
-
None,
|
2112
|
-
description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
|
2113
|
-
)
|
2114
2043
|
download_requester: Union[CustomRequester, HttpRequester] = Field(
|
2115
2044
|
...,
|
2116
2045
|
description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",
|