airbyte-cdk 6.20.1__py3-none-any.whl → 6.20.2.dev0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registries.
Files changed (33)
  1. airbyte_cdk/sources/declarative/auth/oauth.py +0 -34
  2. airbyte_cdk/sources/declarative/checks/__init__.py +2 -18
  3. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +80 -16
  4. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +21 -97
  5. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +0 -43
  6. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  7. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  8. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +331 -0
  9. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  10. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
  11. airbyte_cdk/sources/declarative/manifest_declarative_source.py +1 -2
  12. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +26 -97
  13. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +106 -116
  14. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -33
  15. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  16. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +3 -13
  17. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -11
  18. airbyte_cdk/sources/file_based/exceptions.py +0 -34
  19. airbyte_cdk/sources/file_based/file_based_source.py +5 -28
  20. airbyte_cdk/sources/file_based/file_based_stream_reader.py +4 -18
  21. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +2 -25
  22. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -30
  23. airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
  24. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +4 -33
  25. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -42
  26. airbyte_cdk/sources/types.py +0 -3
  27. {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/METADATA +1 -1
  28. {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/RECORD +31 -32
  29. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +0 -51
  30. airbyte_cdk/sources/declarative/requesters/README.md +0 -56
  31. {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/LICENSE.txt +0 -0
  32. {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/WHEEL +0 -0
  33. {airbyte_cdk-6.20.1.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py

@@ -0,0 +1,331 @@
+ #
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+ #
+
+ import copy
+ import logging
+ import threading
+ from collections import OrderedDict
+ from copy import deepcopy
+ from datetime import timedelta
+ from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
+
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
+ from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
+     Timer,
+     iterate_with_last_flag_and_state,
+ )
+ from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+ from airbyte_cdk.sources.message import MessageRepository
+ from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
+     PerPartitionKeySerializer,
+ )
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
+
+ logger = logging.getLogger("airbyte")
+
+
+ class ConcurrentCursorFactory:
+     def __init__(self, create_function: Callable[..., ConcurrentCursor]):
+         self._create_function = create_function
+
+     def create(
+         self, stream_state: Mapping[str, Any], runtime_lookback_window: Any
+     ) -> ConcurrentCursor:
+         return self._create_function(
+             stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
+         )
+
+
+ class ConcurrentPerPartitionCursor(Cursor):
+     """
+     Manages state per partition when a stream has many partitions, preventing data loss or duplication.
+
+     Attributes:
+         DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).
+
+     - **Partition Limitation Logic**
+       Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. The oldest partitions are removed when the limit is reached.
+
+     - **Global Cursor Fallback**
+       New partitions use the global state as their initial state, so that state progresses even for deleted or newly added partitions. Historical data added after the initial sync will be missing.
+     """
+
+     DEFAULT_MAX_PARTITIONS_NUMBER = 10000
+     _NO_STATE: Mapping[str, Any] = {}
+     _NO_CURSOR_STATE: Mapping[str, Any] = {}
+     _GLOBAL_STATE_KEY = "state"
+     _PERPARTITION_STATE_KEY = "states"
+     _KEY = 0
+     _VALUE = 1
+
+     def __init__(
+         self,
+         cursor_factory: ConcurrentCursorFactory,
+         partition_router: PartitionRouter,
+         stream_name: str,
+         stream_namespace: Optional[str],
+         stream_state: Any,
+         message_repository: MessageRepository,
+         connector_state_manager: ConnectorStateManager,
+         cursor_field: CursorField,
+     ) -> None:
+         self._global_cursor: Optional[StreamState] = {}
+         self._stream_name = stream_name
+         self._stream_namespace = stream_namespace
+         self._message_repository = message_repository
+         self._connector_state_manager = connector_state_manager
+         self._cursor_field = cursor_field
+
+         self._cursor_factory = cursor_factory
+         self._partition_router = partition_router
+
+         # The dict is ordered to ensure that once the maximum number of partitions is reached,
+         # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
+         self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
+         self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
+         self._finished_partitions: set[str] = set()
+         self._lock = threading.Lock()
+         self._timer = Timer()
+         self._new_global_cursor: Optional[StreamState] = None
+         self._lookback_window: int = 0
+         self._parent_state: Optional[StreamState] = None
+         self._over_limit: int = 0
+         self._partition_serializer = PerPartitionKeySerializer()
+
+         self._set_initial_state(stream_state)
+
+     @property
+     def cursor_field(self) -> CursorField:
+         return self._cursor_field
+
+     @property
+     def state(self) -> MutableMapping[str, Any]:
+         states = []
+         for partition_tuple, cursor in self._cursor_per_partition.items():
+             if cursor.state:
+                 states.append(
+                     {
+                         "partition": self._to_dict(partition_tuple),
+                         "cursor": copy.deepcopy(cursor.state),
+                     }
+                 )
+         state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}
+
+         if self._global_cursor:
+             state[self._GLOBAL_STATE_KEY] = self._global_cursor
+         if self._lookback_window is not None:
+             state["lookback_window"] = self._lookback_window
+         if self._parent_state is not None:
+             state["parent_state"] = self._parent_state
+         return state
+
+     def close_partition(self, partition: Partition) -> None:
+         # Attempt to retrieve the stream slice
+         stream_slice: Optional[StreamSlice] = partition.to_slice()  # type: ignore[assignment]
+
+         # Ensure stream_slice is not None
+         if stream_slice is None:
+             raise ValueError("stream_slice cannot be None")
+
+         partition_key = self._to_partition_key(stream_slice.partition)
+         self._cursor_per_partition[partition_key].close_partition(partition=partition)
+         with self._lock:
+             self._semaphore_per_partition[partition_key].acquire()
+             cursor = self._cursor_per_partition[partition_key]
+             if (
+                 partition_key in self._finished_partitions
+                 and self._semaphore_per_partition[partition_key]._value == 0
+             ):
+                 if (
+                     self._new_global_cursor is None
+                     or self._new_global_cursor[self.cursor_field.cursor_field_key]
+                     < cursor.state[self.cursor_field.cursor_field_key]
+                 ):
+                     self._new_global_cursor = copy.deepcopy(cursor.state)
+
+     def ensure_at_least_one_state_emitted(self) -> None:
+         """
+         The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
+         called.
+         """
+         if not any(
+             semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
+         ):
+             self._global_cursor = self._new_global_cursor
+             self._lookback_window = self._timer.finish()
+             self._parent_state = self._partition_router.get_stream_state()
+         self._emit_state_message()
+
+     def _emit_state_message(self) -> None:
+         self._connector_state_manager.update_state_for_stream(
+             self._stream_name,
+             self._stream_namespace,
+             self.state,
+         )
+         state_message = self._connector_state_manager.create_state_message(
+             self._stream_name, self._stream_namespace
+         )
+         self._message_repository.emit_message(state_message)
+
+     def stream_slices(self) -> Iterable[StreamSlice]:
+         if self._timer.is_running():
+             raise RuntimeError("stream_slices has been executed more than once.")
+
+         slices = self._partition_router.stream_slices()
+         self._timer.start()
+         for partition in slices:
+             yield from self._generate_slices_from_partition(partition)
+
+     def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
+         # Ensure the maximum number of partitions is not exceeded
+         self._ensure_partition_limit()
+
+         cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
+         if not cursor:
+             cursor = self._create_cursor(
+                 self._global_cursor,
+                 self._lookback_window if self._global_cursor else self._NO_CURSOR_STATE,
+             )
+             self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+             self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
+                 threading.Semaphore(0)
+             )
+
+         for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
+             cursor.stream_slices(),
+             lambda: None,
+         ):
+             self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
+             if is_last_slice:
+                 self._finished_partitions.add(self._to_partition_key(partition.partition))
+             yield StreamSlice(
+                 partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+             )
+
+     def _ensure_partition_limit(self) -> None:
+         """
+         Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
+         """
+         while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+             self._over_limit += 1
+             oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                 0
+             ]  # Remove the oldest partition
+             logger.warning(
+                 f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+             )
+
+     def limit_reached(self) -> bool:
+         return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
+
+     def _set_initial_state(self, stream_state: StreamState) -> None:
+         """
+         Initialize the cursor's state using the provided `stream_state`.
+
+         This method supports global and per-partition state initialization.
+
+         - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
+           The global state holds a single cursor position representing the latest processed record across all partitions.
+
+         - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
+           This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.
+
+         - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.
+
+         - **Parent State**: (if available) Used to initialize partition routers based on parent streams.
+
+         Args:
+             stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
+                 {
+                     "states": [
+                         {
+                             "partition": {
+                                 "partition_key": "value"
+                             },
+                             "cursor": {
+                                 "last_updated": "2023-05-27T00:00:00Z"
+                             }
+                         }
+                     ],
+                     "state": {
+                         "last_updated": "2023-05-27T00:00:00Z"
+                     },
+                     "lookback_window": 10,
+                     "parent_state": {
+                         "parent_stream_name": {
+                             "last_updated": "2023-05-27T00:00:00Z"
+                         }
+                     }
+                 }
+         """
+         if not stream_state:
+             return
+
+         if self._PERPARTITION_STATE_KEY not in stream_state:
+             # We assume that `stream_state` is in a global format that can be applied to all partitions.
+             # Example: {"global_state_format_key": "global_state_format_value"}
+             self._global_cursor = deepcopy(stream_state)
+             self._new_global_cursor = deepcopy(stream_state)
+
+         else:
+             self._lookback_window = int(stream_state.get("lookback_window", 0))
+
+             for state in stream_state[self._PERPARTITION_STATE_KEY]:
+                 self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
+                     self._create_cursor(state["cursor"])
+                 )
+                 self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
+                     threading.Semaphore(0)
+                 )
+
+             # Set the default state for missing partitions: per-partition state with fallback to global
+             if self._GLOBAL_STATE_KEY in stream_state:
+                 self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
+                 self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
+
+         # Set parent state for partition routers based on parent streams
+         self._partition_router.set_initial_state(stream_state)
+
+     def observe(self, record: Record) -> None:
+         if not record.associated_slice:
+             raise ValueError(
+                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
+             )
+         self._cursor_per_partition[
+             self._to_partition_key(record.associated_slice.partition)
+         ].observe(record)
+
+     def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
+         return self._partition_serializer.to_partition_key(partition)
+
+     def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
+         return self._partition_serializer.to_partition(partition_key)
+
+     def _create_cursor(
+         self, cursor_state: Any, runtime_lookback_window: Any = None
+     ) -> ConcurrentCursor:
+         if runtime_lookback_window:
+             runtime_lookback_window = timedelta(seconds=runtime_lookback_window)
+         cursor = self._cursor_factory.create(
+             stream_state=deepcopy(cursor_state), runtime_lookback_window=runtime_lookback_window
+         )
+         return cursor
+
+     def should_be_synced(self, record: Record) -> bool:
+         return self._get_cursor(record).should_be_synced(record)
+
+     def _get_cursor(self, record: Record) -> ConcurrentCursor:
+         if not record.associated_slice:
+             raise ValueError(
+                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
+             )
+         partition_key = self._to_partition_key(record.associated_slice.partition)
+         if partition_key not in self._cursor_per_partition:
+             raise ValueError(
+                 "Invalid state as stream slices that are emitted should refer to an existing cursor"
+             )
+         cursor = self._cursor_per_partition[partition_key]
+         return cursor
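
Note: `ConcurrentCursorFactory` is only a thin wrapper around a callable that builds one `ConcurrentCursor` per partition. A minimal wiring sketch follows; `build_cursor` and its dict return value are illustrative stand-ins, not part of the package (the real callable is supplied by the component factory and returns a `ConcurrentCursor`):

    def build_cursor(stream_state, runtime_lookback_window):
        # Stand-in for the real ConcurrentCursor construction; shown only to
        # illustrate the keyword calling convention that create() relies on.
        return {"stream_state": stream_state, "lookback": runtime_lookback_window}

    factory = ConcurrentCursorFactory(create_function=build_cursor)
    cursor = factory.create(
        stream_state={"last_updated": "2023-05-27T00:00:00Z"},
        runtime_lookback_window=None,
    )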
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py

@@ -64,6 +64,9 @@ class Timer:
          else:
              raise RuntimeError("Global substream cursor timer not started")

+     def is_running(self) -> bool:
+         return self._start is not None
+

  class GlobalSubstreamCursor(DeclarativeCursor):
      """
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py

@@ -303,6 +303,21 @@ class PerPartitionCursor(DeclarativeCursor):
              raise ValueError("A partition needs to be provided in order to get request body json")

      def should_be_synced(self, record: Record) -> bool:
+         if (
+             record.associated_slice
+             and self._to_partition_key(record.associated_slice.partition)
+             not in self._cursor_per_partition
+         ):
+             partition_state = (
+                 self._state_to_migrate_from
+                 if self._state_to_migrate_from
+                 else self._NO_CURSOR_STATE
+             )
+             cursor = self._create_cursor(partition_state)
+
+             self._cursor_per_partition[
+                 self._to_partition_key(record.associated_slice.partition)
+             ] = cursor
          return self._get_cursor(record).should_be_synced(
              self._convert_record_to_cursor_record(record)
          )
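
Note: this change makes `should_be_synced` tolerant of records whose partition has no cursor yet: instead of `_get_cursor` raising, a cursor is first seeded from `_state_to_migrate_from` (or an empty state). A sketch of the behavior; `Record` and `StreamSlice` construction is simplified here and constructor arguments may differ by CDK version:

    unseen = StreamSlice(partition={"partition_key": "new"}, cursor_slice={})
    record = Record(data={"last_updated": "2023-05-27T00:00:00Z"}, associated_slice=unseen)
    per_partition_cursor.should_be_synced(record)  # lazily registers a cursor for the partition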
airbyte_cdk/sources/declarative/manifest_declarative_source.py

@@ -22,7 +22,6 @@ from airbyte_cdk.models import (
      ConnectorSpecification,
      FailureType,
  )
- from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
  from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -108,7 +107,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
          if "type" not in check:
              check["type"] = "CheckStream"
          check_stream = self._constructor.create_component(
-             COMPONENTS_CHECKER_TYPE_MAPPING[check["type"]],
+             CheckStreamModel,
              check,
              dict(),
              emit_connector_builder_messages=self._emit_connector_builder_messages,
airbyte_cdk/sources/declarative/models/declarative_component_schema.py

@@ -52,15 +52,6 @@ class CheckStream(BaseModel):
      )


- class CheckDynamicStream(BaseModel):
-     type: Literal["CheckDynamicStream"]
-     stream_count: int = Field(
-         ...,
-         description="Numbers of the streams to try reading from when running a check operation.",
-         title="Stream Count",
-     )
-
-
  class ConcurrencyLevel(BaseModel):
      type: Optional[Literal["ConcurrencyLevel"]] = None
      default_concurrency: Union[int, str] = Field(
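
Note: with `CheckDynamicStream` removed here (and from the `DeclarativeSource` unions further down), a manifest's `check` component can only be a `CheckStream`. An illustrative manifest excerpt in dict form, using the existing `stream_names` field of `CheckStream`:

    check = {"type": "CheckStream", "stream_names": ["my_stream"]}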
@@ -490,24 +481,12 @@ class RefreshTokenUpdater(BaseModel):

  class OAuthAuthenticator(BaseModel):
      type: Literal["OAuthAuthenticator"]
-     client_id_name: Optional[str] = Field(
-         "client_id",
-         description="The name of the property to use to refresh the `access_token`.",
-         examples=["custom_app_id"],
-         title="Client ID Property Name",
-     )
      client_id: str = Field(
          ...,
          description="The OAuth client ID. Fill it in the user inputs.",
          examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
          title="Client ID",
      )
-     client_secret_name: Optional[str] = Field(
-         "client_secret",
-         description="The name of the property to use to refresh the `access_token`.",
-         examples=["custom_app_secret"],
-         title="Client Secret Property Name",
-     )
      client_secret: str = Field(
          ...,
          description="The OAuth client secret. Fill it in the user inputs.",
@@ -517,12 +496,6 @@ class OAuthAuthenticator(BaseModel):
          ],
          title="Client Secret",
      )
-     refresh_token_name: Optional[str] = Field(
-         "refresh_token",
-         description="The name of the property to use to refresh the `access_token`.",
-         examples=["custom_app_refresh_value"],
-         title="Refresh Token Property Name",
-     )
      refresh_token: Optional[str] = Field(
          None,
          description="Credential artifact used to get a new access token.",
@@ -556,12 +529,6 @@ class OAuthAuthenticator(BaseModel):
          examples=["expires_in"],
          title="Token Expiry Property Name",
      )
-     grant_type_name: Optional[str] = Field(
-         "grant_type",
-         description="The name of the property to use to refresh the `access_token`.",
-         examples=["custom_grant_type"],
-         title="Grant Type Property Name",
-     )
      grant_type: Optional[str] = Field(
          "refresh_token",
          description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.",
@@ -580,17 +547,6 @@ class OAuthAuthenticator(BaseModel):
          ],
          title="Refresh Request Body",
      )
-     refresh_request_headers: Optional[Dict[str, Any]] = Field(
-         None,
-         description="Headers of the request sent to get a new access token.",
-         examples=[
-             {
-                 "Authorization": "<AUTH_TOKEN>",
-                 "Content-Type": "application/x-www-form-urlencoded",
-             }
-         ],
-         title="Refresh Request Headers",
-     )
      scopes: Optional[List[str]] = Field(
          None,
          description="List of scopes that should be granted to the access token.",
@@ -719,7 +675,6 @@ class HttpResponseFilter(BaseModel):
  class TypesMap(BaseModel):
      target_type: Union[str, List[str]]
      current_type: Union[str, List[str]]
-     condition: Optional[str]


  class SchemaTypeIdentifier(BaseModel):
@@ -782,43 +737,33 @@ class KeysToSnakeCase(BaseModel):
      parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


- class FlattenFields(BaseModel):
-     type: Literal["FlattenFields"]
-     flatten_lists: Optional[bool] = Field(
-         True,
-         description="Whether to flatten lists or leave it as is. Default is True.",
-         title="Flatten Lists",
-     )
-     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
-
-
  class KeysReplace(BaseModel):
      type: Literal["KeysReplace"]
      old: str = Field(
          ...,
          description="Old value to replace.",
-         examples=[
-             " ",
-             "{{ record.id }}",
-             "{{ config['id'] }}",
-             "{{ stream_slice['id'] }}",
-         ],
+         examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
          title="Old value",
      )
      new: str = Field(
          ...,
          description="New value to set.",
-         examples=[
-             "_",
-             "{{ record.id }}",
-             "{{ config['id'] }}",
-             "{{ stream_slice['id'] }}",
-         ],
+         examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
          title="New value",
      )
      parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


+ class FlattenFields(BaseModel):
+     type: Literal["FlattenFields"]
+     flatten_lists: Optional[bool] = Field(
+         True,
+         description="Whether to flatten lists or leave it as is. Default is True.",
+         title="Flatten Lists",
+     )
+     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
+
+
  class IterableDecoder(BaseModel):
      type: Literal["IterableDecoder"]

@@ -904,8 +849,8 @@ class OauthConnectorInputSpecification(BaseModel):
          ...,
          description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
          examples=[
-             "https://domain.host.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",
-             "https://endpoint.host.com/oauth2/authorize?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{scope_key}}={{{{scope_value}} | urlEncoder}}&{{state_key}}={{state_value}}&subdomain={{subdomain}}",
+             "https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
+             "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}",
          ],
          title="Consent URL",
      )
@@ -919,18 +864,14 @@ class OauthConnectorInputSpecification(BaseModel):
          ...,
          description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
          examples=[
-             "https://auth.host.com/oauth2/token?{{client_id_key}}={{client_id_value}}&{{client_secret_key}}={{client_secret_value}}&{{auth_code_key}}={{auth_code_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}"
+             "https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
          ],
          title="Access Token URL",
      )
      access_token_headers: Optional[Dict[str, Any]] = Field(
          None,
          description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
-         examples=[
-             {
-                 "Authorization": "Basic {{ {{ client_id_value }}:{{ client_secret_value }} | base64Encoder }}"
-             }
-         ],
+         examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}],
          title="Access Token Headers",
      )
      access_token_params: Optional[Dict[str, Any]] = Field(
@@ -938,15 +879,15 @@ class OauthConnectorInputSpecification(BaseModel):
          description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
          examples=[
              {
-                 "{{ auth_code_key }}": "{{ auth_code_value }}",
-                 "{{ client_id_key }}": "{{ client_id_value }}",
-                 "{{ client_secret_key }}": "{{ client_secret_value }}",
+                 "{auth_code_key}": "{{auth_code_key}}",
+                 "{client_id_key}": "{{client_id_key}}",
+                 "{client_secret_key}": "{{client_secret_key}}",
              }
          ],
          title="Access Token Query Params (Json Encoded)",
      )
-     extract_output: Optional[List[str]] = Field(
-         None,
+     extract_output: List[str] = Field(
+         ...,
          description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
          examples=[["access_token", "refresh_token", "other_field"]],
          title="Extract Output",
@@ -1015,7 +956,7 @@ class OAuthConfigSpecification(BaseModel):
      )
      oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
          None,
-         description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
+         description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
          title="DeclarativeOAuth Connector Specification",
      )
      complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
@@ -1222,14 +1163,6 @@ class LegacySessionTokenAuthenticator(BaseModel):
      parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")


- class JsonParser(BaseModel):
-     class Config:
-         extra = Extra.allow
-
-     type: Literal["JsonParser"]
-     encoding: Optional[str] = "utf-8"
-
-
  class JsonLineParser(BaseModel):
      type: Literal["JsonLineParser"]
      encoding: Optional[str] = "utf-8"
@@ -1628,7 +1561,7 @@ class RecordSelector(BaseModel):

  class GzipParser(BaseModel):
      type: Literal["GzipParser"]
-     inner_parser: Union[JsonLineParser, CsvParser, JsonParser]
+     inner_parser: Union[JsonLineParser, CsvParser]


  class Spec(BaseModel):
@@ -1663,7 +1596,7 @@ class CompositeErrorHandler(BaseModel):

  class CompositeRawDecoder(BaseModel):
      type: Literal["CompositeRawDecoder"]
-     parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
+     parser: Union[GzipParser, JsonLineParser, CsvParser]


  class DeclarativeSource1(BaseModel):
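
Note: with `JsonParser` dropped from both unions, `CompositeRawDecoder` accepts only `GzipParser`, `JsonLineParser`, or `CsvParser`, and `GzipParser` may only wrap the latter two. An illustrative nesting in dict form (the `encoding` field is the one shown on `JsonLineParser` earlier in this diff):

    decoder = {
        "type": "CompositeRawDecoder",
        "parser": {
            "type": "GzipParser",
            "inner_parser": {"type": "JsonLineParser", "encoding": "utf-8"},
        },
    }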
@@ -1671,7 +1604,7 @@ class DeclarativeSource1(BaseModel):
          extra = Extra.forbid

      type: Literal["DeclarativeSource"]
-     check: Union[CheckStream, CheckDynamicStream]
+     check: CheckStream
      streams: List[DeclarativeStream]
      dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
      version: str = Field(
@@ -1697,7 +1630,7 @@ class DeclarativeSource2(BaseModel):
          extra = Extra.forbid

      type: Literal["DeclarativeSource"]
-     check: Union[CheckStream, CheckDynamicStream]
+     check: CheckStream
      streams: Optional[List[DeclarativeStream]] = None
      dynamic_streams: List[DynamicDeclarativeStream]
      version: str = Field(
@@ -2107,10 +2040,6 @@ class AsyncRetriever(BaseModel):
          ...,
          description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
      )
-     url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
-         None,
-         description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
-     )
      download_requester: Union[CustomRequester, HttpRequester] = Field(
          ...,
          description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",