airbyte-cdk 6.20.0__py3-none-any.whl → 6.20.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. airbyte_cdk/sources/declarative/auth/oauth.py +0 -34
  2. airbyte_cdk/sources/declarative/checks/__init__.py +2 -18
  3. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +80 -16
  4. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +21 -93
  5. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +0 -43
  6. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  7. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  8. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +331 -0
  9. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  10. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +15 -0
  11. airbyte_cdk/sources/declarative/manifest_declarative_source.py +1 -2
  12. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +26 -96
  13. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +105 -111
  14. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +4 -33
  15. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  16. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -11
  17. airbyte_cdk/sources/file_based/exceptions.py +0 -34
  18. airbyte_cdk/sources/file_based/file_based_source.py +5 -28
  19. airbyte_cdk/sources/file_based/file_based_stream_reader.py +4 -18
  20. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +2 -25
  21. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +2 -30
  22. airbyte_cdk/sources/streams/concurrent/cursor.py +30 -21
  23. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +4 -33
  24. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -42
  25. airbyte_cdk/sources/types.py +0 -3
  26. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/METADATA +1 -1
  27. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/RECORD +30 -31
  28. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +0 -51
  29. airbyte_cdk/sources/declarative/requesters/README.md +0 -56
  30. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/LICENSE.txt +0 -0
  31. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/WHEEL +0 -0
  32. {airbyte_cdk-6.20.0.dist-info → airbyte_cdk-6.20.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,331 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import copy
6
+ import logging
7
+ import threading
8
+ from collections import OrderedDict
9
+ from copy import deepcopy
10
+ from datetime import timedelta
11
+ from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
12
+
13
+ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
14
+ from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
15
+ Timer,
16
+ iterate_with_last_flag_and_state,
17
+ )
18
+ from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
19
+ from airbyte_cdk.sources.message import MessageRepository
20
+ from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
21
+ PerPartitionKeySerializer,
22
+ )
23
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
24
+ from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
25
+ from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
26
+
27
+ logger = logging.getLogger("airbyte")
28
+
29
+
30
class ConcurrentCursorFactory:
    """Thin wrapper around a callable that builds a ``ConcurrentCursor``.

    The wrapped callable is invoked with the keyword arguments ``stream_state``
    and ``runtime_lookback_window`` every time :meth:`create` is called.
    """

    def __init__(self, create_function: Callable[..., ConcurrentCursor]):
        # Stored as-is; it is only ever invoked from `create`.
        self._create_function = create_function

    def create(
        self, stream_state: Mapping[str, Any], runtime_lookback_window: Any
    ) -> ConcurrentCursor:
        """Build a cursor by forwarding both arguments, by keyword, to the wrapped callable.

        Args:
            stream_state: State handed to the new cursor.
            runtime_lookback_window: Lookback window handed to the new cursor; forwarded untouched.

        Returns:
            Whatever the wrapped callable returns.
        """
        factory_kwargs = {
            "stream_state": stream_state,
            "runtime_lookback_window": runtime_lookback_window,
        }
        return self._create_function(**factory_kwargs)
40
+
41
+
42
class ConcurrentPerPartitionCursor(Cursor):
    """
    Manages state per partition when a stream has many partitions, preventing data loss or duplication.

    Attributes:
        DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).

    - **Partition Limitation Logic**
      Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. Oldest partitions are removed when the limit is reached.

    - **Global Cursor Fallback**
      New partitions use global state as the initial state to progress the state for deleted or new partitions. The history data added after the initial sync will be missing.
    """

    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
    _NO_STATE: Mapping[str, Any] = {}
    _NO_CURSOR_STATE: Mapping[str, Any] = {}
    _GLOBAL_STATE_KEY = "state"
    _PERPARTITION_STATE_KEY = "states"
    _KEY = 0
    _VALUE = 1

    def __init__(
        self,
        cursor_factory: ConcurrentCursorFactory,
        partition_router: PartitionRouter,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: Any,
        message_repository: MessageRepository,
        connector_state_manager: ConnectorStateManager,
        cursor_field: CursorField,
    ) -> None:
        """Store the collaborators and load `stream_state` through `_set_initial_state`."""
        self._global_cursor: Optional[StreamState] = {}
        self._stream_name = stream_name
        self._stream_namespace = stream_namespace
        self._message_repository = message_repository
        self._connector_state_manager = connector_state_manager
        self._cursor_field = cursor_field

        self._cursor_factory = cursor_factory
        self._partition_router = partition_router

        # The dict is ordered to ensure that once the maximum number of partitions is reached,
        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
        self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
        # One counter per partition: released once per generated slice, acquired once per closed
        # slice, so a value of 0 means no generated slice is still waiting to be closed.
        self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
        # Keys of partitions whose last slice has already been generated.
        self._finished_partitions: set[str] = set()
        self._lock = threading.Lock()
        self._timer = Timer()
        self._new_global_cursor: Optional[StreamState] = None
        self._lookback_window: int = 0
        self._parent_state: Optional[StreamState] = None
        self._over_limit: int = 0
        self._partition_serializer = PerPartitionKeySerializer()

        self._set_initial_state(stream_state)

    @property
    def cursor_field(self) -> CursorField:
        """The cursor field passed to the constructor."""
        return self._cursor_field

    @property
    def state(self) -> MutableMapping[str, Any]:
        """Assemble the stream state from the per-partition cursors and the global values.

        The result always contains the `states` list (partitions whose cursor state is empty are
        left out) and adds `state`, `lookback_window` and `parent_state` when they are set.
        """
        states = []
        for partition_tuple, cursor in self._cursor_per_partition.items():
            if cursor.state:
                states.append(
                    {
                        "partition": self._to_dict(partition_tuple),
                        "cursor": copy.deepcopy(cursor.state),
                    }
                )
        state: dict[str, Any] = {self._PERPARTITION_STATE_KEY: states}

        if self._global_cursor:
            state[self._GLOBAL_STATE_KEY] = self._global_cursor
        if self._lookback_window is not None:
            state["lookback_window"] = self._lookback_window
        if self._parent_state is not None:
            state["parent_state"] = self._parent_state
        return state

    def close_partition(self, partition: Partition) -> None:
        """Close one slice on its partition cursor and track the candidate global cursor.

        Once the last pending slice of a fully generated partition is closed, that partition's
        cursor state replaces `_new_global_cursor` if it is greater (or if none is set yet).

        Raises:
            ValueError: If the partition does not expose a stream slice.
            KeyError: If no cursor is tracked for the partition.
        """
        # Attempt to retrieve the stream slice
        stream_slice: Optional[StreamSlice] = partition.to_slice()  # type: ignore[assignment]

        # Ensure stream_slice is not None
        if stream_slice is None:
            raise ValueError("stream_slice cannot be None")

        partition_key = self._to_partition_key(stream_slice.partition)
        # NOTE(review): a partition evicted by `_ensure_partition_limit` while it still has slices
        # in flight is no longer in `_cursor_per_partition`, so this lookup raises KeyError.
        self._cursor_per_partition[partition_key].close_partition(partition=partition)
        with self._lock:
            semaphore = self._semaphore_per_partition[partition_key]
            semaphore.acquire()
            cursor = self._cursor_per_partition[partition_key]
            # `threading.Semaphore` has no public accessor for its counter, hence `_value`.
            if partition_key in self._finished_partitions and semaphore._value == 0:
                cursor_key = self.cursor_field.cursor_field_key
                if (
                    self._new_global_cursor is None
                    or self._new_global_cursor[cursor_key] < cursor.state[cursor_key]
                ):
                    self._new_global_cursor = copy.deepcopy(cursor.state)

    def ensure_at_least_one_state_emitted(self) -> None:
        """
        The platform expects to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
        called.
        """
        # Promote the candidate global cursor only when no tracked partition has unclosed slices.
        if not any(semaphore._value for semaphore in self._semaphore_per_partition.values()):
            self._global_cursor = self._new_global_cursor
            self._lookback_window = self._timer.finish()
            self._parent_state = self._partition_router.get_stream_state()
        self._emit_state_message()

    def _emit_state_message(self) -> None:
        """Push the current `state` to the state manager and emit the resulting state message."""
        self._connector_state_manager.update_state_for_stream(
            self._stream_name,
            self._stream_namespace,
            self.state,
        )
        state_message = self._connector_state_manager.create_state_message(
            self._stream_name, self._stream_namespace
        )
        self._message_repository.emit_message(state_message)

    def stream_slices(self) -> Iterable[StreamSlice]:
        """Yield the slices of every partition produced by the partition router.

        Raises:
            RuntimeError: If the timer is already running, i.e. iteration was started before.
        """
        if self._timer.is_running():
            raise RuntimeError("stream_slices has been executed more than once.")

        slices = self._partition_router.stream_slices()
        self._timer.start()
        for partition in slices:
            yield from self._generate_slices_from_partition(partition)

    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
        """Yield one `StreamSlice` per cursor slice of `partition`, creating its cursor if needed."""
        # Ensure the maximum number of partitions is not exceeded
        self._ensure_partition_limit()

        # Serialize the partition once instead of once per lookup.
        partition_key = self._to_partition_key(partition.partition)

        cursor = self._cursor_per_partition.get(partition_key)
        if not cursor:
            # Unknown partitions start from the global cursor. Without a global cursor there is
            # no lookback window to apply, so pass None rather than an empty state mapping.
            cursor = self._create_cursor(
                self._global_cursor,
                self._lookback_window if self._global_cursor else None,
            )
            self._cursor_per_partition[partition_key] = cursor
            self._semaphore_per_partition[partition_key] = threading.Semaphore(0)

        semaphore = self._semaphore_per_partition[partition_key]
        for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
            cursor.stream_slices(),
            lambda: None,
        ):
            # Count the slice as pending before handing it out.
            semaphore.release()
            if is_last_slice:
                self._finished_partitions.add(partition_key)
            yield StreamSlice(
                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
            )

    def _ensure_partition_limit(self) -> None:
        """
        Ensure the maximum number of partitions is not exceeded. If so, the oldest added partition will be dropped.
        """
        while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
            self._over_limit += 1
            oldest_partition = self._cursor_per_partition.popitem(last=False)[
                0
            ]  # Remove the oldest partition

            # Drop the bookkeeping of the evicted partition as well, otherwise these containers
            # keep growing past the limit. Only do so when no slice is pending (counter at 0);
            # a pending slice still needs its semaphore when it gets closed.
            semaphore = self._semaphore_per_partition.get(oldest_partition)
            if semaphore is not None and semaphore._value == 0:
                del self._semaphore_per_partition[oldest_partition]
                self._finished_partitions.discard(oldest_partition)

            logger.warning(
                f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
            )

    def limit_reached(self) -> bool:
        """Return True once more than `DEFAULT_MAX_PARTITIONS_NUMBER` partitions have been dropped."""
        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER

    def _set_initial_state(self, stream_state: StreamState) -> None:
        """
        Initialize the cursor's state using the provided `stream_state`.

        This method supports global and per-partition state initialization.

        - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
          The `global state` holds a single cursor position representing the latest processed record across all partitions.

        - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
          This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.

        - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.

        - **Parent State**: (if available) Used to initialize partition routers based on parent streams.

        Args:
            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
                {
                    "states": [
                        {
                            "partition": {
                                "partition_key": "value"
                            },
                            "cursor": {
                                "last_updated": "2023-05-27T00:00:00Z"
                            }
                        }
                    ],
                    "state": {
                        "last_updated": "2023-05-27T00:00:00Z"
                    },
                    "lookback_window": 10,
                    "parent_state": {
                        "parent_stream_name": {
                            "last_updated": "2023-05-27T00:00:00Z"
                        }
                    }
                }
        """
        if not stream_state:
            return

        if self._PERPARTITION_STATE_KEY not in stream_state:
            # We assume that `stream_state` is in a global format that can be applied to all partitions.
            # Example: {"global_state_format_key": "global_state_format_value"}
            self._global_cursor = deepcopy(stream_state)
            self._new_global_cursor = deepcopy(stream_state)

        else:
            self._lookback_window = int(stream_state.get("lookback_window", 0))

            for state in stream_state[self._PERPARTITION_STATE_KEY]:
                partition_key = self._to_partition_key(state["partition"])
                self._cursor_per_partition[partition_key] = self._create_cursor(state["cursor"])
                self._semaphore_per_partition[partition_key] = threading.Semaphore(0)

            # set default state for missing partitions if it is per partition with fallback to global
            if self._GLOBAL_STATE_KEY in stream_state:
                self._global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])
                self._new_global_cursor = deepcopy(stream_state[self._GLOBAL_STATE_KEY])

        # Set parent state for partition routers based on parent streams
        self._partition_router.set_initial_state(stream_state)

    def observe(self, record: Record) -> None:
        """Forward `record` to the cursor of the partition its slice belongs to.

        Raises:
            ValueError: If the record has no associated slice.
            KeyError: If no cursor is tracked for the record's partition.
        """
        if not record.associated_slice:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        self._cursor_per_partition[
            self._to_partition_key(record.associated_slice.partition)
        ].observe(record)

    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
        """Serialize a partition mapping into the string key used by the internal dictionaries."""
        return self._partition_serializer.to_partition_key(partition)

    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
        """Turn a serialized partition key back into its partition mapping."""
        return self._partition_serializer.to_partition(partition_key)

    def _create_cursor(
        self, cursor_state: Any, runtime_lookback_window: Any = None
    ) -> ConcurrentCursor:
        """Create a partition cursor from a deep copy of `cursor_state`.

        A truthy `runtime_lookback_window` is interpreted as seconds and converted to a
        `timedelta`; falsy values are forwarded to the factory unchanged.
        """
        if runtime_lookback_window:
            runtime_lookback_window = timedelta(seconds=runtime_lookback_window)
        cursor = self._cursor_factory.create(
            stream_state=deepcopy(cursor_state), runtime_lookback_window=runtime_lookback_window
        )
        return cursor

    def should_be_synced(self, record: Record) -> bool:
        """Delegate the decision to the cursor of the record's partition."""
        return self._get_cursor(record).should_be_synced(record)

    def _get_cursor(self, record: Record) -> ConcurrentCursor:
        """Return the cursor tracking the record's partition.

        Raises:
            ValueError: If the record has no associated slice or its partition has no cursor.
        """
        if not record.associated_slice:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        partition_key = self._to_partition_key(record.associated_slice.partition)
        if partition_key not in self._cursor_per_partition:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        cursor = self._cursor_per_partition[partition_key]
        return cursor
@@ -64,6 +64,9 @@ class Timer:
64
64
  else:
65
65
  raise RuntimeError("Global substream cursor timer not started")
66
66
 
67
def is_running(self) -> bool:
    """Tell whether a start value is currently recorded on this timer (`_start` is not None)."""
    never_started = self._start is None
    return not never_started
69
+
67
70
 
68
71
  class GlobalSubstreamCursor(DeclarativeCursor):
69
72
  """
@@ -303,6 +303,21 @@ class PerPartitionCursor(DeclarativeCursor):
303
303
  raise ValueError("A partition needs to be provided in order to get request body json")
304
304
 
305
305
  def should_be_synced(self, record: Record) -> bool:
306
+ if (
307
+ record.associated_slice
308
+ and self._to_partition_key(record.associated_slice.partition)
309
+ not in self._cursor_per_partition
310
+ ):
311
+ partition_state = (
312
+ self._state_to_migrate_from
313
+ if self._state_to_migrate_from
314
+ else self._NO_CURSOR_STATE
315
+ )
316
+ cursor = self._create_cursor(partition_state)
317
+
318
+ self._cursor_per_partition[
319
+ self._to_partition_key(record.associated_slice.partition)
320
+ ] = cursor
306
321
  return self._get_cursor(record).should_be_synced(
307
322
  self._convert_record_to_cursor_record(record)
308
323
  )
@@ -22,7 +22,6 @@ from airbyte_cdk.models import (
22
22
  ConnectorSpecification,
23
23
  FailureType,
24
24
  )
25
- from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
26
25
  from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
27
26
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
28
27
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -108,7 +107,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
108
107
  if "type" not in check:
109
108
  check["type"] = "CheckStream"
110
109
  check_stream = self._constructor.create_component(
111
- COMPONENTS_CHECKER_TYPE_MAPPING[check["type"]],
110
+ CheckStreamModel,
112
111
  check,
113
112
  dict(),
114
113
  emit_connector_builder_messages=self._emit_connector_builder_messages,
@@ -52,15 +52,6 @@ class CheckStream(BaseModel):
52
52
  )
53
53
 
54
54
 
55
- class CheckDynamicStream(BaseModel):
56
- type: Literal["CheckDynamicStream"]
57
- stream_count: int = Field(
58
- ...,
59
- description="Numbers of the streams to try reading from when running a check operation.",
60
- title="Stream Count",
61
- )
62
-
63
-
64
55
  class ConcurrencyLevel(BaseModel):
65
56
  type: Optional[Literal["ConcurrencyLevel"]] = None
66
57
  default_concurrency: Union[int, str] = Field(
@@ -490,24 +481,12 @@ class RefreshTokenUpdater(BaseModel):
490
481
 
491
482
  class OAuthAuthenticator(BaseModel):
492
483
  type: Literal["OAuthAuthenticator"]
493
- client_id_name: Optional[str] = Field(
494
- "client_id",
495
- description="The name of the property to use to refresh the `access_token`.",
496
- examples=["custom_app_id"],
497
- title="Client ID Property Name",
498
- )
499
484
  client_id: str = Field(
500
485
  ...,
501
486
  description="The OAuth client ID. Fill it in the user inputs.",
502
487
  examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
503
488
  title="Client ID",
504
489
  )
505
- client_secret_name: Optional[str] = Field(
506
- "client_secret",
507
- description="The name of the property to use to refresh the `access_token`.",
508
- examples=["custom_app_secret"],
509
- title="Client Secret Property Name",
510
- )
511
490
  client_secret: str = Field(
512
491
  ...,
513
492
  description="The OAuth client secret. Fill it in the user inputs.",
@@ -517,12 +496,6 @@ class OAuthAuthenticator(BaseModel):
517
496
  ],
518
497
  title="Client Secret",
519
498
  )
520
- refresh_token_name: Optional[str] = Field(
521
- "refresh_token",
522
- description="The name of the property to use to refresh the `access_token`.",
523
- examples=["custom_app_refresh_value"],
524
- title="Refresh Token Property Name",
525
- )
526
499
  refresh_token: Optional[str] = Field(
527
500
  None,
528
501
  description="Credential artifact used to get a new access token.",
@@ -556,12 +529,6 @@ class OAuthAuthenticator(BaseModel):
556
529
  examples=["expires_in"],
557
530
  title="Token Expiry Property Name",
558
531
  )
559
- grant_type_name: Optional[str] = Field(
560
- "grant_type",
561
- description="The name of the property to use to refresh the `access_token`.",
562
- examples=["custom_grant_type"],
563
- title="Grant Type Property Name",
564
- )
565
532
  grant_type: Optional[str] = Field(
566
533
  "refresh_token",
567
534
  description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.",
@@ -580,17 +547,6 @@ class OAuthAuthenticator(BaseModel):
580
547
  ],
581
548
  title="Refresh Request Body",
582
549
  )
583
- refresh_request_headers: Optional[Dict[str, Any]] = Field(
584
- None,
585
- description="Headers of the request sent to get a new access token.",
586
- examples=[
587
- {
588
- "Authorization": "<AUTH_TOKEN>",
589
- "Content-Type": "application/x-www-form-urlencoded",
590
- }
591
- ],
592
- title="Refresh Request Headers",
593
- )
594
550
  scopes: Optional[List[str]] = Field(
595
551
  None,
596
552
  description="List of scopes that should be granted to the access token.",
@@ -781,43 +737,33 @@ class KeysToSnakeCase(BaseModel):
781
737
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
782
738
 
783
739
 
784
- class FlattenFields(BaseModel):
785
- type: Literal["FlattenFields"]
786
- flatten_lists: Optional[bool] = Field(
787
- True,
788
- description="Whether to flatten lists or leave it as is. Default is True.",
789
- title="Flatten Lists",
790
- )
791
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
792
-
793
-
794
740
  class KeysReplace(BaseModel):
795
741
  type: Literal["KeysReplace"]
796
742
  old: str = Field(
797
743
  ...,
798
744
  description="Old value to replace.",
799
- examples=[
800
- " ",
801
- "{{ record.id }}",
802
- "{{ config['id'] }}",
803
- "{{ stream_slice['id'] }}",
804
- ],
745
+ examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
805
746
  title="Old value",
806
747
  )
807
748
  new: str = Field(
808
749
  ...,
809
750
  description="New value to set.",
810
- examples=[
811
- "_",
812
- "{{ record.id }}",
813
- "{{ config['id'] }}",
814
- "{{ stream_slice['id'] }}",
815
- ],
751
+ examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
816
752
  title="New value",
817
753
  )
818
754
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
819
755
 
820
756
 
757
+ class FlattenFields(BaseModel):
758
+ type: Literal["FlattenFields"]
759
+ flatten_lists: Optional[bool] = Field(
760
+ True,
761
+ description="Whether to flatten lists or leave it as is. Default is True.",
762
+ title="Flatten Lists",
763
+ )
764
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
765
+
766
+
821
767
  class IterableDecoder(BaseModel):
822
768
  type: Literal["IterableDecoder"]
823
769
 
@@ -903,8 +849,8 @@ class OauthConnectorInputSpecification(BaseModel):
903
849
  ...,
904
850
  description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
905
851
  examples=[
906
- "https://domain.host.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",
907
- "https://endpoint.host.com/oauth2/authorize?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{scope_key}}={{{{scope_value}} | urlEncoder}}&{{state_key}}={{state_value}}&subdomain={{subdomain}}",
852
+ "https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
853
+ "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}",
908
854
  ],
909
855
  title="Consent URL",
910
856
  )
@@ -918,18 +864,14 @@ class OauthConnectorInputSpecification(BaseModel):
918
864
  ...,
919
865
  description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
920
866
  examples=[
921
- "https://auth.host.com/oauth2/token?{{client_id_key}}={{client_id_value}}&{{client_secret_key}}={{client_secret_value}}&{{auth_code_key}}={{auth_code_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}"
867
+ "https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
922
868
  ],
923
869
  title="Access Token URL",
924
870
  )
925
871
  access_token_headers: Optional[Dict[str, Any]] = Field(
926
872
  None,
927
873
  description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
928
- examples=[
929
- {
930
- "Authorization": "Basic {{ {{ client_id_value }}:{{ client_secret_value }} | base64Encoder }}"
931
- }
932
- ],
874
+ examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}],
933
875
  title="Access Token Headers",
934
876
  )
935
877
  access_token_params: Optional[Dict[str, Any]] = Field(
@@ -937,15 +879,15 @@ class OauthConnectorInputSpecification(BaseModel):
937
879
  description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
938
880
  examples=[
939
881
  {
940
- "{{ auth_code_key }}": "{{ auth_code_value }}",
941
- "{{ client_id_key }}": "{{ client_id_value }}",
942
- "{{ client_secret_key }}": "{{ client_secret_value }}",
882
+ "{auth_code_key}": "{{auth_code_key}}",
883
+ "{client_id_key}": "{{client_id_key}}",
884
+ "{client_secret_key}": "{{client_secret_key}}",
943
885
  }
944
886
  ],
945
887
  title="Access Token Query Params (Json Encoded)",
946
888
  )
947
- extract_output: Optional[List[str]] = Field(
948
- None,
889
+ extract_output: List[str] = Field(
890
+ ...,
949
891
  description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
950
892
  examples=[["access_token", "refresh_token", "other_field"]],
951
893
  title="Extract Output",
@@ -1014,7 +956,7 @@ class OAuthConfigSpecification(BaseModel):
1014
956
  )
1015
957
  oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
1016
958
  None,
1017
- description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
959
+ description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
1018
960
  title="DeclarativeOAuth Connector Specification",
1019
961
  )
1020
962
  complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
@@ -1221,14 +1163,6 @@ class LegacySessionTokenAuthenticator(BaseModel):
1221
1163
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1222
1164
 
1223
1165
 
1224
- class JsonParser(BaseModel):
1225
- class Config:
1226
- extra = Extra.allow
1227
-
1228
- type: Literal["JsonParser"]
1229
- encoding: Optional[str] = "utf-8"
1230
-
1231
-
1232
1166
  class JsonLineParser(BaseModel):
1233
1167
  type: Literal["JsonLineParser"]
1234
1168
  encoding: Optional[str] = "utf-8"
@@ -1627,7 +1561,7 @@ class RecordSelector(BaseModel):
1627
1561
 
1628
1562
  class GzipParser(BaseModel):
1629
1563
  type: Literal["GzipParser"]
1630
- inner_parser: Union[JsonLineParser, CsvParser, JsonParser]
1564
+ inner_parser: Union[JsonLineParser, CsvParser]
1631
1565
 
1632
1566
 
1633
1567
  class Spec(BaseModel):
@@ -1662,7 +1596,7 @@ class CompositeErrorHandler(BaseModel):
1662
1596
 
1663
1597
  class CompositeRawDecoder(BaseModel):
1664
1598
  type: Literal["CompositeRawDecoder"]
1665
- parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
1599
+ parser: Union[GzipParser, JsonLineParser, CsvParser]
1666
1600
 
1667
1601
 
1668
1602
  class DeclarativeSource1(BaseModel):
@@ -1670,7 +1604,7 @@ class DeclarativeSource1(BaseModel):
1670
1604
  extra = Extra.forbid
1671
1605
 
1672
1606
  type: Literal["DeclarativeSource"]
1673
- check: Union[CheckStream, CheckDynamicStream]
1607
+ check: CheckStream
1674
1608
  streams: List[DeclarativeStream]
1675
1609
  dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
1676
1610
  version: str = Field(
@@ -1696,7 +1630,7 @@ class DeclarativeSource2(BaseModel):
1696
1630
  extra = Extra.forbid
1697
1631
 
1698
1632
  type: Literal["DeclarativeSource"]
1699
- check: Union[CheckStream, CheckDynamicStream]
1633
+ check: CheckStream
1700
1634
  streams: Optional[List[DeclarativeStream]] = None
1701
1635
  dynamic_streams: List[DynamicDeclarativeStream]
1702
1636
  version: str = Field(
@@ -2106,10 +2040,6 @@ class AsyncRetriever(BaseModel):
2106
2040
  ...,
2107
2041
  description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
2108
2042
  )
2109
- url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
2110
- None,
2111
- description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
2112
- )
2113
2043
  download_requester: Union[CustomRequester, HttpRequester] = Field(
2114
2044
  ...,
2115
2045
  description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",