airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.33.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
  4. airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
  5. airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
  6. airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
  7. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
  8. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +145 -43
  9. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +51 -2
  10. airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
  11. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  12. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  13. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
  14. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  15. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  16. airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
  17. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +41 -5
  18. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +313 -30
  20. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  21. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +46 -12
  22. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  23. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  24. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
  25. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  26. airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
  27. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
  28. airbyte_cdk/sources/http_logger.py +1 -1
  29. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  30. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  31. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  32. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
  33. airbyte_cdk/sources/streams/core.py +6 -6
  34. airbyte_cdk/sources/streams/http/http.py +1 -2
  35. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
  36. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
  37. airbyte_cdk/sources/types.py +4 -2
  38. airbyte_cdk/sources/utils/transform.py +23 -2
  39. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  40. airbyte_cdk/utils/datetime_helpers.py +499 -0
  41. airbyte_cdk/utils/slice_hasher.py +8 -1
  42. airbyte_cdk-6.33.0.dist-info/LICENSE_SHORT +1 -0
  43. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/METADATA +6 -6
  44. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/RECORD +47 -41
  45. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/WHEEL +1 -1
  46. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/LICENSE.txt +0 -0
  47. {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.33.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py (new file)
@@ -0,0 +1,400 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+
+import copy
+import logging
+import threading
+from collections import OrderedDict
+from copy import deepcopy
+from datetime import timedelta
+from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
+
+from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
+from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
+    Timer,
+    iterate_with_last_flag_and_state,
+)
+from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
+from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
+    PerPartitionKeySerializer,
+)
+from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
+from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
+from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
+    AbstractStreamStateConverter,
+)
+from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
+
+logger = logging.getLogger("airbyte")
+
+
+class ConcurrentCursorFactory:
+    def __init__(self, create_function: Callable[..., ConcurrentCursor]):
+        self._create_function = create_function
+
+    def create(
+        self, stream_state: Mapping[str, Any], runtime_lookback_window: Optional[timedelta]
+    ) -> ConcurrentCursor:
+        return self._create_function(
+            stream_state=stream_state, runtime_lookback_window=runtime_lookback_window
+        )
+
+
+class ConcurrentPerPartitionCursor(Cursor):
+    """
+    Manages state per partition when a stream has many partitions, preventing data loss or duplication.
+
+    Attributes:
+        DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).
+
+    - **Partition Limitation Logic**
+      Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. The oldest partitions are removed when the limit is reached.
+
+    - **Global Cursor Fallback**
+      New partitions use the global state as their initial state so that state still progresses for deleted or new partitions. History data added after the initial sync will be missing.
+
+    ConcurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
+    """
+
+    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
+    _NO_STATE: Mapping[str, Any] = {}
+    _NO_CURSOR_STATE: Mapping[str, Any] = {}
+    _GLOBAL_STATE_KEY = "state"
+    _PERPARTITION_STATE_KEY = "states"
+    _KEY = 0
+    _VALUE = 1
+
+    def __init__(
+        self,
+        cursor_factory: ConcurrentCursorFactory,
+        partition_router: PartitionRouter,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        stream_state: Any,
+        message_repository: MessageRepository,
+        connector_state_manager: ConnectorStateManager,
+        connector_state_converter: AbstractStreamStateConverter,
+        cursor_field: CursorField,
+    ) -> None:
+        self._global_cursor: Optional[StreamState] = {}
+        self._stream_name = stream_name
+        self._stream_namespace = stream_namespace
+        self._message_repository = message_repository
+        self._connector_state_manager = connector_state_manager
+        self._connector_state_converter = connector_state_converter
+        self._cursor_field = cursor_field
+
+        self._cursor_factory = cursor_factory
+        self._partition_router = partition_router
+
+        # The dict is ordered to ensure that once the maximum number of partitions is reached,
+        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
+        self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
+        self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
+        self._finished_partitions: set[str] = set()
+        self._lock = threading.Lock()
+        self._timer = Timer()
+        self._new_global_cursor: Optional[StreamState] = None
+        self._lookback_window: int = 0
+        self._parent_state: Optional[StreamState] = None
+        self._over_limit: int = 0
+        self._use_global_cursor: bool = False
+        self._partition_serializer = PerPartitionKeySerializer()
+
+        self._set_initial_state(stream_state)
+
+    @property
+    def cursor_field(self) -> CursorField:
+        return self._cursor_field
+
+    @property
+    def state(self) -> MutableMapping[str, Any]:
+        state: dict[str, Any] = {"use_global_cursor": self._use_global_cursor}
+        if not self._use_global_cursor:
+            states = []
+            for partition_tuple, cursor in self._cursor_per_partition.items():
+                if cursor.state:
+                    states.append(
+                        {
+                            "partition": self._to_dict(partition_tuple),
+                            "cursor": copy.deepcopy(cursor.state),
+                        }
+                    )
+            state[self._PERPARTITION_STATE_KEY] = states
+
+        if self._global_cursor:
+            state[self._GLOBAL_STATE_KEY] = self._global_cursor
+        if self._lookback_window is not None:
+            state["lookback_window"] = self._lookback_window
+        if self._parent_state is not None:
+            state["parent_state"] = self._parent_state
+        return state
+
+    def close_partition(self, partition: Partition) -> None:
+        # Attempt to retrieve the stream slice
+        stream_slice: Optional[StreamSlice] = partition.to_slice()  # type: ignore[assignment]
+
+        # Ensure stream_slice is not None
+        if stream_slice is None:
+            raise ValueError("stream_slice cannot be None")
+
+        partition_key = self._to_partition_key(stream_slice.partition)
+        self._cursor_per_partition[partition_key].close_partition(partition=partition)
+        with self._lock:
+            self._semaphore_per_partition[partition_key].acquire()
+            cursor = self._cursor_per_partition[partition_key]
+            if (
+                partition_key in self._finished_partitions
+                and self._semaphore_per_partition[partition_key]._value == 0
+            ):
+                if (
+                    self._new_global_cursor is None
+                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
+                    < cursor.state[self.cursor_field.cursor_field_key]
+                ):
+                    self._new_global_cursor = copy.deepcopy(cursor.state)
+        if not self._use_global_cursor:
+            self._emit_state_message()
+
+    def ensure_at_least_one_state_emitted(self) -> None:
+        """
+        The platform expects at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
+        called.
+        """
+        if not any(
+            semaphore_item[1]._value for semaphore_item in self._semaphore_per_partition.items()
+        ):
+            self._global_cursor = self._new_global_cursor
+            self._lookback_window = self._timer.finish()
+            self._parent_state = self._partition_router.get_stream_state()
+        self._emit_state_message()
+
+    def _emit_state_message(self) -> None:
+        self._connector_state_manager.update_state_for_stream(
+            self._stream_name,
+            self._stream_namespace,
+            self.state,
+        )
+        state_message = self._connector_state_manager.create_state_message(
+            self._stream_name, self._stream_namespace
+        )
+        self._message_repository.emit_message(state_message)
+
+    def stream_slices(self) -> Iterable[StreamSlice]:
+        if self._timer.is_running():
+            raise RuntimeError("stream_slices has been executed more than once.")
+
+        slices = self._partition_router.stream_slices()
+        self._timer.start()
+        for partition in slices:
+            yield from self._generate_slices_from_partition(partition)
+
+    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
+        # Ensure the maximum number of partitions is not exceeded
+        self._ensure_partition_limit()
+
+        cursor = self._cursor_per_partition.get(self._to_partition_key(partition.partition))
+        if not cursor:
+            cursor = self._create_cursor(
+                self._global_cursor,
+                self._lookback_window if self._global_cursor else 0,
+            )
+            with self._lock:
+                self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
+                self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
+                    threading.Semaphore(0)
+                )
+
+        for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
+            cursor.stream_slices(),
+            lambda: None,
+        ):
+            self._semaphore_per_partition[self._to_partition_key(partition.partition)].release()
+            if is_last_slice:
+                self._finished_partitions.add(self._to_partition_key(partition.partition))
+            yield StreamSlice(
+                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
+            )
+
+    def _ensure_partition_limit(self) -> None:
+        """
+        Ensure the maximum number of partitions does not exceed the predefined limit.
+
+        Steps:
+        1. Attempt to remove partitions that are marked as finished in `_finished_partitions`.
+           These partitions are considered processed and safe to delete.
+        2. If the limit is still exceeded and no finished partitions are available for removal,
+           remove the oldest partition unconditionally. We expect failed partitions to be removed.
+
+        Logging:
+        - Logs a warning each time a partition is removed, indicating whether it was finished
+          or removed due to being the oldest.
+        """
+        with self._lock:
+            while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
+                self._over_limit += 1
+                # Try removing finished partitions first
+                for partition_key in list(self._cursor_per_partition.keys()):
+                    if (
+                        partition_key in self._finished_partitions
+                        and self._semaphore_per_partition[partition_key]._value == 0
+                    ):
+                        oldest_partition = self._cursor_per_partition.pop(
+                            partition_key
+                        )  # Remove the oldest partition
+                        logger.warning(
+                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
+                        )
+                        break
+                else:
+                    # If no finished partitions can be removed, fall back to removing the oldest partition
+                    oldest_partition = self._cursor_per_partition.popitem(last=False)[
+                        1
+                    ]  # Remove the oldest partition
+                    logger.warning(
+                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
+                    )
+
+    def _set_initial_state(self, stream_state: StreamState) -> None:
+        """
+        Initialize the cursor's state using the provided `stream_state`.
+
+        This method supports global and per-partition state initialization.
+
+        - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
+          The global state holds a single cursor position representing the latest processed record across all partitions.
+
+        - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
+          This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.
+
+        - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.
+
+        - **Parent State**: (if available) Used to initialize partition routers based on parent streams.
+
+        Args:
+            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
+                {
+                    "states": [
+                        {
+                            "partition": {
+                                "partition_key": "value"
+                            },
+                            "cursor": {
+                                "last_updated": "2023-05-27T00:00:00Z"
+                            }
+                        }
+                    ],
+                    "state": {
+                        "last_updated": "2023-05-27T00:00:00Z"
+                    },
+                    "lookback_window": 10,
+                    "parent_state": {
+                        "parent_stream_name": {
+                            "last_updated": "2023-05-27T00:00:00Z"
+                        }
+                    }
+                }
+        """
+        if not stream_state:
+            return
+
+        if (
+            self._PERPARTITION_STATE_KEY not in stream_state
+            and self._GLOBAL_STATE_KEY not in stream_state
+        ):
+            # We assume that `stream_state` is in a global format that can be applied to all partitions.
+            # Example: {"global_state_format_key": "global_state_format_value"}
+            self._set_global_state(stream_state)
+
+        else:
+            self._use_global_cursor = stream_state.get("use_global_cursor", False)
+
+            self._lookback_window = int(stream_state.get("lookback_window", 0))
+
+            for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
+                self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
+                    self._create_cursor(state["cursor"])
+                )
+                self._semaphore_per_partition[self._to_partition_key(state["partition"])] = (
+                    threading.Semaphore(0)
+                )
+
+            # Set default state for missing partitions if it is per partition with fallback to global
+            if self._GLOBAL_STATE_KEY in stream_state:
+                self._set_global_state(stream_state[self._GLOBAL_STATE_KEY])
+
+        # Set initial parent state
+        if stream_state.get("parent_state"):
+            self._parent_state = stream_state["parent_state"]
+
+        # Set parent state for partition routers based on parent streams
+        self._partition_router.set_initial_state(stream_state)
+
+    def _set_global_state(self, stream_state: Mapping[str, Any]) -> None:
+        """
+        Initializes the global cursor state from the provided stream state.
+
+        If the cursor field key is present in the stream state, its value is parsed,
+        formatted, and stored as the global cursor. This ensures consistency in state
+        representation across partitions.
+        """
+        if self.cursor_field.cursor_field_key in stream_state:
+            global_state_value = stream_state[self.cursor_field.cursor_field_key]
+            final_format_global_state_value = self._connector_state_converter.output_format(
+                self._connector_state_converter.parse_value(global_state_value)
+            )
+
+            fixed_global_state = {
+                self.cursor_field.cursor_field_key: final_format_global_state_value
+            }
+
+            self._global_cursor = deepcopy(fixed_global_state)
+            self._new_global_cursor = deepcopy(fixed_global_state)
+
+    def observe(self, record: Record) -> None:
+        if not self._use_global_cursor and self.limit_reached():
+            self._use_global_cursor = True
+
+        if not record.associated_slice:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        self._cursor_per_partition[
+            self._to_partition_key(record.associated_slice.partition)
+        ].observe(record)
+
+    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
+        return self._partition_serializer.to_partition_key(partition)
+
+    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
+        return self._partition_serializer.to_partition(partition_key)
+
+    def _create_cursor(
+        self, cursor_state: Any, runtime_lookback_window: int = 0
+    ) -> ConcurrentCursor:
+        cursor = self._cursor_factory.create(
+            stream_state=deepcopy(cursor_state),
+            runtime_lookback_window=timedelta(seconds=runtime_lookback_window),
+        )
+        return cursor
+
+    def should_be_synced(self, record: Record) -> bool:
+        return self._get_cursor(record).should_be_synced(record)
+
+    def _get_cursor(self, record: Record) -> ConcurrentCursor:
+        if not record.associated_slice:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        partition_key = self._to_partition_key(record.associated_slice.partition)
+        if partition_key not in self._cursor_per_partition:
+            raise ValueError(
+                "Invalid state as stream slices that are emitted should refer to an existing cursor"
+            )
+        cursor = self._cursor_per_partition[partition_key]
+        return cursor
+
+    def limit_reached(self) -> bool:
+        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
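
Taken together, the docstrings above describe a state object that combines a per-partition list with a global fallback. A minimal sketch of a state message this cursor might emit (a hypothetical composed example; partition keys and timestamps are made up, not from a real sync):

state = {
    "use_global_cursor": False,  # flips to True once limit_reached() returns True
    "states": [  # _PERPARTITION_STATE_KEY: one entry per tracked partition
        {
            "partition": {"board_id": "123"},
            "cursor": {"last_updated": "2024-01-15T00:00:00Z"},
        },
    ],
    "state": {"last_updated": "2024-01-10T00:00:00Z"},  # _GLOBAL_STATE_KEY: initial state for new partitions
    "lookback_window": 10,  # seconds to re-read on the next sync
    "parent_state": {"boards": {"updated_at": "2024-01-15T00:00:00Z"}},
}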
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py
@@ -64,6 +64,9 @@ class Timer:
         else:
             raise RuntimeError("Global substream cursor timer not started")
 
+    def is_running(self) -> bool:
+        return self._start is not None
+
 
 class GlobalSubstreamCursor(DeclarativeCursor):
     """
airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py
@@ -222,6 +222,8 @@ class PerPartitionCursor(DeclarativeCursor):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_params(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -244,6 +246,8 @@ class PerPartitionCursor(DeclarativeCursor):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_headers(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -266,6 +270,8 @@ class PerPartitionCursor(DeclarativeCursor):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Union[Mapping[str, Any], str]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_body_data(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -288,6 +294,8 @@ class PerPartitionCursor(DeclarativeCursor):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Mapping[str, Any]:
         if stream_slice:
+            if self._to_partition_key(stream_slice.partition) not in self._cursor_per_partition:
+                self._create_cursor_for_partition(self._to_partition_key(stream_slice.partition))
             return self._partition_router.get_request_body_json(  # type: ignore # this always returns a mapping
                 stream_state=stream_state,
                 stream_slice=StreamSlice(partition=stream_slice.partition, cursor_slice={}),
@@ -341,8 +349,32 @@ class PerPartitionCursor(DeclarativeCursor):
             )
         partition_key = self._to_partition_key(record.associated_slice.partition)
         if partition_key not in self._cursor_per_partition:
-            raise ValueError(
-                "Invalid state as stream slices that are emitted should refer to an existing cursor"
-            )
+            self._create_cursor_for_partition(partition_key)
         cursor = self._cursor_per_partition[partition_key]
         return cursor
+
+    def _create_cursor_for_partition(self, partition_key: str) -> None:
+        """
+        Dynamically creates and initializes a cursor for the specified partition.
+
+        This method is required for `ConcurrentPerPartitionCursor`. For concurrent cursors,
+        stream_slices is executed only for the concurrent cursor, so cursors per partition
+        are not created for the declarative cursor. This method ensures that a cursor is available
+        to create requests for the specified partition. The cursor is initialized
+        with the per-partition state if present in the initial state, or with the global state
+        adjusted by the lookback window, or with the state to migrate from.
+
+        Note:
+            This is a temporary workaround and should be removed once the declarative cursor
+            is decoupled from the concurrent cursor implementation.
+
+        Args:
+            partition_key (str): The unique identifier for the partition for which the cursor
+                needs to be created.
+        """
+        partition_state = (
+            self._state_to_migrate_from if self._state_to_migrate_from else self._NO_CURSOR_STATE
+        )
+        cursor = self._create_cursor(partition_state)
+
+        self._cursor_per_partition[partition_key] = cursor
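
The guard added to the four request-option methods above, and the softened `_get_cursor`, are the same get-or-create pattern: with the concurrent implementation, `stream_slices` runs only on the concurrent cursor, so the declarative cursor must be able to materialize a missing per-partition cursor on demand instead of raising. A stripped-down sketch of the idea (hypothetical names and simplified state types, not the CDK's API):

from typing import Any, Dict, Mapping


class LazyCursorStates:
    """Get-or-create lookup mirroring _create_cursor_for_partition."""

    def __init__(self, fallback_state: Mapping[str, Any]):
        self._fallback_state = fallback_state  # e.g. the state to migrate from
        self._state_per_partition: Dict[str, Mapping[str, Any]] = {}

    def state_for(self, partition_key: str) -> Mapping[str, Any]:
        # Create on first access instead of raising ValueError.
        if partition_key not in self._state_per_partition:
            self._state_per_partition[partition_key] = dict(self._fallback_state)
        return self._state_per_partition[partition_key]


states = LazyCursorStates(fallback_state={"last_updated": "2023-05-27T00:00:00Z"})
print(states.state_for('{"board_id": "123"}'))  # materialized on demand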
airbyte_cdk/sources/declarative/manifest_declarative_source.py
@@ -7,6 +7,7 @@ import logging
 import pkgutil
 from copy import deepcopy
 from importlib import metadata
+from types import ModuleType
 from typing import Any, Dict, Iterator, List, Mapping, Optional, Set
 
 import yaml
@@ -25,13 +26,13 @@ from airbyte_cdk.models import (
 from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
 from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
 from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
-from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
-    CheckStream as CheckStreamModel,
-)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     DeclarativeStream as DeclarativeStreamModel,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import Spec as SpecModel
+from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
+    get_registered_components_module,
+)
 from airbyte_cdk.sources.declarative.parsers.manifest_component_transformer import (
     ManifestComponentTransformer,
 )
@@ -59,22 +60,29 @@ class ManifestDeclarativeSource(DeclarativeSource):
     def __init__(
         self,
         source_config: ConnectionDefinition,
+        *,
+        config: Mapping[str, Any] | None = None,
         debug: bool = False,
         emit_connector_builder_messages: bool = False,
         component_factory: Optional[ModelToComponentFactory] = None,
     ):
         """
-        :param source_config(Mapping[str, Any]): The manifest of low-code components that describe the source connector
-        :param debug(bool): True if debug mode is enabled
-        :param component_factory(ModelToComponentFactory): optional factory if ModelToComponentFactory's default behaviour needs to be tweaked
+        Args:
+            config: The provided config dict.
+            source_config: The manifest of low-code components that describe the source connector.
+            debug: True if debug mode is enabled.
+            emit_connector_builder_messages: True if messages should be emitted to the connector builder.
+            component_factory: optional factory if ModelToComponentFactory's default behavior needs to be tweaked.
         """
         self.logger = logging.getLogger(f"airbyte.{self.name}")
-
         # For ease of use we don't require the type to be specified at the top level manifest, but it should be included during processing
         manifest = dict(source_config)
         if "type" not in manifest:
             manifest["type"] = "DeclarativeSource"
 
+        # If custom components are needed, locate and/or register them.
+        self.components_module: ModuleType | None = get_registered_components_module(config=config)
+
         resolved_source_config = ManifestReferenceResolver().preprocess_manifest(manifest)
         propagated_source_config = ManifestComponentTransformer().propagate_types_and_parameters(
             "", resolved_source_config, {}
@@ -357,6 +365,11 @@ class ManifestDeclarativeSource(DeclarativeSource):
             # Ensure that each stream is created with a unique name
             name = dynamic_stream.get("name")
 
+            if not isinstance(name, str):
+                raise ValueError(
+                    f"Expected stream name {name} to be a string, got {type(name)}."
+                )
+
             if name in seen_dynamic_streams:
                 error_message = f"Dynamic streams list contains a duplicate name: {name}. Please contact Airbyte Support."
                 failure_type = FailureType.system_error
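
A minimal usage sketch of the new keyword-only `config` argument (the manifest path and config contents below are placeholders; a real manifest must pass schema validation). Passing the user config at construction time is what allows `get_registered_components_module` to locate and register any custom components the config carries:

import yaml

from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource

# Placeholder: load a complete, valid declarative manifest from disk.
manifest = yaml.safe_load(open("manifest.yaml"))
config = {"api_key": "..."}  # user config; may also carry custom-components source code

# `config` is keyword-only and optional; omitting it skips custom-component registration.
source = ManifestDeclarativeSource(source_config=manifest, config=config)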
airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -59,6 +59,11 @@ class CheckDynamicStream(BaseModel):
         description="Numbers of the streams to try reading from when running a check operation.",
         title="Stream Count",
     )
+    use_check_availability: Optional[bool] = Field(
+        True,
+        description="Enables stream check availability. This field is automatically set by the CDK.",
+        title="Use Check Availability",
+    )
 
 
 class ConcurrencyLevel(BaseModel):
@@ -328,6 +333,16 @@ class LegacyToPerPartitionStateMigration(BaseModel):
     type: Optional[Literal["LegacyToPerPartitionStateMigration"]] = None
 
 
+class Clamping(BaseModel):
+    target: str = Field(
+        ...,
+        description="The period of time that datetime windows will be clamped by",
+        examples=["DAY", "WEEK", "MONTH", "{{ config['target'] }}"],
+        title="Target",
+    )
+    target_details: Optional[Dict[str, Any]] = None
+
+
 class Algorithm(Enum):
     HS256 = "HS256"
     HS384 = "HS384"
@@ -496,8 +511,8 @@ class OAuthAuthenticator(BaseModel):
         examples=["custom_app_id"],
         title="Client ID Property Name",
     )
-    client_id: str = Field(
-        ...,
+    client_id: Optional[str] = Field(
+        None,
         description="The OAuth client ID. Fill it in the user inputs.",
         examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
         title="Client ID",
@@ -508,8 +523,8 @@ class OAuthAuthenticator(BaseModel):
         examples=["custom_app_secret"],
         title="Client Secret Property Name",
     )
-    client_secret: str = Field(
-        ...,
+    client_secret: Optional[str] = Field(
+        None,
         description="The OAuth client secret. Fill it in the user inputs.",
         examples=[
             "{{ config['client_secret }}",
@@ -614,6 +629,16 @@ class OAuthAuthenticator(BaseModel):
         description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only be used once.",
         title="Token Updater",
     )
+    profile_assertion: Optional[JwtAuthenticator] = Field(
+        None,
+        description="The authenticator being used to authenticate the client authenticator.",
+        title="Profile Assertion",
+    )
+    use_profile_assertion: Optional[bool] = Field(
+        False,
+        description="Enable using profile assertion as a flow for OAuth authorization.",
+        title="Use Profile Assertion",
+    )
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
@@ -716,8 +741,13 @@ class HttpResponseFilter(BaseModel):
     parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
 
 
+class ComplexFieldType(BaseModel):
+    field_type: str
+    items: Optional[Union[str, ComplexFieldType]] = None
+
+
 class TypesMap(BaseModel):
-    target_type: Union[str, List[str]]
+    target_type: Union[str, List[str], ComplexFieldType]
     current_type: Union[str, List[str]]
     condition: Optional[str] = None
 
@@ -1457,6 +1487,11 @@ class AuthFlow(BaseModel):
 
 class DatetimeBasedCursor(BaseModel):
     type: Literal["DatetimeBasedCursor"]
+    clamping: Optional[Clamping] = Field(
+        None,
+        description="This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)",
+        title="Date Range Clamping",
+    )
     cursor_field: str = Field(
         ...,
         description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
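
As a quick illustration, the new `Clamping` model shown above can be constructed directly. The `target_details` shape below (a `weekday` key for weekly clamping) is an assumption, since the field is typed here as an open `Dict[str, Any]`:

from airbyte_cdk.sources.declarative.models.declarative_component_schema import Clamping

# Clamp each datetime window to whole weeks; "weekday" is an assumed detail key.
weekly = Clamping(target="WEEK", target_details={"weekday": "MONDAY"})
print(weekly.target)  # "WEEK"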
@@ -2241,6 +2276,7 @@ class DynamicDeclarativeStream(BaseModel):
     )
 
 
+ComplexFieldType.update_forward_refs()
 CompositeErrorHandler.update_forward_refs()
 DeclarativeSource1.update_forward_refs()
 DeclarativeSource2.update_forward_refs()