airbyte-cdk 6.31.1.dev0__py3-none-any.whl → 6.31.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +3 -9
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +2 -3
  3. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +4 -4
  4. airbyte_cdk/sources/declarative/auth/jwt.py +11 -17
  5. airbyte_cdk/sources/declarative/auth/oauth.py +23 -89
  6. airbyte_cdk/sources/declarative/auth/token.py +3 -8
  7. airbyte_cdk/sources/declarative/auth/token_provider.py +5 -4
  8. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +9 -19
  9. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +43 -134
  10. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -55
  11. airbyte_cdk/sources/declarative/declarative_stream.py +1 -3
  12. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  13. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  14. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +7 -6
  15. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  16. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +3 -35
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +7 -15
  18. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +15 -45
  19. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +64 -343
  20. airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
  21. airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
  22. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +15 -55
  23. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -22
  24. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
  25. airbyte_cdk/sources/declarative/requesters/http_requester.py +5 -1
  26. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +6 -5
  27. airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
  28. airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
  29. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -6
  30. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +5 -2
  31. airbyte_cdk/sources/declarative/schema/__init__.py +0 -2
  32. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +5 -44
  33. airbyte_cdk/sources/http_logger.py +1 -1
  34. airbyte_cdk/sources/streams/concurrent/cursor.py +57 -51
  35. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +13 -22
  36. airbyte_cdk/sources/streams/core.py +6 -6
  37. airbyte_cdk/sources/streams/http/http.py +2 -1
  38. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +6 -17
  39. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +31 -43
  40. airbyte_cdk/sources/types.py +2 -4
  41. airbyte_cdk/sources/utils/transform.py +2 -23
  42. airbyte_cdk/test/utils/manifest_only_fixtures.py +2 -1
  43. airbyte_cdk/utils/mapping_helpers.py +86 -27
  44. airbyte_cdk/utils/slice_hasher.py +1 -8
  45. {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/METADATA +6 -6
  46. {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/RECORD +49 -55
  47. {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/WHEEL +1 -1
  48. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -400
  49. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +0 -143
  50. airbyte_cdk/sources/streams/concurrent/clamping.py +0 -99
  51. airbyte_cdk/sources/streams/concurrent/cursor_types.py +0 -32
  52. airbyte_cdk/utils/datetime_helpers.py +0 -499
  53. airbyte_cdk-6.31.1.dev0.dist-info/LICENSE_SHORT +0 -1
  54. {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/LICENSE.txt +0 -0
  55. {airbyte_cdk-6.31.1.dev0.dist-info → airbyte_cdk-6.31.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -1,400 +0,0 @@
1
- #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- import copy
6
- import logging
7
- import threading
8
- from collections import OrderedDict
9
- from copy import deepcopy
10
- from datetime import timedelta
11
- from typing import Any, Callable, Iterable, Mapping, MutableMapping, Optional
12
-
13
- from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
14
- from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
15
- Timer,
16
- iterate_with_last_flag_and_state,
17
- )
18
- from airbyte_cdk.sources.declarative.partition_routers.partition_router import PartitionRouter
19
- from airbyte_cdk.sources.message import MessageRepository
20
- from airbyte_cdk.sources.streams.checkpoint.per_partition_key_serializer import (
21
- PerPartitionKeySerializer,
22
- )
23
- from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, Cursor, CursorField
24
- from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
25
- from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
26
- AbstractStreamStateConverter,
27
- )
28
- from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
29
-
30
# Module-level handle on the "airbyte" logger; used below to warn when partitions are dropped.
logger = logging.getLogger("airbyte")
31
-
32
-
33
class ConcurrentCursorFactory:
    """Wraps a callable that knows how to build `ConcurrentCursor` instances."""

    def __init__(self, create_function: Callable[..., ConcurrentCursor]):
        # The callable is invoked with `stream_state` and `runtime_lookback_window` keyword arguments.
        self._create_function = create_function

    def create(
        self, stream_state: Mapping[str, Any], runtime_lookback_window: Optional[timedelta]
    ) -> ConcurrentCursor:
        """Build a cursor by forwarding both arguments, as keywords, to the wrapped callable."""
        cursor_kwargs = {
            "stream_state": stream_state,
            "runtime_lookback_window": runtime_lookback_window,
        }
        return self._create_function(**cursor_kwargs)
43
-
44
-
45
class ConcurrentPerPartitionCursor(Cursor):
    """
    Manages state per partition when a stream has many partitions, preventing data loss or duplication.

    Attributes:
        DEFAULT_MAX_PARTITIONS_NUMBER (int): Maximum number of partitions to retain in memory (default is 10,000).

    - **Partition Limitation Logic**
      Ensures the number of tracked partitions does not exceed the specified limit to prevent memory overuse. Oldest partitions are removed when the limit is reached.

    - **Global Cursor Fallback**
      New partitions use global state as the initial state to progress the state for deleted or new partitions. The history data added after the initial sync will be missing.

    ConcurrentPerPartitionCursor expects the state of the ConcurrentCursor to follow the format {cursor_field: cursor_value}.
    """

    DEFAULT_MAX_PARTITIONS_NUMBER = 10000
    _NO_STATE: Mapping[str, Any] = {}
    _NO_CURSOR_STATE: Mapping[str, Any] = {}
    _GLOBAL_STATE_KEY = "state"
    _PERPARTITION_STATE_KEY = "states"
    _KEY = 0
    _VALUE = 1

    def __init__(
        self,
        cursor_factory: ConcurrentCursorFactory,
        partition_router: PartitionRouter,
        stream_name: str,
        stream_namespace: Optional[str],
        stream_state: Any,
        message_repository: MessageRepository,
        connector_state_manager: ConnectorStateManager,
        connector_state_converter: AbstractStreamStateConverter,
        cursor_field: CursorField,
    ) -> None:
        """
        Store the collaborators and seed the cursor from `stream_state`.

        `stream_state` is passed to `_set_initial_state` once every attribute has been initialized;
        see that method for the accepted formats.
        """
        self._global_cursor: Optional[StreamState] = {}
        self._stream_name = stream_name
        self._stream_namespace = stream_namespace
        self._message_repository = message_repository
        self._connector_state_manager = connector_state_manager
        self._connector_state_converter = connector_state_converter
        self._cursor_field = cursor_field

        self._cursor_factory = cursor_factory
        self._partition_router = partition_router

        # The dict is ordered to ensure that once the maximum number of partitions is reached,
        # the oldest partitions can be efficiently removed, maintaining the most recent partitions.
        self._cursor_per_partition: OrderedDict[str, ConcurrentCursor] = OrderedDict()
        # One semaphore per partition: released for every slice generated, acquired for every slice closed,
        # so a value of 0 means no generated slice is still open.
        self._semaphore_per_partition: OrderedDict[str, threading.Semaphore] = OrderedDict()
        # Keys of partitions whose last slice has been generated.
        self._finished_partitions: set[str] = set()
        self._lock = threading.Lock()
        self._timer = Timer()
        self._new_global_cursor: Optional[StreamState] = None
        self._lookback_window: int = 0
        self._parent_state: Optional[StreamState] = None
        # Number of evictions performed by `_ensure_partition_limit`.
        self._over_limit: int = 0
        self._use_global_cursor: bool = False
        self._partition_serializer = PerPartitionKeySerializer()

        self._set_initial_state(stream_state)

    @property
    def cursor_field(self) -> CursorField:
        """The cursor field this cursor was configured with."""
        return self._cursor_field

    @property
    def state(self) -> MutableMapping[str, Any]:
        """
        Build the state mapping for the stream.

        The result always contains `use_global_cursor` and `lookback_window`. Per-partition
        states are included only while the global cursor is not in use; the global state and
        the parent state are included when they are set.
        """
        state: dict[str, Any] = {"use_global_cursor": self._use_global_cursor}
        if not self._use_global_cursor:
            states = []
            for partition_tuple, cursor in self._cursor_per_partition.items():
                # Partitions whose cursor has no state yet are left out of the output.
                if cursor.state:
                    states.append(
                        {
                            "partition": self._to_dict(partition_tuple),
                            "cursor": copy.deepcopy(cursor.state),
                        }
                    )
            state[self._PERPARTITION_STATE_KEY] = states

        if self._global_cursor:
            state[self._GLOBAL_STATE_KEY] = self._global_cursor
        if self._lookback_window is not None:
            state["lookback_window"] = self._lookback_window
        if self._parent_state is not None:
            state["parent_state"] = self._parent_state
        return state

    def close_partition(self, partition: Partition) -> None:
        """
        Close one slice of a partition and, once the whole partition is done, advance the pending global cursor.

        Raises:
            ValueError: if the partition does not expose a stream slice.
        """
        # Attempt to retrieve the stream slice
        stream_slice: Optional[StreamSlice] = partition.to_slice()  # type: ignore[assignment]

        # Ensure stream_slice is not None
        if stream_slice is None:
            raise ValueError("stream_slice cannot be None")

        partition_key = self._to_partition_key(stream_slice.partition)
        self._cursor_per_partition[partition_key].close_partition(partition=partition)
        with self._lock:
            # Balance the `release()` done when this slice was generated.
            self._semaphore_per_partition[partition_key].acquire()
            cursor = self._cursor_per_partition[partition_key]
            if (
                partition_key in self._finished_partitions
                and self._semaphore_per_partition[partition_key]._value == 0
            ):
                # Every slice of this partition has been generated and closed:
                # keep the greatest cursor value seen so far as the candidate global cursor.
                if (
                    self._new_global_cursor is None
                    or self._new_global_cursor[self.cursor_field.cursor_field_key]
                    < cursor.state[self.cursor_field.cursor_field_key]
                ):
                    self._new_global_cursor = copy.deepcopy(cursor.state)
            if not self._use_global_cursor:
                self._emit_state_message()

    def ensure_at_least_one_state_emitted(self) -> None:
        """
        The platform expects to have at least one state message on successful syncs. Hence, whatever happens, we expect this method to be
        called.
        """
        # Promote the pending global cursor only when no generated slice is still open.
        if not any(semaphore._value for semaphore in self._semaphore_per_partition.values()):
            self._global_cursor = self._new_global_cursor
            self._lookback_window = self._timer.finish()
            self._parent_state = self._partition_router.get_stream_state()
        self._emit_state_message()

    def _emit_state_message(self) -> None:
        """Push the current state to the state manager and emit the resulting state message."""
        self._connector_state_manager.update_state_for_stream(
            self._stream_name,
            self._stream_namespace,
            self.state,
        )
        state_message = self._connector_state_manager.create_state_message(
            self._stream_name, self._stream_namespace
        )
        self._message_repository.emit_message(state_message)

    def stream_slices(self) -> Iterable[StreamSlice]:
        """
        Yield the slices of every partition produced by the partition router.

        Raises:
            RuntimeError: if the timer is already running, i.e. slices were already requested.
        """
        if self._timer.is_running():
            raise RuntimeError("stream_slices has been executed more than once.")

        slices = self._partition_router.stream_slices()
        self._timer.start()
        for partition in slices:
            yield from self._generate_slices_from_partition(partition)

    def _generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
        """Yield one `StreamSlice` per cursor slice of `partition`, creating its cursor if needed."""
        # Ensure the maximum number of partitions is not exceeded
        self._ensure_partition_limit()

        partition_key = self._to_partition_key(partition.partition)
        cursor = self._cursor_per_partition.get(partition_key)
        if not cursor:
            # Unknown partition: start from the global cursor (if any) and its lookback window.
            cursor = self._create_cursor(
                self._global_cursor,
                self._lookback_window if self._global_cursor else 0,
            )
            with self._lock:
                self._cursor_per_partition[partition_key] = cursor
                self._semaphore_per_partition[partition_key] = threading.Semaphore(0)

        for cursor_slice, is_last_slice, _ in iterate_with_last_flag_and_state(
            cursor.stream_slices(),
            lambda: None,
        ):
            # Count the slice as open before handing it out; `close_partition` acquires it back.
            self._semaphore_per_partition[partition_key].release()
            if is_last_slice:
                self._finished_partitions.add(partition_key)
            yield StreamSlice(
                partition=partition, cursor_slice=cursor_slice, extra_fields=partition.extra_fields
            )

    def _ensure_partition_limit(self) -> None:
        """
        Ensure the maximum number of partitions does not exceed the predefined limit.

        Steps:
        1. Attempt to remove partitions that are marked as finished in `_finished_partitions`.
           These partitions are considered processed and safe to delete. Their semaphore and
           "finished" marker are removed together with the cursor so that the bookkeeping
           structures stay bounded as well.
        2. If the limit is still exceeded and no finished partitions are available for removal,
           remove the oldest partition unconditionally. We expect failed partitions to be removed.

        Logging:
        - Logs a warning each time a partition is removed, indicating whether it was finished
          or removed due to being the oldest.
        """
        with self._lock:
            while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
                self._over_limit += 1
                # Try removing finished partitions first
                for partition_key in list(self._cursor_per_partition.keys()):
                    if (
                        partition_key in self._finished_partitions
                        and self._semaphore_per_partition[partition_key]._value == 0
                    ):
                        oldest_partition = self._cursor_per_partition.pop(
                            partition_key
                        )  # Remove the oldest partition
                        # The partition is fully processed, so nothing will touch its semaphore again.
                        # Dropping it (and the "finished" marker) prevents unbounded growth and avoids a
                        # stale marker if the same partition key is re-created later.
                        self._semaphore_per_partition.pop(partition_key, None)
                        self._finished_partitions.discard(partition_key)
                        logger.warning(
                            f"The maximum number of partitions has been reached. Dropping the oldest finished partition: {oldest_partition}. Over limit: {self._over_limit}."
                        )
                        break
                else:
                    # If no finished partitions can be removed, fall back to removing the oldest partition
                    # NOTE(review): the semaphore of this partition is intentionally left in place because
                    # some of its slices may still be open; closing them will fail on the missing cursor.
                    oldest_partition = self._cursor_per_partition.popitem(last=False)[
                        1
                    ]  # Remove the oldest partition
                    logger.warning(
                        f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
                    )

    def _set_initial_state(self, stream_state: StreamState) -> None:
        """
        Initialize the cursor's state using the provided `stream_state`.

        This method supports global and per-partition state initialization.

        - **Global State**: If `states` is missing, the `state` is treated as global and applied to all partitions.
          The `global state` holds a single cursor position representing the latest processed record across all partitions.

        - **Lookback Window**: Configured via `lookback_window`, it defines the period (in seconds) for reprocessing records.
          This ensures robustness in case of upstream data delays or reordering. If not specified, it defaults to 0.

        - **Per-Partition State**: If `states` is present, each partition's cursor state is initialized separately.

        - **Parent State**: (if available) Used to initialize partition routers based on parent streams.

        Args:
            stream_state (StreamState): The state of the streams to be set. The format of the stream state should be:
                {
                    "states": [
                        {
                            "partition": {
                                "partition_key": "value"
                            },
                            "cursor": {
                                "last_updated": "2023-05-27T00:00:00Z"
                            }
                        }
                    ],
                    "state": {
                        "last_updated": "2023-05-27T00:00:00Z"
                    },
                    "lookback_window": 10,
                    "parent_state": {
                        "parent_stream_name": {
                            "last_updated": "2023-05-27T00:00:00Z"
                        }
                    }
                }
        """
        if not stream_state:
            return

        if (
            self._PERPARTITION_STATE_KEY not in stream_state
            and self._GLOBAL_STATE_KEY not in stream_state
        ):
            # We assume that `stream_state` is in a global format that can be applied to all partitions.
            # Example: {"global_state_format_key": "global_state_format_value"}
            self._set_global_state(stream_state)

        else:
            self._use_global_cursor = stream_state.get("use_global_cursor", False)

            self._lookback_window = int(stream_state.get("lookback_window", 0))

            for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
                partition_key = self._to_partition_key(state["partition"])
                self._cursor_per_partition[partition_key] = self._create_cursor(state["cursor"])
                self._semaphore_per_partition[partition_key] = threading.Semaphore(0)

            # set default state for missing partitions if it is per partition with fallback to global
            if self._GLOBAL_STATE_KEY in stream_state:
                self._set_global_state(stream_state[self._GLOBAL_STATE_KEY])

        # Set initial parent state
        if stream_state.get("parent_state"):
            self._parent_state = stream_state["parent_state"]

        # Set parent state for partition routers based on parent streams
        self._partition_router.set_initial_state(stream_state)

    def _set_global_state(self, stream_state: Mapping[str, Any]) -> None:
        """
        Initializes the global cursor state from the provided stream state.

        If the cursor field key is present in the stream state, its value is parsed,
        formatted, and stored as the global cursor. This ensures consistency in state
        representation across partitions. When the key is absent nothing is changed.
        """
        if self.cursor_field.cursor_field_key in stream_state:
            global_state_value = stream_state[self.cursor_field.cursor_field_key]
            final_format_global_state_value = self._connector_state_converter.output_format(
                self._connector_state_converter.parse_value(global_state_value)
            )

            fixed_global_state = {
                self.cursor_field.cursor_field_key: final_format_global_state_value
            }

            # Two independent copies: the pending cursor is replaced later without touching the current one.
            self._global_cursor = deepcopy(fixed_global_state)
            self._new_global_cursor = deepcopy(fixed_global_state)

    def observe(self, record: Record) -> None:
        """
        Forward `record` to the cursor of its partition.

        Also switches to the global cursor once `limit_reached()` reports that the partition limit was exceeded.

        Raises:
            ValueError: if the record has no associated slice.
        """
        if not self._use_global_cursor and self.limit_reached():
            self._use_global_cursor = True

        if not record.associated_slice:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        self._cursor_per_partition[
            self._to_partition_key(record.associated_slice.partition)
        ].observe(record)

    def _to_partition_key(self, partition: Mapping[str, Any]) -> str:
        """Serialize a partition mapping into the string key used by the internal dictionaries."""
        return self._partition_serializer.to_partition_key(partition)

    def _to_dict(self, partition_key: str) -> Mapping[str, Any]:
        """Turn a serialized partition key back into its partition mapping."""
        return self._partition_serializer.to_partition(partition_key)

    def _create_cursor(
        self, cursor_state: Any, runtime_lookback_window: int = 0
    ) -> ConcurrentCursor:
        """
        Create a cursor through the factory.

        The factory receives a deep copy of `cursor_state`, and `runtime_lookback_window`
        (in seconds) converted to a `timedelta`.
        """
        cursor = self._cursor_factory.create(
            stream_state=deepcopy(cursor_state),
            runtime_lookback_window=timedelta(seconds=runtime_lookback_window),
        )
        return cursor

    def should_be_synced(self, record: Record) -> bool:
        """Delegate the decision to the cursor of the record's partition."""
        return self._get_cursor(record).should_be_synced(record)

    def _get_cursor(self, record: Record) -> ConcurrentCursor:
        """
        Return the cursor tracking the partition of `record`.

        Raises:
            ValueError: if the record has no associated slice or its partition has no cursor.
        """
        if not record.associated_slice:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        partition_key = self._to_partition_key(record.associated_slice.partition)
        if partition_key not in self._cursor_per_partition:
            raise ValueError(
                "Invalid state as stream slices that are emitted should refer to an existing cursor"
            )
        cursor = self._cursor_per_partition[partition_key]
        return cursor

    def limit_reached(self) -> bool:
        """Tell whether more partitions were evicted than `DEFAULT_MAX_PARTITIONS_NUMBER`."""
        return self._over_limit > self.DEFAULT_MAX_PARTITIONS_NUMBER
@@ -1,143 +0,0 @@
1
- """Contains functions to compile custom code from text."""
2
-
3
- import hashlib
4
- import os
5
- import sys
6
- from collections.abc import Mapping
7
- from types import ModuleType
8
- from typing import Any, cast
9
-
10
- from typing_extensions import Literal
11
-
12
# Names of the checksum algorithms accepted for validating injected code.
ChecksumType = Literal["md5", "sha256"]
# Maps each supported checksum name to the `hashlib` constructor that computes it.
CHECKSUM_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha256": hashlib.sha256,
}
# Names under which the components module is looked up and registered in `sys.modules`.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"
# Config keys. NOTE(review): INJECTED_MANIFEST is not referenced in this module; presumably consumed elsewhere.
INJECTED_MANIFEST = "__injected_declarative_manifest"
# Config key holding the text of a custom `components.py`.
INJECTED_COMPONENTS_PY = "__injected_components_py"
# Config key holding the checksums used to validate that text.
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"
# Environment variable that must be set to "true" (case-insensitive) to allow custom code execution.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
23
-
24
-
25
class AirbyteCodeTamperedError(Exception):
    """Signals that the connector's components code does not match one of its checksums.

    Treat this as fatal: a mismatch may indicate that the code was tampered with.
    """
30
-
31
-
32
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signals an attempt to run custom code where the environment does not allow it."""

    def __init__(self) -> None:
        # The message is fixed; callers raise this error without arguments.
        message = (
            "Custom connector code is not permitted in this environment. "
            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` "
            "environment variable to 'true' in your Airbyte environment. "
            "If you see this message in Airbyte Cloud, your workspace does not allow executing "
            "custom connector code."
        )
        super().__init__(message)
43
-
44
-
45
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Compute the hex digest of `input_text` with the algorithm named by `hash_type`.

    Raises:
        ValueError: if `input_text` is empty.
        KeyError: if `hash_type` is not a key of `CHECKSUM_FUNCTIONS`.
    """
    if input_text:
        # hashlib constructors accept the initial data directly.
        hasher = CHECKSUM_FUNCTIONS[hash_type](input_text.encode())
        return hasher.hexdigest()

    raise ValueError("Input text cannot be empty.")
53
-
54
-
55
def custom_code_execution_permitted() -> bool:
    """Tell whether custom code may be executed in this environment.

    Execution is allowed only when the `AIRBYTE_ALLOW_CUSTOM_CODE` environment variable
    equals 'true', compared case-insensitively. An unset variable means not allowed.
    """
    flag_value = os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return flag_value.lower() == "true"
61
-
62
-
63
def validate_python_code(
    code_text: str,
    checksums: dict[str, str] | None,
) -> None:
    """Check `code_text` against every entry of `checksums`.

    A missing or empty `checksums` mapping is currently rejected, although this may change in the future.

    Raises:
        ValueError: if no checksum is given or a checksum type is not supported.
        AirbyteCodeTamperedError: if a computed digest differs from the expected one.
    """
    if not checksums:
        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

    for checksum_type, expected_digest in checksums.items():
        if checksum_type in CHECKSUM_FUNCTIONS:
            if _hash_text(code_text, checksum_type) == expected_digest:
                continue
            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")

        raise ValueError(
            f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
        )
82
-
83
-
84
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module for the given config.

    When the config carries custom `components.py` text, that text is validated, executed and
    registered in `sys.modules` so manifest declarations can import the classes it defines.
    Otherwise the module already registered in `sys.modules` is returned, trying
    `source_declarative_manifest.components` first and `components` second.

    Returns `None` when no custom code is provided and neither module is registered.

    Raises:
        AirbyteCustomCodeNotPermittedError: if custom code is provided but not permitted here.
    """
    if not config or INJECTED_COMPONENTS_PY not in config:
        # No injected code: fall back to whatever is already registered.
        for module_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
            if module_name in sys.modules:
                return cast(ModuleType, sys.modules.get(module_name))

        # Could not find module 'components' in `sys.modules`
        # and INJECTED_COMPONENTS_PY was not provided in config.
        return None

    if not custom_code_execution_permitted():
        raise AirbyteCustomCodeNotPermittedError()

    # Build a module from the provided Python code text and register it.
    injected_code: str = config[INJECTED_COMPONENTS_PY]
    injected_checksums = config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None)
    return register_components_module_from_string(
        components_py_text=injected_code,
        checksums=injected_checksums,
    )
118
-
119
-
120
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Validate, execute and register components code given as a string; return the new module.

    Validation errors raised by `validate_python_code` propagate before anything is executed.
    """
    # Nothing runs unless the checksums match.
    validate_python_code(code_text=components_py_text, checksums=checksums)

    module = ModuleType(name=COMPONENTS_MODULE_NAME)

    # SECURITY: this runs arbitrary code from the config inside the new module's namespace.
    exec(components_py_text, module.__dict__)

    # Make the module importable as `source_declarative_manifest.components` and as `components`.
    for registered_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        sys.modules[registered_name] = module

    return module
@@ -1,99 +0,0 @@
1
- from abc import ABC
2
- from datetime import datetime, timedelta
3
- from enum import Enum
4
- from typing import Callable
5
-
6
- from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType
7
-
8
-
9
class ClampingStrategy(ABC):
    """Base class for strategies that map a cursor value onto another cursor value."""

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Return the clamped counterpart of `value`; subclasses must override this."""
        raise NotImplementedError()
12
-
13
-
14
class NoClamping(ClampingStrategy):
    """Strategy that leaves cursor values untouched."""

    def clamp(self, value: CursorValueType) -> CursorValueType:
        """Return `value` as is."""
        return value
17
-
18
-
19
class ClampingEndProvider:
    """Callable producing an end value that is clamped and then moved back by one granularity."""

    def __init__(
        self,
        clamping_strategy: ClampingStrategy,
        end_provider: Callable[[], CursorValueType],
        granularity: timedelta,
    ) -> None:
        self._clamping_strategy = clamping_strategy
        self._end_provider = end_provider
        self._granularity = granularity

    def __call__(self) -> CursorValueType:
        """Fetch the end value, clamp it, and subtract the granularity."""
        raw_end = self._end_provider()
        clamped_end = self._clamping_strategy.clamp(raw_end)
        return clamped_end - self._granularity
32
-
33
-
34
class DayClampingStrategy(ClampingStrategy):
    """Clamps a datetime to a day boundary.

    With `is_ceiling` the result is midnight of the following day (even when `value` is
    already at midnight); otherwise it is midnight of the same day.
    """

    def __init__(self, is_ceiling: bool = True) -> None:
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        return midnight + timedelta(days=1) if self._is_ceiling else midnight
43
-
44
-
45
class MonthClampingStrategy(ClampingStrategy):
    """Clamps a datetime to a month boundary.

    A value falling on the first day of a month is only truncated to midnight. Any other
    value goes to the first day of the next month (`is_ceiling`) or of its own month.
    """

    def __init__(self, is_ceiling: bool = True) -> None:
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        if value.day == 1:
            # Already on a month boundary day: no rounding, whatever the direction.
            return midnight

        if self._is_ceiling:
            return self._ceil(midnight)
        return midnight.replace(day=1)

    def _ceil(self, value: datetime) -> datetime:
        """Return midnight on the first day of the month following `value`."""
        is_december = value.month == 12
        next_month_year = value.year + 1 if is_december else value.year
        # `% 12` wraps December (12) around to January (1).
        next_month = (value.month % 12) + 1
        return value.replace(
            year=next_month_year,
            month=next_month,
            day=1,
            hour=0,
            minute=0,
            second=0,
            microsecond=0,
        )
67
-
68
-
69
class Weekday(Enum):
    """
    Days of the week, numbered the same way as `datetime.date.weekday()` (Monday is 0, Sunday is 6).
    """

    MONDAY = 0
    TUESDAY = 1
    WEDNESDAY = 2
    THURSDAY = 3
    FRIDAY = 4
    SATURDAY = 5
    SUNDAY = 6
81
-
82
-
83
class WeekClampingStrategy(ClampingStrategy):
    """Clamps a datetime to midnight of a given day of the week.

    With `is_ceiling` the result is the next occurrence of the target weekday (the same day
    when `value` already falls on it); otherwise it is exactly seven days before that.
    """

    def __init__(self, day_of_week: Weekday, is_ceiling: bool = True) -> None:
        self._day_of_week = day_of_week.value
        self._is_ceiling = is_ceiling

    def clamp(self, value: datetime) -> datetime:  # type: ignore  # datetime implements method from CursorValueType
        current_weekday = value.weekday()
        if current_weekday > self._day_of_week:
            # The target weekday already passed this week: count forward into the next week.
            days_until_target = 7 - (current_weekday - self._day_of_week)
        else:
            days_until_target = abs(current_weekday - self._day_of_week)

        offset_in_days = days_until_target if self._is_ceiling else days_until_target - 7
        midnight = value.replace(hour=0, minute=0, second=0, microsecond=0)
        return midnight + timedelta(offset_in_days)
@@ -1,32 +0,0 @@
1
- from abc import abstractmethod
2
- from typing import Protocol
3
-
4
-
5
class GapType(Protocol):
    """
    Represents the distance between two cursor values. For instance:
    * datetime cursor values have a timedelta gap
    * integer cursor values have an integer gap
    """

    ...
13
-
14
-
15
class CursorValueType(Protocol):
    """Protocol describing cursor values: comparable, and shiftable by a gap."""

    @abstractmethod
    def __lt__(self: "CursorValueType", other: "CursorValueType") -> bool:
        ...

    @abstractmethod
    def __ge__(self: "CursorValueType", other: "CursorValueType") -> bool:
        ...

    @abstractmethod
    def __add__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        ...

    @abstractmethod
    def __sub__(self: "CursorValueType", other: GapType) -> "CursorValueType":
        ...