airbyte-cdk 6.31.2.dev0__py3-none-any.whl → 6.32.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +9 -3
- airbyte_cdk/connector_builder/connector_builder_handler.py +3 -2
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +89 -23
- airbyte_cdk/sources/declarative/auth/token.py +8 -3
- airbyte_cdk/sources/declarative/auth/token_provider.py +4 -5
- airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +19 -9
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +134 -43
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +55 -16
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +400 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -7
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +20 -7
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +45 -15
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +343 -64
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +2 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +55 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +4 -4
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -5
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +5 -6
- airbyte_cdk/sources/declarative/requesters/request_option.py +4 -83
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +6 -7
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +2 -5
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +44 -5
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +22 -13
- airbyte_cdk/sources/streams/core.py +6 -6
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +231 -62
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +171 -88
- airbyte_cdk/sources/types.py +4 -2
- airbyte_cdk/sources/utils/transform.py +23 -2
- airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
- airbyte_cdk/utils/datetime_helpers.py +499 -0
- airbyte_cdk/utils/mapping_helpers.py +27 -86
- airbyte_cdk/utils/slice_hasher.py +8 -1
- airbyte_cdk-6.32.0.dist-info/LICENSE_SHORT +1 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/METADATA +6 -6
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/RECORD +55 -49
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/WHEEL +1 -1
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.31.2.dev0.dist-info → airbyte_cdk-6.32.0.dist-info}/entry_points.txt +0 -0
@@ -59,6 +59,11 @@ class CheckDynamicStream(BaseModel):
|
|
59
59
|
description="Numbers of the streams to try reading from when running a check operation.",
|
60
60
|
title="Stream Count",
|
61
61
|
)
|
62
|
+
use_check_availability: Optional[bool] = Field(
|
63
|
+
True,
|
64
|
+
description="Enables stream check availability. This field is automatically set by the CDK.",
|
65
|
+
title="Use Check Availability",
|
66
|
+
)
|
62
67
|
|
63
68
|
|
64
69
|
class ConcurrencyLevel(BaseModel):
|
@@ -328,6 +333,16 @@ class LegacyToPerPartitionStateMigration(BaseModel):
|
|
328
333
|
type: Optional[Literal["LegacyToPerPartitionStateMigration"]] = None
|
329
334
|
|
330
335
|
|
336
|
+
class Clamping(BaseModel):
    # Settings model for datetime-window clamping; `DatetimeBasedCursor` exposes it
    # through its optional `clamping` field.

    # Required (`...` means no default). Per the description, this names the period
    # that datetime windows are clamped by; the examples show it may also be an
    # interpolated config expression.
    target: str = Field(
        ...,
        description="The period of time that datetime windows will be clamped by",
        examples=["DAY", "WEEK", "MONTH", "{{ config['target'] }}"],
        title="Target",
    )
    # Optional free-form mapping. NOTE(review): the expected keys are not visible
    # in this hunk -- confirm against the component schema before relying on them.
    target_details: Optional[Dict[str, Any]] = None
|
344
|
+
|
345
|
+
|
331
346
|
class Algorithm(Enum):
|
332
347
|
HS256 = "HS256"
|
333
348
|
HS384 = "HS384"
|
@@ -496,8 +511,8 @@ class OAuthAuthenticator(BaseModel):
|
|
496
511
|
examples=["custom_app_id"],
|
497
512
|
title="Client ID Property Name",
|
498
513
|
)
|
499
|
-
client_id: str = Field(
|
500
|
-
|
514
|
+
client_id: Optional[str] = Field(
|
515
|
+
None,
|
501
516
|
description="The OAuth client ID. Fill it in the user inputs.",
|
502
517
|
examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
|
503
518
|
title="Client ID",
|
@@ -508,8 +523,8 @@ class OAuthAuthenticator(BaseModel):
|
|
508
523
|
examples=["custom_app_secret"],
|
509
524
|
title="Client Secret Property Name",
|
510
525
|
)
|
511
|
-
client_secret: str = Field(
|
512
|
-
|
526
|
+
client_secret: Optional[str] = Field(
|
527
|
+
None,
|
513
528
|
description="The OAuth client secret. Fill it in the user inputs.",
|
514
529
|
examples=[
|
515
530
|
"{{ config['client_secret }}",
|
@@ -614,6 +629,16 @@ class OAuthAuthenticator(BaseModel):
|
|
614
629
|
description="When the token updater is defined, new refresh tokens, access tokens and the access token expiry date are written back from the authentication response to the config object. This is important if the refresh token can only used once.",
|
615
630
|
title="Token Updater",
|
616
631
|
)
|
632
|
+
profile_assertion: Optional[JwtAuthenticator] = Field(
|
633
|
+
None,
|
634
|
+
description="The authenticator being used to authenticate the client authenticator.",
|
635
|
+
title="Profile Assertion",
|
636
|
+
)
|
637
|
+
use_profile_assertion: Optional[bool] = Field(
|
638
|
+
False,
|
639
|
+
description="Enable using profile assertion as a flow for OAuth authorization.",
|
640
|
+
title="Use Profile Assertion",
|
641
|
+
)
|
617
642
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
618
643
|
|
619
644
|
|
@@ -716,8 +741,13 @@ class HttpResponseFilter(BaseModel):
|
|
716
741
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
717
742
|
|
718
743
|
|
744
|
+
class ComplexFieldType(BaseModel):
    # Structured alternative to a plain type string; `TypesMap.target_type` accepts
    # an instance of this model in addition to `str` / `List[str]`.

    # Required name of the field type.
    field_type: str
    # Optional nested type: either a plain string or another `ComplexFieldType`.
    # The self-reference is a forward reference, resolved by the module-level
    # `ComplexFieldType.update_forward_refs()` call.
    # NOTE(review): presumably the element type of a container such as an array -- confirm.
    items: Optional[Union[str, ComplexFieldType]] = None
|
747
|
+
|
748
|
+
|
719
749
|
class TypesMap(BaseModel):
|
720
|
-
target_type: Union[str, List[str]]
|
750
|
+
target_type: Union[str, List[str], ComplexFieldType]
|
721
751
|
current_type: Union[str, List[str]]
|
722
752
|
condition: Optional[str] = None
|
723
753
|
|
@@ -1170,17 +1200,11 @@ class InjectInto(Enum):
|
|
1170
1200
|
|
1171
1201
|
class RequestOption(BaseModel):
|
1172
1202
|
type: Literal["RequestOption"]
|
1173
|
-
field_name:
|
1174
|
-
|
1175
|
-
description="Configures which key should be used in the location that the descriptor is being injected into
|
1203
|
+
field_name: str = Field(
|
1204
|
+
...,
|
1205
|
+
description="Configures which key should be used in the location that the descriptor is being injected into",
|
1176
1206
|
examples=["segment_id"],
|
1177
|
-
title="
|
1178
|
-
)
|
1179
|
-
field_path: Optional[List[str]] = Field(
|
1180
|
-
None,
|
1181
|
-
description="Configures a path to be used for nested structures in JSON body requests (e.g. GraphQL queries)",
|
1182
|
-
examples=[["data", "viewer", "id"]],
|
1183
|
-
title="Field Path",
|
1207
|
+
title="Request Option",
|
1184
1208
|
)
|
1185
1209
|
inject_into: InjectInto = Field(
|
1186
1210
|
...,
|
@@ -1457,6 +1481,11 @@ class AuthFlow(BaseModel):
|
|
1457
1481
|
|
1458
1482
|
class DatetimeBasedCursor(BaseModel):
|
1459
1483
|
type: Literal["DatetimeBasedCursor"]
|
1484
|
+
clamping: Optional[Clamping] = Field(
|
1485
|
+
None,
|
1486
|
+
description="This option is used to adjust the upper and lower boundaries of each datetime window to beginning and end of the provided target period (day, week, month)",
|
1487
|
+
title="Date Range Clamping",
|
1488
|
+
)
|
1460
1489
|
cursor_field: str = Field(
|
1461
1490
|
...,
|
1462
1491
|
description="The location of the value on a record that will be used as a bookmark during sync. To ensure no data loss, the API must return records in ascending order based on the cursor field. Nested fields are not supported, so the field must be at the top level of the record. You can use a combination of Add Field and Remove Field transformations to move the nested field to the top.",
|
@@ -2241,6 +2270,7 @@ class DynamicDeclarativeStream(BaseModel):
|
|
2241
2270
|
)
|
2242
2271
|
|
2243
2272
|
|
2273
|
+
ComplexFieldType.update_forward_refs()
|
2244
2274
|
CompositeErrorHandler.update_forward_refs()
|
2245
2275
|
DeclarativeSource1.update_forward_refs()
|
2246
2276
|
DeclarativeSource2.update_forward_refs()
|
@@ -0,0 +1,143 @@
|
|
1
|
+
"""Contains functions to compile custom code from text."""
|
2
|
+
|
3
|
+
import hashlib
|
4
|
+
import os
|
5
|
+
import sys
|
6
|
+
from collections.abc import Mapping
|
7
|
+
from types import ModuleType
|
8
|
+
from typing import Any, cast
|
9
|
+
|
10
|
+
from typing_extensions import Literal
|
11
|
+
|
12
|
+
# Names of the checksum algorithms this module knows about.
ChecksumType = Literal["md5", "sha256"]
# Maps each supported checksum name to the `hashlib` constructor used to compute it.
# `_hash_text` indexes this mapping and `validate_python_code` rejects names missing from it.
CHECKSUM_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha256": hashlib.sha256,
}
# `sys.modules` keys under which the components module is looked up and registered.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"
# NOTE(review): not referenced in this module; presumably the config key carrying an
# injected manifest -- verify against callers.
INJECTED_MANIFEST = "__injected_declarative_manifest"
# Config key whose value is the text of a custom `components.py`.
INJECTED_COMPONENTS_PY = "__injected_components_py"
# Config key whose value is the checksum mapping used to validate that text.
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"
# Environment variable that must equal "true" (case-insensitive) to allow custom code.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
|
23
|
+
|
24
|
+
|
25
|
+
class AirbyteCodeTamperedError(Exception):
    """Signal that the components module text failed checksum verification.

    Treated as fatal, because a mismatch may mean the code was tampered with.
    """
|
30
|
+
|
31
|
+
|
32
|
+
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signal an attempt to run custom code where the environment does not allow it."""

    def __init__(self) -> None:
        # The message is fixed; it is assembled sentence by sentence and joined
        # with single spaces so the final text stays exactly the same.
        sentences = (
            "Custom connector code is not permitted in this environment.",
            "If you need to run custom code, please ask your administrator to set the "
            "`AIRBYTE_ALLOW_CUSTOM_CODE` environment variable to 'true' in your Airbyte environment.",
            "If you see this message in Airbyte Cloud, your workspace does not allow executing "
            "custom connector code.",
        )
        super().__init__(" ".join(sentences))
|
43
|
+
|
44
|
+
|
45
|
+
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Compute the hex digest of `input_text` with the algorithm named by `hash_type`.

    Raises:
        ValueError: if `input_text` is empty.
        KeyError: if `hash_type` is not a key of `CHECKSUM_FUNCTIONS`.
    """
    if not input_text:
        raise ValueError("Input text cannot be empty.")

    # Resolve the constructor first so an unknown algorithm fails before any encoding.
    make_hasher = CHECKSUM_FUNCTIONS[hash_type]
    return make_hasher(input_text.encode()).hexdigest()
|
53
|
+
|
54
|
+
|
55
|
+
def custom_code_execution_permitted() -> bool:
    """Report whether the environment allows running custom code.

    The answer is `True` only when the `AIRBYTE_ALLOW_CUSTOM_CODE` environment
    variable holds 'true' (compared case-insensitively); an unset variable
    counts as not permitted.
    """
    raw_flag = os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return raw_flag.lower() == "true"
|
61
|
+
|
62
|
+
|
63
|
+
def validate_python_code(
    code_text: str,
    checksums: dict[str, str] | None,
) -> None:
    """Validate the provided Python code text against the provided checksums.

    Every entry in `checksums` maps a checksum type (a key of `CHECKSUM_FUNCTIONS`)
    to the expected hex digest of `code_text`; all entries must match.

    Currently we fail if no checksums are provided, although this may change in the future.

    Raises:
        ValueError: if `checksums` is empty or `None`, or names an unsupported checksum type.
        AirbyteCodeTamperedError: if any computed digest differs from the expected one.
    """
    if not checksums:
        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

    for checksum_type, checksum in checksums.items():
        if checksum_type not in CHECKSUM_FUNCTIONS:
            # Render the supported names as a plain list; interpolating the
            # `.keys()` view would print as "dict_keys([...])" in the message.
            raise ValueError(
                f"Unsupported checksum type: {checksum_type}. "
                f"Supported checksum types are: {list(CHECKSUM_FUNCTIONS)}"
            )

        if _hash_text(code_text, checksum_type) != checksum:
            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
|
82
|
+
|
83
|
+
|
84
|
+
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module for the given config.

    When the config carries custom `components.py` text, that text is validated,
    executed and registered in `sys.modules` (so manifest declarations that
    reference its classes can import it later), and the resulting module is returned.

    Without injected text, an already registered module is returned instead:
    `source_declarative_manifest.components` is tried first, then `components`.

    Returns `None` when no text is injected and neither module is registered.

    Raises:
        AirbyteCustomCodeNotPermittedError: if text is injected but custom code
            execution is not permitted in this environment.
    """
    if not config or INJECTED_COMPONENTS_PY not in config:
        # Nothing injected: fall back to whatever is already in `sys.modules`.
        for module_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
            if module_name in sys.modules:
                return cast(ModuleType, sys.modules.get(module_name))
        return None

    # Injected code may only run when the environment explicitly opts in.
    if not custom_code_execution_permitted():
        raise AirbyteCustomCodeNotPermittedError()

    return register_components_module_from_string(
        components_py_text=config[INJECTED_COMPONENTS_PY],
        checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None),
    )
|
118
|
+
|
119
|
+
|
120
|
+
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Build a components module from Python source text and register it.

    The text is first checked with `validate_python_code`, which raises when the
    checksums are missing, unsupported, or do not match. It is then executed in
    the namespace of a fresh module object, and that object is stored in
    `sys.modules` under both `source_declarative_manifest.components` and
    `components`.

    Returns the newly created module.
    """
    # Refuse to go any further unless the text matches its checksums.
    validate_python_code(code_text=components_py_text, checksums=checksums)

    # SECURITY: `exec` runs whatever code the text contains; the checksum
    # validation above is the only safeguard applied inside this function.
    # NOTE(review): the `custom_code_execution_permitted()` gate is enforced in
    # `get_registered_components_module`, not here -- confirm that any direct
    # caller of this function is gated as well.
    module = ModuleType(name=COMPONENTS_MODULE_NAME)
    exec(components_py_text, module.__dict__)

    # Expose the module under both import names.
    for registered_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        sys.modules[registered_name] = module

    return module
|