airbyte-cdk 6.21.1.dev0__py3-none-any.whl → 6.26.0.dev4103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +6 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +68 -11
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +81 -16
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +58 -2
- airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +334 -0
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -4
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +50 -14
- airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +220 -22
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +15 -0
- airbyte_cdk/sources/file_based/config/identities_based_stream_config.py +8 -0
- airbyte_cdk/sources/file_based/config/permissions.py +34 -0
- airbyte_cdk/sources/file_based/file_based_source.py +65 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +33 -0
- airbyte_cdk/sources/file_based/schema_helpers.py +25 -0
- airbyte_cdk/sources/file_based/stream/__init__.py +2 -1
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +29 -0
- airbyte_cdk/sources/file_based/stream/identities_stream.py +99 -0
- airbyte_cdk/sources/http_logger.py +1 -1
- airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
- airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
- airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +20 -20
- airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/METADATA +3 -3
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/RECORD +39 -31
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py
@@ -0,0 +1,143 @@
+"""Contains functions to compile custom code from text."""
+
+import hashlib
+import os
+import sys
+from collections.abc import Mapping
+from types import ModuleType
+from typing import Any, cast
+
+from typing_extensions import Literal
+
+ChecksumType = Literal["md5", "sha256"]
+CHECKSUM_FUNCTIONS = {
+    "md5": hashlib.md5,
+    "sha256": hashlib.sha256,
+}
+COMPONENTS_MODULE_NAME = "components"
+SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"
+INJECTED_MANIFEST = "__injected_declarative_manifest"
+INJECTED_COMPONENTS_PY = "__injected_components_py"
+INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"
+ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
+
+
+class AirbyteCodeTamperedError(Exception):
+    """Raised when the connector's components module does not match its checksum.
+
+    This is a fatal error, as it can be a sign of code tampering.
+    """
+
+
+class AirbyteCustomCodeNotPermittedError(Exception):
+    """Raised when custom code is attempted to be run in an environment that does not support it."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            "Custom connector code is not permitted in this environment. "
+            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` "
+            "environment variable to 'true' in your Airbyte environment. "
+            "If you see this message in Airbyte Cloud, your workspace does not allow executing "
+            "custom connector code."
+        )
+
+
+def _hash_text(input_text: str, hash_type: str = "md5") -> str:
+    """Return the hash of the input text using the specified hash type."""
+    if not input_text:
+        raise ValueError("Input text cannot be empty.")
+
+    hash_object = CHECKSUM_FUNCTIONS[hash_type]()
+    hash_object.update(input_text.encode())
+    return hash_object.hexdigest()
+
+
+def custom_code_execution_permitted() -> bool:
+    """Return `True` if custom code execution is permitted, otherwise `False`.
+
+    Custom code execution is permitted if the `AIRBYTE_ALLOW_CUSTOM_CODE` environment variable is set to 'true'.
+    """
+    return os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "").lower() == "true"
+
+
+def validate_python_code(
+    code_text: str,
+    checksums: dict[str, str] | None,
+) -> None:
+    """Validate the provided Python code text against the provided checksums.
+
+    Currently we fail if no checksums are provided, although this may change in the future.
+    """
+    if not checksums:
+        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")
+
+    for checksum_type, checksum in checksums.items():
+        if checksum_type not in CHECKSUM_FUNCTIONS:
+            raise ValueError(
+                f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {CHECKSUM_FUNCTIONS.keys()}"
+            )
+
+        if _hash_text(code_text, checksum_type) != checksum:
+            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
+
+
+def get_registered_components_module(
+    config: Mapping[str, Any] | None,
+) -> ModuleType | None:
+    """Get a components module object based on the provided config.
+
+    If custom python components is provided, this will be loaded. Otherwise, we will
+    attempt to load from the `components` module already imported/registered in sys.modules.
+
+    If custom `components.py` text is provided in config, it will be registered with sys.modules
+    so that it can be later imported by manifest declarations which reference the provided classes.
+
+    Returns `None` if no components is provided and the `components` module is not found.
+    """
+    if config and INJECTED_COMPONENTS_PY in config:
+        if not custom_code_execution_permitted():
+            raise AirbyteCustomCodeNotPermittedError
+
+        # Create a new module object and execute the provided Python code text within it
+        python_text: str = config[INJECTED_COMPONENTS_PY]
+        return register_components_module_from_string(
+            components_py_text=python_text,
+            checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None),
+        )
+
+    # Check for `components` or `source_declarative_manifest.components`.
+    if SDM_COMPONENTS_MODULE_NAME in sys.modules:
+        return cast(ModuleType, sys.modules.get(SDM_COMPONENTS_MODULE_NAME))
+
+    if COMPONENTS_MODULE_NAME in sys.modules:
+        return cast(ModuleType, sys.modules.get(COMPONENTS_MODULE_NAME))
+
+    # Could not find module 'components' in `sys.modules`
+    # and INJECTED_COMPONENTS_PY was not provided in config.
+    return None
+
+
+def register_components_module_from_string(
+    components_py_text: str,
+    checksums: dict[str, Any] | None,
+) -> ModuleType:
+    """Load and return the components module from a provided string containing the python code."""
+    # First validate the code
+    validate_python_code(
+        code_text=components_py_text,
+        checksums=checksums,
+    )
+
+    # Create a new module object
+    components_module = ModuleType(name=COMPONENTS_MODULE_NAME)
+
+    # Execute the module text in the module's namespace
+    exec(components_py_text, components_module.__dict__)
+
+    # Register the module in `sys.modules` so it can be imported as
+    # `source_declarative_manifest.components` and/or `components`.
+    sys.modules[SDM_COMPONENTS_MODULE_NAME] = components_module
+    sys.modules[COMPONENTS_MODULE_NAME] = components_module
+
+    # Now you can import and use the module
+    return components_module
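
The new module above exposes a small public surface (custom_code_execution_permitted, validate_python_code, get_registered_components_module). A minimal sketch of how a caller could exercise it, assuming this wheel is installed; the MyCustomExtractor class and the config values are illustrative only and are not part of the diff:

# Sketch only: exercising the custom_code_compiler API added in this release.
# "MyCustomExtractor" and the config dict are hypothetical examples.
import hashlib
import os

from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
    INJECTED_COMPONENTS_PY,
    INJECTED_COMPONENTS_PY_CHECKSUMS,
    custom_code_execution_permitted,
    get_registered_components_module,
)

# Custom code is gated behind an env var; without it the loader raises
# AirbyteCustomCodeNotPermittedError.
os.environ["AIRBYTE_ALLOW_CUSTOM_CODE"] = "true"
assert custom_code_execution_permitted()

components_py = "class MyCustomExtractor:\n    pass\n"
config = {
    INJECTED_COMPONENTS_PY: components_py,
    # A checksum is mandatory; a mismatch raises AirbyteCodeTamperedError.
    INJECTED_COMPONENTS_PY_CHECKSUMS: {"md5": hashlib.md5(components_py.encode()).hexdigest()},
}

module = get_registered_components_module(config=config)
assert module is not None and hasattr(module, "MyCustomExtractor")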
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -66,6 +66,7 @@ from airbyte_cdk.sources.declarative.decoders import (
     JsonlDecoder,
     PaginationDecoderDecorator,
     XmlDecoder,
+    ZipfileDecoder,
 )
 from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
     CompositeRawDecoder,
@@ -86,6 +87,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
 )
 from airbyte_cdk.sources.declarative.incremental import (
     ChildPartitionResumableFullRefreshCursor,
+    ConcurrentCursorFactory,
+    ConcurrentPerPartitionCursor,
     CursorFactory,
     DatetimeBasedCursor,
     DeclarativeCursor,
@@ -100,6 +103,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
     LegacyToPerPartitionStateMigration,
 )
 from airbyte_cdk.sources.declarative.models import (
+    Clamping,
     CustomStateMigration,
 )
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -359,6 +363,13 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     XmlDecoder as XmlDecoderModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    ZipfileDecoder as ZipfileDecoderModel,
+)
+from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
+    COMPONENTS_MODULE_NAME,
+    SDM_COMPONENTS_MODULE_NAME,
+)
 from airbyte_cdk.sources.declarative.partition_routers import (
     CartesianProductStreamSlicer,
     ListPartitionRouter,
@@ -452,6 +463,16 @@ from airbyte_cdk.sources.message import (
     InMemoryMessageRepository,
     LogAppenderMessageRepositoryDecorator,
     MessageRepository,
+    NoopMessageRepository,
+)
+from airbyte_cdk.sources.streams.concurrent.clamping import (
+    ClampingEndProvider,
+    ClampingStrategy,
+    DayClampingStrategy,
+    MonthClampingStrategy,
+    NoClamping,
+    WeekClampingStrategy,
+    Weekday,
 )
 from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
 from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -578,6 +599,7 @@ class ModelToComponentFactory:
             ConfigComponentsResolverModel: self.create_config_components_resolver,
             StreamConfigModel: self.create_stream_config,
             ComponentMappingDefinitionModel: self.create_components_mapping_definition,
+            ZipfileDecoderModel: self.create_zipfile_decoder,
         }

         # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -907,6 +929,8 @@ class ModelToComponentFactory:
         stream_namespace: Optional[str],
         config: Config,
         stream_state: MutableMapping[str, Any],
+        message_repository: Optional[MessageRepository] = None,
+        runtime_lookback_window: Optional[datetime.timedelta] = None,
         **kwargs: Any,
     ) -> ConcurrentCursor:
         component_type = component_definition.get("type")
@@ -968,10 +992,22 @@ class ModelToComponentFactory:
         connector_state_converter = CustomFormatConcurrentStreamStateConverter(
             datetime_format=datetime_format,
             input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
-            is_sequential_state=True,
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
             cursor_granularity=cursor_granularity,
         )

+        # Adjusts the stream state by applying the runtime lookback window.
+        # This is used to ensure correct state handling in case of failed partitions.
+        stream_state_value = stream_state.get(cursor_field.cursor_field_key)
+        if runtime_lookback_window and stream_state_value:
+            new_stream_state = (
+                connector_state_converter.parse_timestamp(stream_state_value)
+                - runtime_lookback_window
+            )
+            stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
+                new_stream_state
+            )
+
         start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
         if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
             start_date_runtime_value = self.create_min_max_datetime(
@@ -1038,11 +1074,58 @@ class ModelToComponentFactory:
         if evaluated_step:
             step_length = parse_duration(evaluated_step)

+        clamping_strategy: ClampingStrategy = NoClamping()
+        if datetime_based_cursor_model.clamping:
+            # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
+            # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
+            # object which we want to keep agnostic of being low-code
+            target = InterpolatedString(
+                string=datetime_based_cursor_model.clamping.target,
+                parameters=datetime_based_cursor_model.parameters or {},
+            )
+            evaluated_target = target.eval(config=config)
+            match evaluated_target:
+                case "DAY":
+                    clamping_strategy = DayClampingStrategy()
+                    end_date_provider = ClampingEndProvider(
+                        DayClampingStrategy(is_ceiling=False),
+                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+                        granularity=cursor_granularity or datetime.timedelta(seconds=1),
+                    )
+                case "WEEK":
+                    if (
+                        not datetime_based_cursor_model.clamping.target_details
+                        or "weekday" not in datetime_based_cursor_model.clamping.target_details
+                    ):
+                        raise ValueError(
+                            "Given WEEK clamping, weekday needs to be provided as target_details"
+                        )
+                    weekday = self._assemble_weekday(
+                        datetime_based_cursor_model.clamping.target_details["weekday"]
+                    )
+                    clamping_strategy = WeekClampingStrategy(weekday)
+                    end_date_provider = ClampingEndProvider(
+                        WeekClampingStrategy(weekday, is_ceiling=False),
+                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+                        granularity=cursor_granularity or datetime.timedelta(days=1),
+                    )
+                case "MONTH":
+                    clamping_strategy = MonthClampingStrategy()
+                    end_date_provider = ClampingEndProvider(
+                        MonthClampingStrategy(is_ceiling=False),
+                        end_date_provider,  # type: ignore  # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+                        granularity=cursor_granularity or datetime.timedelta(days=1),
+                    )
+                case _:
+                    raise ValueError(
+                        f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
+                    )
+
         return ConcurrentCursor(
             stream_name=stream_name,
             stream_namespace=stream_namespace,
             stream_state=stream_state,
-            message_repository=self._message_repository,
+            message_repository=message_repository or self._message_repository,
             connector_state_manager=state_manager,
             connector_state_converter=connector_state_converter,
             cursor_field=cursor_field,
@@ -1052,6 +1135,83 @@ class ModelToComponentFactory:
             lookback_window=lookback_window,
             slice_range=step_length,
             cursor_granularity=cursor_granularity,
+            clamping_strategy=clamping_strategy,
+        )
+
+    def _assemble_weekday(self, weekday: str) -> Weekday:
+        match weekday:
+            case "MONDAY":
+                return Weekday.MONDAY
+            case "TUESDAY":
+                return Weekday.TUESDAY
+            case "WEDNESDAY":
+                return Weekday.WEDNESDAY
+            case "THURSDAY":
+                return Weekday.THURSDAY
+            case "FRIDAY":
+                return Weekday.FRIDAY
+            case "SATURDAY":
+                return Weekday.SATURDAY
+            case "SUNDAY":
+                return Weekday.SUNDAY
+            case _:
+                raise ValueError(f"Unknown weekday {weekday}")
+
+    def create_concurrent_cursor_from_perpartition_cursor(
+        self,
+        state_manager: ConnectorStateManager,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        stream_state: MutableMapping[str, Any],
+        partition_router: PartitionRouter,
+        **kwargs: Any,
+    ) -> ConcurrentPerPartitionCursor:
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        datetime_based_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(datetime_based_cursor_model, DatetimeBasedCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {datetime_based_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            datetime_based_cursor_model.cursor_field,
+            parameters=datetime_based_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        # Create the cursor factory
+        cursor_factory = ConcurrentCursorFactory(
+            partial(
+                self.create_concurrent_cursor_from_datetime_based_cursor,
+                state_manager=state_manager,
+                model_type=model_type,
+                component_definition=component_definition,
+                stream_name=stream_name,
+                stream_namespace=stream_namespace,
+                config=config,
+                message_repository=NoopMessageRepository(),
+            )
+        )
+
+        # Return the concurrent cursor and state converter
+        return ConcurrentPerPartitionCursor(
+            cursor_factory=cursor_factory,
+            partition_router=partition_router,
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=self._message_repository,  # type: ignore
+            connector_state_manager=state_manager,
+            cursor_field=cursor_field,
         )

     @staticmethod
@@ -1097,7 +1257,6 @@ class ModelToComponentFactory:
         :param config: The custom defined connector config
         :return: The declarative component built from the Pydantic model to be used at runtime
         """
-
         custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
         component_fields = get_type_hints(custom_component_class)
         model_args = model.dict()
@@ -1151,14 +1310,38 @@ class ModelToComponentFactory:
         return custom_component_class(**kwargs)

     @staticmethod
-    def _get_class_from_fully_qualified_class_name(
+    def _get_class_from_fully_qualified_class_name(
+        full_qualified_class_name: str,
+    ) -> Any:
+        """Get a class from its fully qualified name.
+
+        If a custom components module is needed, we assume it is already registered - probably
+        as `source_declarative_manifest.components` or `components`.
+
+        Args:
+            full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
+
+        Returns:
+            Any: The class object.
+
+        Raises:
+            ValueError: If the class cannot be loaded.
+        """
         split = full_qualified_class_name.split(".")
-
+        module_name_full = ".".join(split[:-1])
         class_name = split[-1]
+
+        try:
+            module_ref = importlib.import_module(module_name_full)
+        except ModuleNotFoundError as e:
+            raise ValueError(f"Could not load module `{module_name_full}`.") from e
+
         try:
-            return getattr(
-        except AttributeError:
-            raise ValueError(
+            return getattr(module_ref, class_name)
+        except AttributeError as e:
+            raise ValueError(
+                f"Could not load class `{class_name}` from module `{module_name_full}`.",
+            ) from e

     @staticmethod
     def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
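
Because the reworked loader above resolves modules with importlib.import_module, a manifest class_name pointing at the injected components module resolves against whatever register_components_module_from_string placed in sys.modules. A minimal sketch of that lookup, assuming a components module has already been registered as in the earlier example; MyCustomExtractor is a hypothetical class name:

# Sketch only: mirrors the lookup performed by _get_class_from_fully_qualified_class_name.
# "MyCustomExtractor" is hypothetical and must exist in the registered components module.
import importlib

full_qualified_class_name = "source_declarative_manifest.components.MyCustomExtractor"
split = full_qualified_class_name.split(".")
module_name_full = ".".join(split[:-1])  # resolved from sys.modules when already registered
class_name = split[-1]

custom_class = getattr(importlib.import_module(module_name_full), class_name)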
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -1336,18 +1519,15 @@ class ModelToComponentFactory:
             raise ValueError(
                 "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
             )
-
-
-
-
-
-
-
-
-
-                else None
-            ),
-        }
+        cursor = (
+            combined_slicers
+            if isinstance(
+                combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
+            )
+            else self._create_component_from_model(model=model.incremental_sync, config=config)
+        )
+
+        client_side_incremental_sync = {"cursor": cursor}

         if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
             cursor_model = model.incremental_sync
@@ -1820,6 +2000,12 @@ class ModelToComponentFactory:
     ) -> GzipJsonDecoder:
         return GzipJsonDecoder(parameters={}, encoding=model.encoding)

+    def create_zipfile_decoder(
+        self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
+    ) -> ZipfileDecoder:
+        parser = self._create_component_from_model(model=model.parser, config=config)
+        return ZipfileDecoder(parser=parser)
+
     def create_gzip_parser(
         self, model: GzipParserModel, config: Config, **kwargs: Any
     ) -> GzipParser:
@@ -1914,6 +2100,12 @@ class ModelToComponentFactory:
     def create_oauth_authenticator(
         self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
     ) -> DeclarativeOauth2Authenticator:
+        profile_assertion = (
+            self._create_component_from_model(model.profile_assertion, config=config)
+            if model.profile_assertion
+            else None
+        )
+
         if model.refresh_token_updater:
             # ignore type error because fixing it would have a lot of dependencies, revisit later
             return DeclarativeSingleUseRefreshTokenOauth2Authenticator(  # type: ignore
@@ -1934,13 +2126,17 @@ class ModelToComponentFactory:
                 ).eval(config),
                 client_id=InterpolatedString.create(
                     model.client_id, parameters=model.parameters or {}
-                ).eval(config)
+                ).eval(config)
+                if model.client_id
+                else model.client_id,
                 client_secret_name=InterpolatedString.create(
                     model.client_secret_name or "client_secret", parameters=model.parameters or {}
                 ).eval(config),
                 client_secret=InterpolatedString.create(
                     model.client_secret, parameters=model.parameters or {}
-                ).eval(config)
+                ).eval(config)
+                if model.client_secret
+                else model.client_secret,
                 access_token_config_path=model.refresh_token_updater.access_token_config_path,
                 refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
                 token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
@@ -1986,6 +2182,8 @@ class ModelToComponentFactory:
             config=config,
             parameters=model.parameters or {},
             message_repository=self._message_repository,
+            profile_assertion=profile_assertion,
+            use_profile_assertion=model.use_profile_assertion,
         )

     def create_offset_increment(
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
@@ -296,8 +296,12 @@ class SubstreamPartitionRouter(PartitionRouter):

         if not parent_state and incremental_dependency:
             # Attempt to retrieve child state
-
-            substream_state =
+            substream_state_values = list(stream_state.values())
+            substream_state = substream_state_values[0] if substream_state_values else {}
+            # Filter out per partition state. Because we pass the state to the parent stream in the format {cursor_field: substream_state}
+            if isinstance(substream_state, (list, dict)):
+                substream_state = {}
+
             parent_state = {}

         # Copy child state to parent streams with incremental dependencies
airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py
@@ -8,6 +8,7 @@ from typing import Any, List, Mapping, Optional, Union
 import requests

 from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
+from airbyte_cdk.sources.streams.http.error_handlers.backoff_strategy import BackoffStrategy
 from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
     ErrorResolution,
     ResponseAction,
@@ -77,3 +78,24 @@ class CompositeErrorHandler(ErrorHandler):
                 return matched_error_resolution

         return create_fallback_error_resolution(response_or_exception)
+
+    @property
+    def backoff_strategies(self) -> Optional[List[BackoffStrategy]]:
+        """
+        Combines backoff strategies from all child error handlers into a single flattened list.
+
+        When used with HttpRequester, note the following behavior:
+        - In HttpRequester.__post_init__, the entire list of backoff strategies is assigned to the error handler
+        - However, the error handler's backoff_time() method only ever uses the first non-None strategy in the list
+        - This means that if any backoff strategies are present, the first non-None strategy becomes the default
+        - This applies to both user-defined response filters and errors from DEFAULT_ERROR_MAPPING
+        - The list structure is not used to map different strategies to different error conditions
+        - Therefore, subsequent strategies in the list will not be used
+
+        Returns None if no handlers have strategies defined, which will result in HttpRequester using its default backoff strategy.
+        """
+        all_strategies = []
+        for handler in self.error_handlers:
+            if hasattr(handler, "backoff_strategies") and handler.backoff_strategies:
+                all_strategies.extend(handler.backoff_strategies)
+        return all_strategies if all_strategies else None
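
The docstring above is the key behavioral note: although strategies from every child handler are flattened into one list, HttpRequester effectively applies only the first non-None strategy. A stand-alone sketch of that flattening, using stub objects rather than the real ErrorHandler/BackoffStrategy classes:

# Sketch only: stub objects stand in for the real declarative error handler classes.
from dataclasses import dataclass
from typing import List, Optional


@dataclass
class StubHandler:
    # Strategy names (strings) stand in for BackoffStrategy instances.
    backoff_strategies: Optional[List[str]] = None


def combined_backoff_strategies(error_handlers: List[StubHandler]) -> Optional[List[str]]:
    # Same shape as the new CompositeErrorHandler.backoff_strategies property:
    # concatenate every child's strategies, or return None when nothing is defined.
    all_strategies: List[str] = []
    for handler in error_handlers:
        if hasattr(handler, "backoff_strategies") and handler.backoff_strategies:
            all_strategies.extend(handler.backoff_strategies)
    return all_strategies or None


handlers = [StubHandler(["constant_5s"]), StubHandler(None), StubHandler(["exponential"])]
print(combined_backoff_strategies(handlers))  # ['constant_5s', 'exponential'] -- only the first is applied
print(combined_backoff_strategies([StubHandler(None)]))  # None -> HttpRequester falls back to its default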
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.
+            self.request_option_provider.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py
@@ -11,6 +11,9 @@ from pydantic.v1 import AnyUrl, BaseModel, Field

 from airbyte_cdk import OneOfOptionConfig
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
+from airbyte_cdk.sources.file_based.config.identities_based_stream_config import (
+    IdentitiesStreamConfig,
+)
 from airbyte_cdk.sources.utils import schema_helpers


@@ -22,6 +25,18 @@ class DeliverRecords(BaseModel):

     delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)

+    sync_acl_permissions: bool = Field(
+        title="Include ACL Permissions",
+        description="Joins Document allowlists to each stream.",
+        default=False,
+        airbyte_hidden=True,
+    )
+    identities: Optional[IdentitiesStreamConfig] = Field(
+        title="Identities configuration",
+        description="Configuration for identities",
+        airbyte_hidden=True,
+    )
+

 class DeliverRawFiles(BaseModel):
     class Config(OneOfOptionConfig):
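
Taken together, the two spec hunks above let a file-based connector opt into ACL syncing. A rough sketch of populating the new fields, assuming they validate as ordinary pydantic v1 models and that DeliverRecords stays importable from abstract_file_based_spec; the domain value is illustrative:

# Sketch only: illustrative values for the new ACL-related spec fields.
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import DeliverRecords
from airbyte_cdk.sources.file_based.config.identities_based_stream_config import (
    IdentitiesStreamConfig,
)

delivery_options = DeliverRecords(
    sync_acl_permissions=True,
    identities=IdentitiesStreamConfig(domain="example.com"),
)
print(delivery_options.dict())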
airbyte_cdk/sources/file_based/config/identities_based_stream_config.py
@@ -0,0 +1,8 @@
+from typing import Literal
+
+from pydantic.v1 import BaseModel, Field
+
+
+class IdentitiesStreamConfig(BaseModel):
+    name: Literal["identities"] = Field("identities", const=True, airbyte_hidden=True)
+    domain: str = Field(title="Domain", description="The domain of the identities.")
airbyte_cdk/sources/file_based/config/permissions.py
@@ -0,0 +1,34 @@
+#
+# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
+#
+
+import uuid
+from datetime import datetime
+from enum import Enum
+
+from pydantic.v1 import BaseModel
+
+
+class RemoteFileIdentityType(Enum):
+    USER = "user"
+    GROUP = "group"
+
+
+class RemoteFileIdentity(BaseModel):
+    id: uuid.UUID
+    remote_id: str
+    parent_id: str | None = None
+    name: str | None = None
+    description: str | None = None
+    email_address: str | None = None
+    member_email_addresses: list[str] | None = None
+    type: RemoteFileIdentityType
+    modified_at: datetime
+
+
+class RemoteFilePermissions(BaseModel):
+    id: str
+    file_path: str
+    allowed_identity_remote_ids: list[str] | None = None
+    denied_identity_remote_ids: list[str] | None = None
+    publicly_accessible: bool = False