airbyte-cdk 6.21.1.dev0__py3-none-any.whl → 6.26.0.dev4103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +6 -0
  2. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -0
  3. airbyte_cdk/sources/declarative/auth/oauth.py +68 -11
  4. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +81 -16
  5. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +58 -2
  6. airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
  7. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
  8. airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
  9. airbyte_cdk/sources/declarative/incremental/__init__.py +6 -0
  10. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +334 -0
  11. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +3 -0
  12. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +35 -3
  13. airbyte_cdk/sources/declarative/manifest_declarative_source.py +15 -4
  14. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +50 -14
  15. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +143 -0
  16. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +220 -22
  17. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +6 -2
  18. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +22 -0
  19. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  20. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +15 -0
  21. airbyte_cdk/sources/file_based/config/identities_based_stream_config.py +8 -0
  22. airbyte_cdk/sources/file_based/config/permissions.py +34 -0
  23. airbyte_cdk/sources/file_based/file_based_source.py +65 -1
  24. airbyte_cdk/sources/file_based/file_based_stream_reader.py +33 -0
  25. airbyte_cdk/sources/file_based/schema_helpers.py +25 -0
  26. airbyte_cdk/sources/file_based/stream/__init__.py +2 -1
  27. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +29 -0
  28. airbyte_cdk/sources/file_based/stream/identities_stream.py +99 -0
  29. airbyte_cdk/sources/http_logger.py +1 -1
  30. airbyte_cdk/sources/streams/concurrent/clamping.py +99 -0
  31. airbyte_cdk/sources/streams/concurrent/cursor.py +51 -57
  32. airbyte_cdk/sources/streams/concurrent/cursor_types.py +32 -0
  33. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +20 -20
  34. airbyte_cdk/test/utils/manifest_only_fixtures.py +1 -2
  35. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/METADATA +3 -3
  36. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/RECORD +39 -31
  37. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/LICENSE.txt +0 -0
  38. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/WHEEL +0 -0
  39. {airbyte_cdk-6.21.1.dev0.dist-info → airbyte_cdk-6.26.0.dev4103.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,143 @@
1
+ """Contains functions to compile custom code from text."""
2
+
3
+ import hashlib
4
+ import os
5
+ import sys
6
+ from collections.abc import Mapping
7
+ from types import ModuleType
8
+ from typing import Any, cast
9
+
10
+ from typing_extensions import Literal
11
+
12
# Names of the checksum algorithms accepted for validating custom components code.
ChecksumType = Literal["md5", "sha256"]
# Maps each supported checksum name to the hashlib constructor that computes it.
CHECKSUM_FUNCTIONS = {
    "md5": hashlib.md5,
    "sha256": hashlib.sha256,
}
# Names under which a components module is looked up in, and registered to, `sys.modules`.
COMPONENTS_MODULE_NAME = "components"
SDM_COMPONENTS_MODULE_NAME = "source_declarative_manifest.components"
# Presumably the config key carrying an injected manifest; it is not read in this module.
INJECTED_MANIFEST = "__injected_declarative_manifest"
# Config key holding the text of a custom `components.py`.
INJECTED_COMPONENTS_PY = "__injected_components_py"
# Config key holding the checksums (checksum type -> expected digest) for that text.
INJECTED_COMPONENTS_PY_CHECKSUMS = "__injected_components_py_checksums"
# Environment variable that must be set to 'true' (case-insensitive) to allow custom code.
ENV_VAR_ALLOW_CUSTOM_CODE = "AIRBYTE_ALLOW_CUSTOM_CODE"
23
+
24
+
25
class AirbyteCodeTamperedError(Exception):
    """Signal that the connector's components module failed checksum verification.

    The condition is treated as fatal because a mismatching checksum can be a
    sign that the code has been tampered with.
    """
30
+
31
+
32
class AirbyteCustomCodeNotPermittedError(Exception):
    """Signal an attempt to run custom code in an environment that does not support it."""

    def __init__(self) -> None:
        # The message is fixed; callers raise this exception without arguments.
        message_parts = (
            "Custom connector code is not permitted in this environment. ",
            "If you need to run custom code, please ask your administrator to set the `AIRBYTE_ALLOW_CUSTOM_CODE` ",
            "environment variable to 'true' in your Airbyte environment. ",
            "If you see this message in Airbyte Cloud, your workspace does not allow executing ",
            "custom connector code.",
        )
        super().__init__("".join(message_parts))
43
+
44
+
45
def _hash_text(input_text: str, hash_type: str = "md5") -> str:
    """Compute the hex digest of `input_text` with the requested checksum algorithm.

    `hash_type` must be a key of `CHECKSUM_FUNCTIONS`; the text is encoded with
    `str.encode()` defaults before hashing.

    Raises:
        ValueError: If `input_text` is empty.
    """
    if not input_text:
        raise ValueError("Input text cannot be empty.")

    # hashlib constructors accept the initial data directly, so no separate update() is needed.
    digest_builder = CHECKSUM_FUNCTIONS[hash_type](input_text.encode())
    return digest_builder.hexdigest()
53
+
54
+
55
def custom_code_execution_permitted() -> bool:
    """Report whether custom code execution is enabled for this process.

    Execution is permitted only when the `AIRBYTE_ALLOW_CUSTOM_CODE` environment
    variable is set to 'true' (compared case-insensitively). An unset variable
    counts as not permitted.
    """
    raw_flag = os.environ.get(ENV_VAR_ALLOW_CUSTOM_CODE, "")
    return raw_flag.lower() == "true"
61
+
62
+
63
def validate_python_code(
    code_text: str,
    checksums: dict[str, str] | None,
) -> None:
    """Validate the provided Python code text against the provided checksums.

    Every entry in `checksums` (checksum type -> expected hex digest) must match the
    digest computed from `code_text`.

    Currently we fail if no checksums are provided, although this may change in the future.

    Raises:
        ValueError: If `checksums` is empty or missing, or names an unsupported checksum type.
        AirbyteCodeTamperedError: If any computed digest differs from the expected one.
    """
    if not checksums:
        raise ValueError(f"A checksum is required to validate the code. Received: {checksums}")

    for checksum_type, checksum in checksums.items():
        if checksum_type not in CHECKSUM_FUNCTIONS:
            # Join the names explicitly: interpolating `.keys()` would print `dict_keys([...])`.
            supported_types = ", ".join(CHECKSUM_FUNCTIONS)
            raise ValueError(
                f"Unsupported checksum type: {checksum_type}. Supported checksum types are: {supported_types}"
            )

        if _hash_text(code_text, checksum_type) != checksum:
            raise AirbyteCodeTamperedError(f"{checksum_type} checksum does not match.")
82
+
83
+
84
def get_registered_components_module(
    config: Mapping[str, Any] | None,
) -> ModuleType | None:
    """Resolve the components module to use for the given config.

    When the config carries custom `components.py` text, that text is validated,
    executed and registered in `sys.modules` so that manifest declarations
    referencing its classes can import it later. This path requires custom code
    execution to be permitted.

    Otherwise an already imported/registered module is returned, checking
    `source_declarative_manifest.components` first and `components` second.

    Returns `None` if no components text is provided and neither module is registered.

    Raises:
        AirbyteCustomCodeNotPermittedError: If custom components text is provided but
            custom code execution is not permitted.
    """
    if not config or INJECTED_COMPONENTS_PY not in config:
        # Nothing injected: fall back to whatever is already present in `sys.modules`.
        for module_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
            if module_name in sys.modules:
                return cast(ModuleType, sys.modules.get(module_name))

        # Neither module is registered and no components text was provided in config.
        return None

    if not custom_code_execution_permitted():
        raise AirbyteCustomCodeNotPermittedError

    # Build a new module from the injected Python text and register it.
    injected_source: str = config[INJECTED_COMPONENTS_PY]
    return register_components_module_from_string(
        components_py_text=injected_source,
        checksums=config.get(INJECTED_COMPONENTS_PY_CHECKSUMS, None),
    )
118
+
119
+
120
def register_components_module_from_string(
    components_py_text: str,
    checksums: dict[str, Any] | None,
) -> ModuleType:
    """Build the components module from Python source text and register it.

    The text is checked against `checksums` before anything is executed. The
    resulting module is registered in `sys.modules` as both
    `source_declarative_manifest.components` and `components`, then returned.
    """
    # Validation comes first so that unverified text is never executed.
    validate_python_code(
        code_text=components_py_text,
        checksums=checksums,
    )

    module = ModuleType(COMPONENTS_MODULE_NAME)

    # NOTE(review): this executes arbitrary Python supplied through config; the only
    # guards visible here are the checksum validation above and the caller's permission check.
    exec(components_py_text, vars(module))

    # Make the module importable under both supported names.
    for registered_name in (SDM_COMPONENTS_MODULE_NAME, COMPONENTS_MODULE_NAME):
        sys.modules[registered_name] = module

    return module
@@ -66,6 +66,7 @@ from airbyte_cdk.sources.declarative.decoders import (
66
66
  JsonlDecoder,
67
67
  PaginationDecoderDecorator,
68
68
  XmlDecoder,
69
+ ZipfileDecoder,
69
70
  )
70
71
  from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
71
72
  CompositeRawDecoder,
@@ -86,6 +87,8 @@ from airbyte_cdk.sources.declarative.extractors.record_filter import (
86
87
  )
87
88
  from airbyte_cdk.sources.declarative.incremental import (
88
89
  ChildPartitionResumableFullRefreshCursor,
90
+ ConcurrentCursorFactory,
91
+ ConcurrentPerPartitionCursor,
89
92
  CursorFactory,
90
93
  DatetimeBasedCursor,
91
94
  DeclarativeCursor,
@@ -100,6 +103,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
100
103
  LegacyToPerPartitionStateMigration,
101
104
  )
102
105
  from airbyte_cdk.sources.declarative.models import (
106
+ Clamping,
103
107
  CustomStateMigration,
104
108
  )
105
109
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -359,6 +363,13 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
359
363
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
360
364
  XmlDecoder as XmlDecoderModel,
361
365
  )
366
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
367
+ ZipfileDecoder as ZipfileDecoderModel,
368
+ )
369
+ from airbyte_cdk.sources.declarative.parsers.custom_code_compiler import (
370
+ COMPONENTS_MODULE_NAME,
371
+ SDM_COMPONENTS_MODULE_NAME,
372
+ )
362
373
  from airbyte_cdk.sources.declarative.partition_routers import (
363
374
  CartesianProductStreamSlicer,
364
375
  ListPartitionRouter,
@@ -452,6 +463,16 @@ from airbyte_cdk.sources.message import (
452
463
  InMemoryMessageRepository,
453
464
  LogAppenderMessageRepositoryDecorator,
454
465
  MessageRepository,
466
+ NoopMessageRepository,
467
+ )
468
+ from airbyte_cdk.sources.streams.concurrent.clamping import (
469
+ ClampingEndProvider,
470
+ ClampingStrategy,
471
+ DayClampingStrategy,
472
+ MonthClampingStrategy,
473
+ NoClamping,
474
+ WeekClampingStrategy,
475
+ Weekday,
455
476
  )
456
477
  from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor, CursorField
457
478
  from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
@@ -578,6 +599,7 @@ class ModelToComponentFactory:
578
599
  ConfigComponentsResolverModel: self.create_config_components_resolver,
579
600
  StreamConfigModel: self.create_stream_config,
580
601
  ComponentMappingDefinitionModel: self.create_components_mapping_definition,
602
+ ZipfileDecoderModel: self.create_zipfile_decoder,
581
603
  }
582
604
 
583
605
  # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -907,6 +929,8 @@ class ModelToComponentFactory:
907
929
  stream_namespace: Optional[str],
908
930
  config: Config,
909
931
  stream_state: MutableMapping[str, Any],
932
+ message_repository: Optional[MessageRepository] = None,
933
+ runtime_lookback_window: Optional[datetime.timedelta] = None,
910
934
  **kwargs: Any,
911
935
  ) -> ConcurrentCursor:
912
936
  component_type = component_definition.get("type")
@@ -968,10 +992,22 @@ class ModelToComponentFactory:
968
992
  connector_state_converter = CustomFormatConcurrentStreamStateConverter(
969
993
  datetime_format=datetime_format,
970
994
  input_datetime_formats=datetime_based_cursor_model.cursor_datetime_formats,
971
- is_sequential_state=True,
995
+ is_sequential_state=True, # ConcurrentPerPartitionCursor only works with sequential state
972
996
  cursor_granularity=cursor_granularity,
973
997
  )
974
998
 
999
+ # Adjusts the stream state by applying the runtime lookback window.
1000
+ # This is used to ensure correct state handling in case of failed partitions.
1001
+ stream_state_value = stream_state.get(cursor_field.cursor_field_key)
1002
+ if runtime_lookback_window and stream_state_value:
1003
+ new_stream_state = (
1004
+ connector_state_converter.parse_timestamp(stream_state_value)
1005
+ - runtime_lookback_window
1006
+ )
1007
+ stream_state[cursor_field.cursor_field_key] = connector_state_converter.output_format(
1008
+ new_stream_state
1009
+ )
1010
+
975
1011
  start_date_runtime_value: Union[InterpolatedString, str, MinMaxDatetime]
976
1012
  if isinstance(datetime_based_cursor_model.start_datetime, MinMaxDatetimeModel):
977
1013
  start_date_runtime_value = self.create_min_max_datetime(
@@ -1038,11 +1074,58 @@ class ModelToComponentFactory:
1038
1074
  if evaluated_step:
1039
1075
  step_length = parse_duration(evaluated_step)
1040
1076
 
1077
+ clamping_strategy: ClampingStrategy = NoClamping()
1078
+ if datetime_based_cursor_model.clamping:
1079
+ # While it is undesirable to interpolate within the model factory (as opposed to at runtime),
1080
+ # it is still better than shifting interpolation low-code concept into the ConcurrentCursor runtime
1081
+ # object which we want to keep agnostic of being low-code
1082
+ target = InterpolatedString(
1083
+ string=datetime_based_cursor_model.clamping.target,
1084
+ parameters=datetime_based_cursor_model.parameters or {},
1085
+ )
1086
+ evaluated_target = target.eval(config=config)
1087
+ match evaluated_target:
1088
+ case "DAY":
1089
+ clamping_strategy = DayClampingStrategy()
1090
+ end_date_provider = ClampingEndProvider(
1091
+ DayClampingStrategy(is_ceiling=False),
1092
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1093
+ granularity=cursor_granularity or datetime.timedelta(seconds=1),
1094
+ )
1095
+ case "WEEK":
1096
+ if (
1097
+ not datetime_based_cursor_model.clamping.target_details
1098
+ or "weekday" not in datetime_based_cursor_model.clamping.target_details
1099
+ ):
1100
+ raise ValueError(
1101
+ "Given WEEK clamping, weekday needs to be provided as target_details"
1102
+ )
1103
+ weekday = self._assemble_weekday(
1104
+ datetime_based_cursor_model.clamping.target_details["weekday"]
1105
+ )
1106
+ clamping_strategy = WeekClampingStrategy(weekday)
1107
+ end_date_provider = ClampingEndProvider(
1108
+ WeekClampingStrategy(weekday, is_ceiling=False),
1109
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1110
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1111
+ )
1112
+ case "MONTH":
1113
+ clamping_strategy = MonthClampingStrategy()
1114
+ end_date_provider = ClampingEndProvider(
1115
+ MonthClampingStrategy(is_ceiling=False),
1116
+ end_date_provider, # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
1117
+ granularity=cursor_granularity or datetime.timedelta(days=1),
1118
+ )
1119
+ case _:
1120
+ raise ValueError(
1121
+ f"Invalid clamping target {evaluated_target}, expected DAY, WEEK, MONTH"
1122
+ )
1123
+
1041
1124
  return ConcurrentCursor(
1042
1125
  stream_name=stream_name,
1043
1126
  stream_namespace=stream_namespace,
1044
1127
  stream_state=stream_state,
1045
- message_repository=self._message_repository,
1128
+ message_repository=message_repository or self._message_repository,
1046
1129
  connector_state_manager=state_manager,
1047
1130
  connector_state_converter=connector_state_converter,
1048
1131
  cursor_field=cursor_field,
@@ -1052,6 +1135,83 @@ class ModelToComponentFactory:
1052
1135
  lookback_window=lookback_window,
1053
1136
  slice_range=step_length,
1054
1137
  cursor_granularity=cursor_granularity,
1138
+ clamping_strategy=clamping_strategy,
1139
+ )
1140
+
1141
def _assemble_weekday(self, weekday: str) -> Weekday:
    """Translate an upper-case weekday name (e.g. "MONDAY") into its `Weekday` member.

    Raises:
        ValueError: If `weekday` is not one of the seven recognised names.
    """
    recognised_names = (
        "MONDAY",
        "TUESDAY",
        "WEDNESDAY",
        "THURSDAY",
        "FRIDAY",
        "SATURDAY",
        "SUNDAY",
    )
    if weekday not in recognised_names:
        raise ValueError(f"Unknown weekday {weekday}")
    # Each recognised name is also the attribute name of the matching Weekday member.
    return getattr(Weekday, weekday)
1159
+
1160
def create_concurrent_cursor_from_perpartition_cursor(
    self,
    state_manager: ConnectorStateManager,
    model_type: Type[BaseModel],
    component_definition: ComponentDefinition,
    stream_name: str,
    stream_namespace: Optional[str],
    config: Config,
    stream_state: MutableMapping[str, Any],
    partition_router: PartitionRouter,
    **kwargs: Any,
) -> ConcurrentPerPartitionCursor:
    """Build a `ConcurrentPerPartitionCursor` from a datetime-based cursor definition.

    The component definition must declare the type named by `model_type` and must
    parse into a `DatetimeBasedCursorModel`. The returned cursor receives a factory
    that creates cursors through `create_concurrent_cursor_from_datetime_based_cursor`.

    Raises:
        ValueError: If the declared component type does not match `model_type`, or the
            parsed model is not a `DatetimeBasedCursorModel`.
    """
    component_type = component_definition.get("type")
    if component_type != model_type.__name__:
        raise ValueError(
            f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
        )

    parsed_cursor_model = model_type.parse_obj(component_definition)
    if not isinstance(parsed_cursor_model, DatetimeBasedCursorModel):
        raise ValueError(
            f"Expected {model_type.__name__} component, but received {parsed_cursor_model.__class__.__name__}"
        )

    # The cursor field may be interpolated, so it is resolved against the config here.
    cursor_field_template = InterpolatedString.create(
        parsed_cursor_model.cursor_field,
        parameters=parsed_cursor_model.parameters or {},
    )
    cursor_field = CursorField(cursor_field_template.eval(config=config))

    # Cursors produced by the factory are given a NoopMessageRepository rather than the
    # factory's own repository (presumably so only the outer cursor emits messages - TODO confirm).
    build_partition_cursor = partial(
        self.create_concurrent_cursor_from_datetime_based_cursor,
        state_manager=state_manager,
        model_type=model_type,
        component_definition=component_definition,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        config=config,
        message_repository=NoopMessageRepository(),
    )

    return ConcurrentPerPartitionCursor(
        cursor_factory=ConcurrentCursorFactory(build_partition_cursor),
        partition_router=partition_router,
        stream_name=stream_name,
        stream_namespace=stream_namespace,
        stream_state=stream_state,
        message_repository=self._message_repository,  # type: ignore
        connector_state_manager=state_manager,
        cursor_field=cursor_field,
    )
1056
1216
 
1057
1217
  @staticmethod
@@ -1097,7 +1257,6 @@ class ModelToComponentFactory:
1097
1257
  :param config: The custom defined connector config
1098
1258
  :return: The declarative component built from the Pydantic model to be used at runtime
1099
1259
  """
1100
-
1101
1260
  custom_component_class = self._get_class_from_fully_qualified_class_name(model.class_name)
1102
1261
  component_fields = get_type_hints(custom_component_class)
1103
1262
  model_args = model.dict()
@@ -1151,14 +1310,38 @@ class ModelToComponentFactory:
1151
1310
  return custom_component_class(**kwargs)
1152
1311
 
1153
1312
  @staticmethod
1154
- def _get_class_from_fully_qualified_class_name(full_qualified_class_name: str) -> Any:
1313
+ def _get_class_from_fully_qualified_class_name(
1314
+ full_qualified_class_name: str,
1315
+ ) -> Any:
1316
+ """Get a class from its fully qualified name.
1317
+
1318
+ If a custom components module is needed, we assume it is already registered - probably
1319
+ as `source_declarative_manifest.components` or `components`.
1320
+
1321
+ Args:
1322
+ full_qualified_class_name (str): The fully qualified name of the class (e.g., "module.ClassName").
1323
+
1324
+ Returns:
1325
+ Any: The class object.
1326
+
1327
+ Raises:
1328
+ ValueError: If the class cannot be loaded.
1329
+ """
1155
1330
  split = full_qualified_class_name.split(".")
1156
- module = ".".join(split[:-1])
1331
+ module_name_full = ".".join(split[:-1])
1157
1332
  class_name = split[-1]
1333
+
1334
+ try:
1335
+ module_ref = importlib.import_module(module_name_full)
1336
+ except ModuleNotFoundError as e:
1337
+ raise ValueError(f"Could not load module `{module_name_full}`.") from e
1338
+
1158
1339
  try:
1159
- return getattr(importlib.import_module(module), class_name)
1160
- except AttributeError:
1161
- raise ValueError(f"Could not load class {full_qualified_class_name}.")
1340
+ return getattr(module_ref, class_name)
1341
+ except AttributeError as e:
1342
+ raise ValueError(
1343
+ f"Could not load class `{class_name}` from module `{module_name_full}`.",
1344
+ ) from e
1162
1345
 
1163
1346
  @staticmethod
1164
1347
  def _derive_component_type_from_type_hints(field_type: Any) -> Optional[str]:
@@ -1336,18 +1519,15 @@ class ModelToComponentFactory:
1336
1519
  raise ValueError(
1337
1520
  "Unsupported Slicer is used. PerPartitionWithGlobalCursor should be used here instead"
1338
1521
  )
1339
- client_side_incremental_sync = {
1340
- "date_time_based_cursor": self._create_component_from_model(
1341
- model=model.incremental_sync, config=config
1342
- ),
1343
- "substream_cursor": (
1344
- combined_slicers
1345
- if isinstance(
1346
- combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1347
- )
1348
- else None
1349
- ),
1350
- }
1522
+ cursor = (
1523
+ combined_slicers
1524
+ if isinstance(
1525
+ combined_slicers, (PerPartitionWithGlobalCursor, GlobalSubstreamCursor)
1526
+ )
1527
+ else self._create_component_from_model(model=model.incremental_sync, config=config)
1528
+ )
1529
+
1530
+ client_side_incremental_sync = {"cursor": cursor}
1351
1531
 
1352
1532
  if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
1353
1533
  cursor_model = model.incremental_sync
@@ -1820,6 +2000,12 @@ class ModelToComponentFactory:
1820
2000
  ) -> GzipJsonDecoder:
1821
2001
  return GzipJsonDecoder(parameters={}, encoding=model.encoding)
1822
2002
 
2003
def create_zipfile_decoder(
    self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
) -> ZipfileDecoder:
    """Build a `ZipfileDecoder` whose parser is created from the model's `parser` definition."""
    return ZipfileDecoder(
        parser=self._create_component_from_model(model=model.parser, config=config)
    )
2008
+
1823
2009
  def create_gzip_parser(
1824
2010
  self, model: GzipParserModel, config: Config, **kwargs: Any
1825
2011
  ) -> GzipParser:
@@ -1914,6 +2100,12 @@ class ModelToComponentFactory:
1914
2100
  def create_oauth_authenticator(
1915
2101
  self, model: OAuthAuthenticatorModel, config: Config, **kwargs: Any
1916
2102
  ) -> DeclarativeOauth2Authenticator:
2103
+ profile_assertion = (
2104
+ self._create_component_from_model(model.profile_assertion, config=config)
2105
+ if model.profile_assertion
2106
+ else None
2107
+ )
2108
+
1917
2109
  if model.refresh_token_updater:
1918
2110
  # ignore type error because fixing it would have a lot of dependencies, revisit later
1919
2111
  return DeclarativeSingleUseRefreshTokenOauth2Authenticator( # type: ignore
@@ -1934,13 +2126,17 @@ class ModelToComponentFactory:
1934
2126
  ).eval(config),
1935
2127
  client_id=InterpolatedString.create(
1936
2128
  model.client_id, parameters=model.parameters or {}
1937
- ).eval(config),
2129
+ ).eval(config)
2130
+ if model.client_id
2131
+ else model.client_id,
1938
2132
  client_secret_name=InterpolatedString.create(
1939
2133
  model.client_secret_name or "client_secret", parameters=model.parameters or {}
1940
2134
  ).eval(config),
1941
2135
  client_secret=InterpolatedString.create(
1942
2136
  model.client_secret, parameters=model.parameters or {}
1943
- ).eval(config),
2137
+ ).eval(config)
2138
+ if model.client_secret
2139
+ else model.client_secret,
1944
2140
  access_token_config_path=model.refresh_token_updater.access_token_config_path,
1945
2141
  refresh_token_config_path=model.refresh_token_updater.refresh_token_config_path,
1946
2142
  token_expiry_date_config_path=model.refresh_token_updater.token_expiry_date_config_path,
@@ -1986,6 +2182,8 @@ class ModelToComponentFactory:
1986
2182
  config=config,
1987
2183
  parameters=model.parameters or {},
1988
2184
  message_repository=self._message_repository,
2185
+ profile_assertion=profile_assertion,
2186
+ use_profile_assertion=model.use_profile_assertion,
1989
2187
  )
1990
2188
 
1991
2189
  def create_offset_increment(
@@ -296,8 +296,12 @@ class SubstreamPartitionRouter(PartitionRouter):
296
296
 
297
297
  if not parent_state and incremental_dependency:
298
298
  # Attempt to retrieve child state
299
- substream_state = list(stream_state.values())
300
- substream_state = substream_state[0] if substream_state else {} # type: ignore [assignment] # Incorrect type for assignment
299
+ substream_state_values = list(stream_state.values())
300
+ substream_state = substream_state_values[0] if substream_state_values else {}
301
+ # Filter out per partition state. Because we pass the state to the parent stream in the format {cursor_field: substream_state}
302
+ if isinstance(substream_state, (list, dict)):
303
+ substream_state = {}
304
+
301
305
  parent_state = {}
302
306
 
303
307
  # Copy child state to parent streams with incremental dependencies
@@ -8,6 +8,7 @@ from typing import Any, List, Mapping, Optional, Union
8
8
  import requests
9
9
 
10
10
  from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
11
+ from airbyte_cdk.sources.streams.http.error_handlers.backoff_strategy import BackoffStrategy
11
12
  from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
12
13
  ErrorResolution,
13
14
  ResponseAction,
@@ -77,3 +78,24 @@ class CompositeErrorHandler(ErrorHandler):
77
78
  return matched_error_resolution
78
79
 
79
80
  return create_fallback_error_resolution(response_or_exception)
81
+
82
@property
def backoff_strategies(self) -> Optional[List[BackoffStrategy]]:
    """
    Flatten the backoff strategies of every child error handler into one list.

    Behavior to keep in mind when this is used with HttpRequester:
    - HttpRequester.__post_init__ assigns the entire list of backoff strategies to the error handler
    - The error handler's backoff_time() method only ever uses the first non-None strategy in the list
    - So whenever any strategies are present, the first non-None one becomes the default
    - That holds for user-defined response filters as well as errors from DEFAULT_ERROR_MAPPING
    - The list is not used to map different strategies to different error conditions
    - Later strategies in the list are therefore never used

    Returns None when no handler defines strategies, in which case HttpRequester uses its default backoff strategy.
    """
    # Handlers without the attribute, or with an empty/None value, contribute nothing.
    combined = [
        strategy
        for handler in self.error_handlers
        if hasattr(handler, "backoff_strategies") and handler.backoff_strategies
        for strategy in handler.backoff_strategies
    ]
    return combined or None
@@ -160,7 +160,7 @@ class SimpleRetriever(Retriever):
160
160
  stream_slice,
161
161
  next_page_token,
162
162
  self._paginator.get_request_headers,
163
- self.stream_slicer.get_request_headers,
163
+ self.request_option_provider.get_request_headers,
164
164
  )
165
165
  if isinstance(headers, str):
166
166
  raise ValueError("Request headers cannot be a string")
@@ -11,6 +11,9 @@ from pydantic.v1 import AnyUrl, BaseModel, Field
11
11
 
12
12
  from airbyte_cdk import OneOfOptionConfig
13
13
  from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
14
+ from airbyte_cdk.sources.file_based.config.identities_based_stream_config import (
15
+ IdentitiesStreamConfig,
16
+ )
14
17
  from airbyte_cdk.sources.utils import schema_helpers
15
18
 
16
19
 
@@ -22,6 +25,18 @@ class DeliverRecords(BaseModel):
22
25
 
23
26
  delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)
24
27
 
28
+ sync_acl_permissions: bool = Field(
29
+ title="Include ACL Permissions",
30
+ description="Joins Document allowlists to each stream.",
31
+ default=False,
32
+ airbyte_hidden=True,
33
+ )
34
+ identities: Optional[IdentitiesStreamConfig] = Field(
35
+ title="Identities configuration",
36
+ description="Configuration for identities",
37
+ airbyte_hidden=True,
38
+ )
39
+
25
40
 
26
41
  class DeliverRawFiles(BaseModel):
27
42
  class Config(OneOfOptionConfig):
@@ -0,0 +1,8 @@
1
+ from typing import Literal
2
+
3
+ from pydantic.v1 import BaseModel, Field
4
+
5
+
6
# Configuration model for the identities stream of a file-based source.
class IdentitiesStreamConfig(BaseModel):
    # Constant stream name; always "identities" and hidden via `airbyte_hidden`.
    name: Literal["identities"] = Field("identities", const=True, airbyte_hidden=True)
    # Required domain of the identities.
    domain: str = Field(title="Domain", description="The domain of the identities.")
@@ -0,0 +1,34 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import uuid
6
+ from datetime import datetime
7
+ from enum import Enum
8
+
9
+ from pydantic.v1 import BaseModel
10
+
11
+
12
class RemoteFileIdentityType(Enum):
    """Kind of identity a `RemoteFileIdentity` describes: a user or a group."""

    USER = "user"
    GROUP = "group"
15
+
16
+
17
# Model describing a single identity (user or group) associated with remote files.
class RemoteFileIdentity(BaseModel):
    # Required UUID of this identity record.
    id: uuid.UUID
    # Required identifier of the identity; presumably as assigned by the remote system - TODO confirm.
    remote_id: str
    # Optional descriptive fields; each defaults to None when not provided.
    parent_id: str | None = None
    name: str | None = None
    description: str | None = None
    email_address: str | None = None
    member_email_addresses: list[str] | None = None
    # Whether the identity is a user or a group.
    type: RemoteFileIdentityType
    # Required modification timestamp.
    modified_at: datetime
27
+
28
+
29
# Model describing the access permissions recorded for one remote file.
class RemoteFilePermissions(BaseModel):
    # Required identifier and path of the file the permissions apply to.
    id: str
    file_path: str
    # Optional lists of identity remote ids; presumably matching `RemoteFileIdentity.remote_id` - TODO confirm.
    allowed_identity_remote_ids: list[str] | None = None
    denied_identity_remote_ids: list[str] | None = None
    # Defaults to False when not provided.
    publicly_accessible: bool = False