airbyte-cdk 6.26.0.dev4105__py3-none-any.whl → 6.26.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +14 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +39 -13
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +7 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +24 -3
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +46 -16
- airbyte_cdk/sources/declarative/schema/__init__.py +2 -0
- airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +43 -5
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +0 -10
- airbyte_cdk/sources/file_based/file_based_source.py +1 -44
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +0 -33
- airbyte_cdk/sources/file_based/schema_helpers.py +0 -25
- airbyte_cdk/sources/file_based/stream/__init__.py +1 -2
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +0 -29
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.26.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.26.1.dist-info}/RECORD +18 -20
- airbyte_cdk/sources/file_based/config/permissions.py +0 -34
- airbyte_cdk/sources/file_based/stream/identities_stream.py +0 -96
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.26.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.26.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.26.0.dev4105.dist-info → airbyte_cdk-6.26.1.dist-info}/entry_points.txt +0 -0
@@ -1800,6 +1800,19 @@ definitions:
|
|
1800
1800
|
$parameters:
|
1801
1801
|
type: object
|
1802
1802
|
additionalProperties: true
|
1803
|
+
ComplexFieldType:
|
1804
|
+
title: Schema Field Type
|
1805
|
+
description: (This component is experimental. Use at your own risk.) Represents a complex field type.
|
1806
|
+
type: object
|
1807
|
+
required:
|
1808
|
+
- field_type
|
1809
|
+
properties:
|
1810
|
+
field_type:
|
1811
|
+
type: string
|
1812
|
+
items:
|
1813
|
+
anyOf:
|
1814
|
+
- type: string
|
1815
|
+
- "$ref": "#/definitions/ComplexFieldType"
|
1803
1816
|
TypesMap:
|
1804
1817
|
title: Types Map
|
1805
1818
|
description: (This component is experimental. Use at your own risk.) Represents a mapping between a current type and its corresponding target type.
|
@@ -1814,6 +1827,7 @@ definitions:
|
|
1814
1827
|
- type: array
|
1815
1828
|
items:
|
1816
1829
|
type: string
|
1830
|
+
- "$ref": "#/definitions/ComplexFieldType"
|
1817
1831
|
current_type:
|
1818
1832
|
anyOf:
|
1819
1833
|
- type: string
|
@@ -147,7 +147,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
147
147
|
< cursor.state[self.cursor_field.cursor_field_key]
|
148
148
|
):
|
149
149
|
self._new_global_cursor = copy.deepcopy(cursor.state)
|
150
|
-
|
150
|
+
self._emit_state_message()
|
151
151
|
|
152
152
|
def ensure_at_least_one_state_emitted(self) -> None:
|
153
153
|
"""
|
@@ -192,7 +192,8 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
192
192
|
self._global_cursor,
|
193
193
|
self._lookback_window if self._global_cursor else 0,
|
194
194
|
)
|
195
|
-
self.
|
195
|
+
with self._lock:
|
196
|
+
self._cursor_per_partition[self._to_partition_key(partition.partition)] = cursor
|
196
197
|
self._semaphore_per_partition[self._to_partition_key(partition.partition)] = (
|
197
198
|
threading.Semaphore(0)
|
198
199
|
)
|
@@ -210,16 +211,38 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
210
211
|
|
211
212
|
def _ensure_partition_limit(self) -> None:
|
212
213
|
"""
|
213
|
-
Ensure the maximum number of partitions
|
214
|
+
Ensure the maximum number of partitions does not exceed the predefined limit.
|
215
|
+
|
216
|
+
Steps:
|
217
|
+
1. Attempt to remove partitions that are marked as finished in `_finished_partitions`.
|
218
|
+
These partitions are considered processed and safe to delete.
|
219
|
+
2. If the limit is still exceeded and no finished partitions are available for removal,
|
220
|
+
remove the oldest partition unconditionally. We expect failed partitions to be removed.
|
221
|
+
|
222
|
+
Logging:
|
223
|
+
- Logs a warning each time a partition is removed, indicating whether it was finished
|
224
|
+
or removed due to being the oldest.
|
214
225
|
"""
|
215
|
-
|
216
|
-
self.
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
226
|
+
with self._lock:
|
227
|
+
while len(self._cursor_per_partition) > self.DEFAULT_MAX_PARTITIONS_NUMBER - 1:
|
228
|
+
# Try removing finished partitions first
|
229
|
+
for partition_key in list(self._cursor_per_partition.keys()):
|
230
|
+
if partition_key in self._finished_partitions:
|
231
|
+
oldest_partition = self._cursor_per_partition.pop(
|
232
|
+
partition_key
|
233
|
+
) # Remove the oldest partition
|
234
|
+
logger.warning(
|
235
|
+
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
|
236
|
+
)
|
237
|
+
break
|
238
|
+
else:
|
239
|
+
# If no finished partitions can be removed, fall back to removing the oldest partition
|
240
|
+
oldest_partition = self._cursor_per_partition.popitem(last=False)[
|
241
|
+
1
|
242
|
+
] # Remove the oldest partition
|
243
|
+
logger.warning(
|
244
|
+
f"The maximum number of partitions has been reached. Dropping the oldest partition: {oldest_partition}. Over limit: {self._over_limit}."
|
245
|
+
)
|
223
246
|
|
224
247
|
def _set_initial_state(self, stream_state: StreamState) -> None:
|
225
248
|
"""
|
@@ -264,7 +287,10 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
264
287
|
if not stream_state:
|
265
288
|
return
|
266
289
|
|
267
|
-
if
|
290
|
+
if (
|
291
|
+
self._PERPARTITION_STATE_KEY not in stream_state
|
292
|
+
and self._GLOBAL_STATE_KEY not in stream_state
|
293
|
+
):
|
268
294
|
# We assume that `stream_state` is in a global format that can be applied to all partitions.
|
269
295
|
# Example: {"global_state_format_key": "global_state_format_value"}
|
270
296
|
self._global_cursor = deepcopy(stream_state)
|
@@ -273,7 +299,7 @@ class ConcurrentPerPartitionCursor(Cursor):
|
|
273
299
|
else:
|
274
300
|
self._lookback_window = int(stream_state.get("lookback_window", 0))
|
275
301
|
|
276
|
-
for state in stream_state
|
302
|
+
for state in stream_state.get(self._PERPARTITION_STATE_KEY, []):
|
277
303
|
self._cursor_per_partition[self._to_partition_key(state["partition"])] = (
|
278
304
|
self._create_cursor(state["cursor"])
|
279
305
|
)
|
@@ -736,8 +736,13 @@ class HttpResponseFilter(BaseModel):
|
|
736
736
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
737
737
|
|
738
738
|
|
739
|
+
class ComplexFieldType(BaseModel):
|
740
|
+
field_type: str
|
741
|
+
items: Optional[Union[str, ComplexFieldType]] = None
|
742
|
+
|
743
|
+
|
739
744
|
class TypesMap(BaseModel):
|
740
|
-
target_type: Union[str, List[str]]
|
745
|
+
target_type: Union[str, List[str], ComplexFieldType]
|
741
746
|
current_type: Union[str, List[str]]
|
742
747
|
condition: Optional[str] = None
|
743
748
|
|
@@ -2260,6 +2265,7 @@ class DynamicDeclarativeStream(BaseModel):
|
|
2260
2265
|
)
|
2261
2266
|
|
2262
2267
|
|
2268
|
+
ComplexFieldType.update_forward_refs()
|
2263
2269
|
CompositeErrorHandler.update_forward_refs()
|
2264
2270
|
DeclarativeSource1.update_forward_refs()
|
2265
2271
|
DeclarativeSource2.update_forward_refs()
|
@@ -133,6 +133,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
133
133
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
134
134
|
CheckStream as CheckStreamModel,
|
135
135
|
)
|
136
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
137
|
+
ComplexFieldType as ComplexFieldTypeModel,
|
138
|
+
)
|
136
139
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
137
140
|
ComponentMappingDefinition as ComponentMappingDefinitionModel,
|
138
141
|
)
|
@@ -429,6 +432,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
429
432
|
SimpleRetrieverTestReadDecorator,
|
430
433
|
)
|
431
434
|
from airbyte_cdk.sources.declarative.schema import (
|
435
|
+
ComplexFieldType,
|
432
436
|
DefaultSchemaLoader,
|
433
437
|
DynamicSchemaLoader,
|
434
438
|
InlineSchemaLoader,
|
@@ -572,6 +576,7 @@ class ModelToComponentFactory:
|
|
572
576
|
DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
|
573
577
|
SchemaTypeIdentifierModel: self.create_schema_type_identifier,
|
574
578
|
TypesMapModel: self.create_types_map,
|
579
|
+
ComplexFieldTypeModel: self.create_complex_field_type,
|
575
580
|
JwtAuthenticatorModel: self.create_jwt_authenticator,
|
576
581
|
LegacyToPerPartitionStateMigrationModel: self.create_legacy_to_per_partition_state_migration,
|
577
582
|
ListPartitionRouterModel: self.create_list_partition_router,
|
@@ -1894,10 +1899,26 @@ class ModelToComponentFactory:
|
|
1894
1899
|
) -> InlineSchemaLoader:
|
1895
1900
|
return InlineSchemaLoader(schema=model.schema_ or {}, parameters={})
|
1896
1901
|
|
1897
|
-
|
1898
|
-
|
1902
|
+
def create_complex_field_type(
|
1903
|
+
self, model: ComplexFieldTypeModel, config: Config, **kwargs: Any
|
1904
|
+
) -> ComplexFieldType:
|
1905
|
+
items = (
|
1906
|
+
self._create_component_from_model(model=model.items, config=config)
|
1907
|
+
if isinstance(model.items, ComplexFieldTypeModel)
|
1908
|
+
else model.items
|
1909
|
+
)
|
1910
|
+
|
1911
|
+
return ComplexFieldType(field_type=model.field_type, items=items)
|
1912
|
+
|
1913
|
+
def create_types_map(self, model: TypesMapModel, config: Config, **kwargs: Any) -> TypesMap:
|
1914
|
+
target_type = (
|
1915
|
+
self._create_component_from_model(model=model.target_type, config=config)
|
1916
|
+
if isinstance(model.target_type, ComplexFieldTypeModel)
|
1917
|
+
else model.target_type
|
1918
|
+
)
|
1919
|
+
|
1899
1920
|
return TypesMap(
|
1900
|
-
target_type=
|
1921
|
+
target_type=target_type,
|
1901
1922
|
current_type=model.current_type,
|
1902
1923
|
condition=model.condition if model.condition is not None else "True",
|
1903
1924
|
)
|
@@ -295,28 +295,58 @@ class SubstreamPartitionRouter(PartitionRouter):
|
|
295
295
|
return
|
296
296
|
|
297
297
|
if not parent_state and incremental_dependency:
|
298
|
-
#
|
299
|
-
|
300
|
-
substream_state = substream_state_values[0] if substream_state_values else {}
|
301
|
-
# Filter out per partition state. Because we pass the state to the parent stream in the format {cursor_field: substream_state}
|
302
|
-
if isinstance(substream_state, (list, dict)):
|
303
|
-
substream_state = {}
|
304
|
-
|
305
|
-
parent_state = {}
|
306
|
-
|
307
|
-
# Copy child state to parent streams with incremental dependencies
|
308
|
-
if substream_state:
|
309
|
-
for parent_config in self.parent_stream_configs:
|
310
|
-
if parent_config.incremental_dependency:
|
311
|
-
parent_state[parent_config.stream.name] = {
|
312
|
-
parent_config.stream.cursor_field: substream_state
|
313
|
-
}
|
298
|
+
# Migrate child state to parent state format
|
299
|
+
parent_state = self._migrate_child_state_to_parent_state(stream_state)
|
314
300
|
|
315
301
|
# Set state for each parent stream with an incremental dependency
|
316
302
|
for parent_config in self.parent_stream_configs:
|
317
303
|
if parent_config.incremental_dependency:
|
318
304
|
parent_config.stream.state = parent_state.get(parent_config.stream.name, {})
|
319
305
|
|
306
|
+
def _migrate_child_state_to_parent_state(self, stream_state: StreamState) -> StreamState:
|
307
|
+
"""
|
308
|
+
Migrate the child stream state to the parent stream's state format.
|
309
|
+
|
310
|
+
This method converts the global or child state into a format compatible with parent
|
311
|
+
streams. The migration occurs only for parent streams with incremental dependencies.
|
312
|
+
The method filters out per-partition states and retains only the global state in the
|
313
|
+
format `{cursor_field: cursor_value}`.
|
314
|
+
|
315
|
+
Args:
|
316
|
+
stream_state (StreamState): The state to migrate. Expected formats include:
|
317
|
+
- {"updated_at": "2023-05-27T00:00:00Z"}
|
318
|
+
- {"states": [...] } (ignored during migration)
|
319
|
+
|
320
|
+
Returns:
|
321
|
+
StreamState: A migrated state for parent streams in the format:
|
322
|
+
{
|
323
|
+
"parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
|
324
|
+
}
|
325
|
+
|
326
|
+
Example:
|
327
|
+
Input: {"updated_at": "2023-05-27T00:00:00Z"}
|
328
|
+
Output: {
|
329
|
+
"parent_stream_name": {"parent_stream_cursor": "2023-05-27T00:00:00Z"}
|
330
|
+
}
|
331
|
+
"""
|
332
|
+
substream_state_values = list(stream_state.values())
|
333
|
+
substream_state = substream_state_values[0] if substream_state_values else {}
|
334
|
+
|
335
|
+
# Ignore per-partition states or invalid formats
|
336
|
+
if isinstance(substream_state, (list, dict)) or len(substream_state_values) != 1:
|
337
|
+
return {}
|
338
|
+
|
339
|
+
# Copy child state to parent streams with incremental dependencies
|
340
|
+
parent_state = {}
|
341
|
+
if substream_state:
|
342
|
+
for parent_config in self.parent_stream_configs:
|
343
|
+
if parent_config.incremental_dependency:
|
344
|
+
parent_state[parent_config.stream.name] = {
|
345
|
+
parent_config.stream.cursor_field: substream_state
|
346
|
+
}
|
347
|
+
|
348
|
+
return parent_state
|
349
|
+
|
320
350
|
def get_stream_state(self) -> Optional[Mapping[str, StreamState]]:
|
321
351
|
"""
|
322
352
|
Get the state of the parent streams.
|
@@ -4,6 +4,7 @@
|
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.schema.default_schema_loader import DefaultSchemaLoader
|
6
6
|
from airbyte_cdk.sources.declarative.schema.dynamic_schema_loader import (
|
7
|
+
ComplexFieldType,
|
7
8
|
DynamicSchemaLoader,
|
8
9
|
SchemaTypeIdentifier,
|
9
10
|
TypesMap,
|
@@ -18,6 +19,7 @@ __all__ = [
|
|
18
19
|
"SchemaLoader",
|
19
20
|
"InlineSchemaLoader",
|
20
21
|
"DynamicSchemaLoader",
|
22
|
+
"ComplexFieldType",
|
21
23
|
"TypesMap",
|
22
24
|
"SchemaTypeIdentifier",
|
23
25
|
]
|
@@ -18,7 +18,7 @@ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
|
18
18
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
19
19
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
20
20
|
|
21
|
-
AIRBYTE_DATA_TYPES: Mapping[str,
|
21
|
+
AIRBYTE_DATA_TYPES: Mapping[str, MutableMapping[str, Any]] = {
|
22
22
|
"string": {"type": ["null", "string"]},
|
23
23
|
"boolean": {"type": ["null", "boolean"]},
|
24
24
|
"date": {"type": ["null", "string"], "format": "date"},
|
@@ -45,6 +45,25 @@ AIRBYTE_DATA_TYPES: Mapping[str, Mapping[str, Any]] = {
|
|
45
45
|
}
|
46
46
|
|
47
47
|
|
48
|
+
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
49
|
+
@dataclass(frozen=True)
|
50
|
+
class ComplexFieldType:
|
51
|
+
"""
|
52
|
+
Identifies complex field type
|
53
|
+
"""
|
54
|
+
|
55
|
+
field_type: str
|
56
|
+
items: Optional[Union[str, "ComplexFieldType"]] = None
|
57
|
+
|
58
|
+
def __post_init__(self) -> None:
|
59
|
+
"""
|
60
|
+
Enforces that `items` is only used when `field_type` is a array
|
61
|
+
"""
|
62
|
+
# `items_type` is valid only for array target types
|
63
|
+
if self.items and self.field_type != "array":
|
64
|
+
raise ValueError("'items' can only be used when 'field_type' is an array.")
|
65
|
+
|
66
|
+
|
48
67
|
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
49
68
|
@dataclass(frozen=True)
|
50
69
|
class TypesMap:
|
@@ -52,7 +71,7 @@ class TypesMap:
|
|
52
71
|
Represents a mapping between a current type and its corresponding target type.
|
53
72
|
"""
|
54
73
|
|
55
|
-
target_type: Union[List[str], str]
|
74
|
+
target_type: Union[List[str], str, ComplexFieldType]
|
56
75
|
current_type: Union[List[str], str]
|
57
76
|
condition: Optional[str]
|
58
77
|
|
@@ -135,8 +154,9 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
135
154
|
transformed_properties = self._transform(properties, {})
|
136
155
|
|
137
156
|
return {
|
138
|
-
"$schema": "
|
157
|
+
"$schema": "https://json-schema.org/draft-07/schema#",
|
139
158
|
"type": "object",
|
159
|
+
"additionalProperties": True,
|
140
160
|
"properties": transformed_properties,
|
141
161
|
}
|
142
162
|
|
@@ -188,18 +208,36 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
188
208
|
first_type = self._get_airbyte_type(mapped_field_type[0])
|
189
209
|
second_type = self._get_airbyte_type(mapped_field_type[1])
|
190
210
|
return {"oneOf": [first_type, second_type]}
|
211
|
+
|
191
212
|
elif isinstance(mapped_field_type, str):
|
192
213
|
return self._get_airbyte_type(mapped_field_type)
|
214
|
+
|
215
|
+
elif isinstance(mapped_field_type, ComplexFieldType):
|
216
|
+
return self._resolve_complex_type(mapped_field_type)
|
217
|
+
|
193
218
|
else:
|
194
219
|
raise ValueError(
|
195
220
|
f"Invalid data type. Available string or two items list of string. Got {mapped_field_type}."
|
196
221
|
)
|
197
222
|
|
223
|
+
def _resolve_complex_type(self, complex_type: ComplexFieldType) -> Mapping[str, Any]:
|
224
|
+
if not complex_type.items:
|
225
|
+
return self._get_airbyte_type(complex_type.field_type)
|
226
|
+
|
227
|
+
field_type = self._get_airbyte_type(complex_type.field_type)
|
228
|
+
field_type["items"] = (
|
229
|
+
self._get_airbyte_type(complex_type.items)
|
230
|
+
if isinstance(complex_type.items, str)
|
231
|
+
else self._resolve_complex_type(complex_type.items)
|
232
|
+
)
|
233
|
+
|
234
|
+
return field_type
|
235
|
+
|
198
236
|
def _replace_type_if_not_valid(
|
199
237
|
self,
|
200
238
|
field_type: Union[List[str], str],
|
201
239
|
raw_schema: MutableMapping[str, Any],
|
202
|
-
) -> Union[List[str], str]:
|
240
|
+
) -> Union[List[str], str, ComplexFieldType]:
|
203
241
|
"""
|
204
242
|
Replaces a field type if it matches a type mapping in `types_map`.
|
205
243
|
"""
|
@@ -216,7 +254,7 @@ class DynamicSchemaLoader(SchemaLoader):
|
|
216
254
|
return field_type
|
217
255
|
|
218
256
|
@staticmethod
|
219
|
-
def _get_airbyte_type(field_type: str) ->
|
257
|
+
def _get_airbyte_type(field_type: str) -> MutableMapping[str, Any]:
|
220
258
|
"""
|
221
259
|
Maps a field type to its corresponding Airbyte type definition.
|
222
260
|
"""
|
@@ -22,16 +22,6 @@ class DeliverRecords(BaseModel):
|
|
22
22
|
|
23
23
|
delivery_type: Literal["use_records_transfer"] = Field("use_records_transfer", const=True)
|
24
24
|
|
25
|
-
sync_acl_permissions: bool = Field(
|
26
|
-
title="Include ACL Permissions",
|
27
|
-
description="Joins Document allowlists to each stream.",
|
28
|
-
default=False,
|
29
|
-
airbyte_hidden=True,
|
30
|
-
)
|
31
|
-
domain: Optional[str] = Field(
|
32
|
-
title="Domain", description="The domain of the identities.", airbyte_hidden=True
|
33
|
-
)
|
34
|
-
|
35
25
|
|
36
26
|
class DeliverRawFiles(BaseModel):
|
37
27
|
class Config(OneOfOptionConfig):
|
@@ -49,11 +49,7 @@ from airbyte_cdk.sources.file_based.schema_validation_policies import (
|
|
49
49
|
DEFAULT_SCHEMA_VALIDATION_POLICIES,
|
50
50
|
AbstractSchemaValidationPolicy,
|
51
51
|
)
|
52
|
-
from airbyte_cdk.sources.file_based.stream import
|
53
|
-
AbstractFileBasedStream,
|
54
|
-
DefaultFileBasedStream,
|
55
|
-
IdentitiesStream,
|
56
|
-
)
|
52
|
+
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream, DefaultFileBasedStream
|
57
53
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamFacade
|
58
54
|
from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
59
55
|
AbstractConcurrentFileBasedCursor,
|
@@ -61,7 +57,6 @@ from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
|
61
57
|
FileBasedFinalStateCursor,
|
62
58
|
)
|
63
59
|
from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
64
|
-
from airbyte_cdk.sources.file_based.stream.identities_stream import IDENTITIES_STREAM_NAME
|
65
60
|
from airbyte_cdk.sources.message.repository import InMemoryMessageRepository, MessageRepository
|
66
61
|
from airbyte_cdk.sources.streams import Stream
|
67
62
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
@@ -71,7 +66,6 @@ from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
|
71
66
|
DEFAULT_CONCURRENCY = 100
|
72
67
|
MAX_CONCURRENCY = 100
|
73
68
|
INITIAL_N_PARTITIONS = MAX_CONCURRENCY // 2
|
74
|
-
IDENTITIES_STREAM = "identities"
|
75
69
|
|
76
70
|
|
77
71
|
class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
@@ -163,9 +157,6 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
163
157
|
errors = []
|
164
158
|
tracebacks = []
|
165
159
|
for stream in streams:
|
166
|
-
if isinstance(stream, IdentitiesStream):
|
167
|
-
# Probably need to check identities endpoint/api access but will skip for now.
|
168
|
-
continue
|
169
160
|
if not isinstance(stream, AbstractFileBasedStream):
|
170
161
|
raise ValueError(f"Stream {stream} is not a file-based stream.")
|
171
162
|
try:
|
@@ -173,7 +164,6 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
173
164
|
availability_method = (
|
174
165
|
stream.availability_strategy.check_availability
|
175
166
|
if self._use_file_transfer(parsed_config)
|
176
|
-
or self._sync_acl_permissions(parsed_config)
|
177
167
|
else stream.availability_strategy.check_availability_and_parsability
|
178
168
|
)
|
179
169
|
(
|
@@ -299,10 +289,6 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
299
289
|
)
|
300
290
|
|
301
291
|
streams.append(stream)
|
302
|
-
|
303
|
-
if self._sync_acl_permissions(parsed_config):
|
304
|
-
identities_stream = self._make_identities_stream()
|
305
|
-
streams.append(identities_stream)
|
306
292
|
return streams
|
307
293
|
|
308
294
|
except ValidationError as exc:
|
@@ -326,17 +312,6 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
326
312
|
cursor=cursor,
|
327
313
|
use_file_transfer=self._use_file_transfer(parsed_config),
|
328
314
|
preserve_directory_structure=self._preserve_directory_structure(parsed_config),
|
329
|
-
sync_acl_permissions=self._sync_acl_permissions(parsed_config),
|
330
|
-
)
|
331
|
-
|
332
|
-
def _make_identities_stream(
|
333
|
-
self,
|
334
|
-
) -> Stream:
|
335
|
-
return IdentitiesStream(
|
336
|
-
catalog_schema=self.stream_schemas.get(IDENTITIES_STREAM_NAME),
|
337
|
-
stream_reader=self.stream_reader,
|
338
|
-
discovery_policy=self.discovery_policy,
|
339
|
-
errors_collector=self.errors_collector,
|
340
315
|
)
|
341
316
|
|
342
317
|
def _get_stream_from_catalog(
|
@@ -412,14 +387,6 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
412
387
|
)
|
413
388
|
return use_file_transfer
|
414
389
|
|
415
|
-
@staticmethod
|
416
|
-
def _use_records_transfer(parsed_config: AbstractFileBasedSpec) -> bool:
|
417
|
-
use_records_transfer = (
|
418
|
-
hasattr(parsed_config.delivery_method, "delivery_type")
|
419
|
-
and parsed_config.delivery_method.delivery_type == "use_records_transfer"
|
420
|
-
)
|
421
|
-
return use_records_transfer
|
422
|
-
|
423
390
|
@staticmethod
|
424
391
|
def _preserve_directory_structure(parsed_config: AbstractFileBasedSpec) -> bool:
|
425
392
|
"""
|
@@ -441,13 +408,3 @@ class FileBasedSource(ConcurrentSourceAdapter, ABC):
|
|
441
408
|
):
|
442
409
|
return parsed_config.delivery_method.preserve_directory_structure
|
443
410
|
return True
|
444
|
-
|
445
|
-
@staticmethod
|
446
|
-
def _sync_acl_permissions(parsed_config: AbstractFileBasedSpec) -> bool:
|
447
|
-
if (
|
448
|
-
FileBasedSource._use_records_transfer(parsed_config)
|
449
|
-
and hasattr(parsed_config.delivery_method, "sync_acl_permissions")
|
450
|
-
and parsed_config.delivery_method.sync_acl_permissions is not None
|
451
|
-
):
|
452
|
-
return parsed_config.delivery_method.sync_acl_permissions
|
453
|
-
return False
|
@@ -135,15 +135,6 @@ class AbstractFileBasedStreamReader(ABC):
|
|
135
135
|
return use_file_transfer
|
136
136
|
return False
|
137
137
|
|
138
|
-
def use_records_transfer(self) -> bool:
|
139
|
-
if self.config:
|
140
|
-
use_records_transfer = (
|
141
|
-
hasattr(self.config.delivery_method, "delivery_type")
|
142
|
-
and self.config.delivery_method.delivery_type == "use_records_transfer"
|
143
|
-
)
|
144
|
-
return use_records_transfer
|
145
|
-
return False
|
146
|
-
|
147
138
|
def preserve_directory_structure(self) -> bool:
|
148
139
|
# fall back to preserve subdirectories if config is not present or incomplete
|
149
140
|
if (
|
@@ -155,16 +146,6 @@ class AbstractFileBasedStreamReader(ABC):
|
|
155
146
|
return self.config.delivery_method.preserve_directory_structure
|
156
147
|
return True
|
157
148
|
|
158
|
-
def sync_acl_permissions(self) -> bool:
|
159
|
-
if (
|
160
|
-
self.config
|
161
|
-
and self.use_records_transfer()
|
162
|
-
and hasattr(self.config.delivery_method, "sync_acl_permissions")
|
163
|
-
and self.config.delivery_method.sync_acl_permissions is not None
|
164
|
-
):
|
165
|
-
return self.config.delivery_method.sync_acl_permissions
|
166
|
-
return False
|
167
|
-
|
168
149
|
@abstractmethod
|
169
150
|
def get_file(
|
170
151
|
self, file: RemoteFile, local_directory: str, logger: logging.Logger
|
@@ -202,17 +183,3 @@ class AbstractFileBasedStreamReader(ABC):
|
|
202
183
|
makedirs(path.dirname(local_file_path), exist_ok=True)
|
203
184
|
absolute_file_path = path.abspath(local_file_path)
|
204
185
|
return [file_relative_path, local_file_path, absolute_file_path]
|
205
|
-
|
206
|
-
def get_file_acl_permissions(self, file: RemoteFile, logger: logging.Logger) -> Dict[str, Any]:
|
207
|
-
"""
|
208
|
-
This is required for connectors that will support syncing
|
209
|
-
ACL Permissions from files.
|
210
|
-
"""
|
211
|
-
return {}
|
212
|
-
|
213
|
-
def load_identity_groups(self, logger: logging.Logger) -> Iterable[Dict[str, Any]]:
|
214
|
-
"""
|
215
|
-
This is required for connectors that will support syncing
|
216
|
-
identities.
|
217
|
-
"""
|
218
|
-
yield {}
|
@@ -23,31 +23,6 @@ file_transfer_schema = {
|
|
23
23
|
"properties": {"data": {"type": "object"}, "file": {"type": "object"}},
|
24
24
|
}
|
25
25
|
|
26
|
-
remote_file_permissions_schema = {
|
27
|
-
"type": "object",
|
28
|
-
"properties": {
|
29
|
-
"id": {"type": "string"},
|
30
|
-
"file_path": {"type": "string"},
|
31
|
-
"allowed_identity_remote_ids": {"type": "array", "items": {"type": "string"}},
|
32
|
-
"publicly_accessible": {"type": "boolean"},
|
33
|
-
},
|
34
|
-
}
|
35
|
-
|
36
|
-
remote_file_identity_schema = {
|
37
|
-
"type": "object",
|
38
|
-
"properties": {
|
39
|
-
"id": {"type": "string"},
|
40
|
-
"remote_id": {"type": "string"},
|
41
|
-
"parent_id": {"type": ["null", "string"]},
|
42
|
-
"name": {"type": ["null", "string"]},
|
43
|
-
"description": {"type": ["null", "string"]},
|
44
|
-
"email_address": {"type": ["null", "string"]},
|
45
|
-
"member_email_addresses": {"type": ["null", "array"]},
|
46
|
-
"type": {"type": "string"},
|
47
|
-
"modified_at": {"type": "string"},
|
48
|
-
},
|
49
|
-
}
|
50
|
-
|
51
26
|
|
52
27
|
@total_ordering
|
53
28
|
class ComparableType(Enum):
|
@@ -1,5 +1,4 @@
|
|
1
1
|
from airbyte_cdk.sources.file_based.stream.abstract_file_based_stream import AbstractFileBasedStream
|
2
2
|
from airbyte_cdk.sources.file_based.stream.default_file_based_stream import DefaultFileBasedStream
|
3
|
-
from airbyte_cdk.sources.file_based.stream.identities_stream import IdentitiesStream
|
4
3
|
|
5
|
-
__all__ = ["AbstractFileBasedStream", "DefaultFileBasedStream"
|
4
|
+
__all__ = ["AbstractFileBasedStream", "DefaultFileBasedStream"]
|
@@ -29,7 +29,6 @@ from airbyte_cdk.sources.file_based.schema_helpers import (
|
|
29
29
|
SchemaType,
|
30
30
|
file_transfer_schema,
|
31
31
|
merge_schemas,
|
32
|
-
remote_file_permissions_schema,
|
33
32
|
schemaless_schema,
|
34
33
|
)
|
35
34
|
from airbyte_cdk.sources.file_based.stream import AbstractFileBasedStream
|
@@ -48,7 +47,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
48
47
|
|
49
48
|
FILE_TRANSFER_KW = "use_file_transfer"
|
50
49
|
PRESERVE_DIRECTORY_STRUCTURE_KW = "preserve_directory_structure"
|
51
|
-
SYNC_ACL_PERMISSIONS_KW = "sync_acl_permissions"
|
52
50
|
FILES_KEY = "files"
|
53
51
|
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
54
52
|
ab_last_mod_col = "_ab_source_file_last_modified"
|
@@ -58,7 +56,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
58
56
|
airbyte_columns = [ab_last_mod_col, ab_file_name_col]
|
59
57
|
use_file_transfer = False
|
60
58
|
preserve_directory_structure = True
|
61
|
-
sync_acl_permissions = False
|
62
59
|
|
63
60
|
def __init__(self, **kwargs: Any):
|
64
61
|
if self.FILE_TRANSFER_KW in kwargs:
|
@@ -67,8 +64,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
67
64
|
self.preserve_directory_structure = kwargs.pop(
|
68
65
|
self.PRESERVE_DIRECTORY_STRUCTURE_KW, True
|
69
66
|
)
|
70
|
-
if self.SYNC_ACL_PERMISSIONS_KW in kwargs:
|
71
|
-
self.sync_acl_permissions = kwargs.pop(self.SYNC_ACL_PERMISSIONS_KW, False)
|
72
67
|
super().__init__(**kwargs)
|
73
68
|
|
74
69
|
@property
|
@@ -110,8 +105,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
110
105
|
self.ab_file_name_col: {"type": "string"},
|
111
106
|
},
|
112
107
|
}
|
113
|
-
elif self.sync_acl_permissions:
|
114
|
-
return remote_file_permissions_schema
|
115
108
|
else:
|
116
109
|
return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
|
117
110
|
|
@@ -194,26 +187,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
194
187
|
yield stream_data_to_airbyte_message(
|
195
188
|
self.name, record, is_file_transfer_message=True
|
196
189
|
)
|
197
|
-
elif self.sync_acl_permissions:
|
198
|
-
try:
|
199
|
-
metadata_record = self.stream_reader.get_file_acl_permissions(
|
200
|
-
file, logger=self.logger
|
201
|
-
)
|
202
|
-
yield stream_data_to_airbyte_message(
|
203
|
-
self.name, metadata_record, is_file_transfer_message=False
|
204
|
-
)
|
205
|
-
except Exception as e:
|
206
|
-
self.logger.error(
|
207
|
-
f"Failed to retrieve metadata for file {file.uri}: {str(e)}"
|
208
|
-
)
|
209
|
-
yield AirbyteMessage(
|
210
|
-
type=MessageType.LOG,
|
211
|
-
log=AirbyteLogMessage(
|
212
|
-
level=Level.ERROR,
|
213
|
-
message=f"Error retrieving metadata: stream={self.name} file={file.uri}",
|
214
|
-
stack_trace=traceback.format_exc(),
|
215
|
-
),
|
216
|
-
)
|
217
190
|
else:
|
218
191
|
for record in parser.parse_records(
|
219
192
|
self.config, file, self.stream_reader, self.logger, schema
|
@@ -311,8 +284,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
311
284
|
def _get_raw_json_schema(self) -> JsonSchema:
|
312
285
|
if self.use_file_transfer:
|
313
286
|
return file_transfer_schema
|
314
|
-
elif self.sync_acl_permissions:
|
315
|
-
return remote_file_permissions_schema
|
316
287
|
elif self.config.input_schema:
|
317
288
|
return self.config.get_input_schema() # type: ignore
|
318
289
|
elif self.config.schemaless:
|
@@ -67,7 +67,7 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=wbfk5udu
|
|
67
67
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
|
68
68
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
|
69
69
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
70
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
70
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=yHOfjvrxDVnQmMi-mrdM27Y0Uqk4fYMmp9Rwdbq6-7s,139662
|
71
71
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
72
72
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
|
73
73
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=KSpQetKGqPCv-38QgcVJ5kzM5nzbFldTSsYDCS3Xf0Y,1035
|
@@ -88,7 +88,7 @@ airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=tjNwcURmlyD
|
|
88
88
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=LhqGDfX06_dDYLKsIVnwQ_nAWCln-v8PV7Wgt_QVeTI,6533
|
89
89
|
airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
|
90
90
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
|
91
|
-
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=
|
91
|
+
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=4Z8qZ5DccF0fw163KR5fWW83O-3-84AlaZKPajZ0ZZI,15945
|
92
92
|
airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py,sha256=_UzUnSIUsDbRgbFTXgSyZEFb4ws-KdhdQPWO8mFbV7U,22028
|
93
93
|
airbyte_cdk/sources/declarative/incremental/declarative_cursor.py,sha256=5Bhw9VRPyIuCaD0wmmq_L3DZsa-rJgtKSEUzSd8YYD0,536
|
94
94
|
airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py,sha256=9HO-QbL9akvjq2NP7l498RwLA4iQZlBMQW1tZbt34I8,15943
|
@@ -109,20 +109,20 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
|
|
109
109
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
110
110
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
111
111
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
112
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
112
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=68JPw6bLHnTh7zGN3CC8B6b9NI4hxvSPOyLyY8TVRqk,98059
|
113
113
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
114
114
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=958MMX6_ZOJUlDDdNr9Krosgi2bCKGx2Z765M2Woz18,5505
|
115
115
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
116
116
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
117
117
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
118
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
118
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=zfWJLlopJklDK1xvoUy2qMFcnSklmQ7wwEbdWVxYlw0,122917
|
119
119
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
|
120
120
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
|
121
121
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
122
122
|
airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py,sha256=t7pRdFWfFWJtQQG19c9PVeMODyO2BknRTakpM5U9N-8,4844
|
123
123
|
airbyte_cdk/sources/declarative/partition_routers/partition_router.py,sha256=YyEIzdmLd1FjbVP3QbQ2VFCLW_P-OGbVh6VpZShp54k,2218
|
124
124
|
airbyte_cdk/sources/declarative/partition_routers/single_partition_router.py,sha256=SKzKjSyfccq4dxGIh-J6ejrgkCHzaiTIazmbmeQiRD4,1942
|
125
|
-
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=
|
125
|
+
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py,sha256=pEz-P6D5TGtP4isNfmtakgKD95PqMLo6fasCVLIguWk,16760
|
126
126
|
airbyte_cdk/sources/declarative/requesters/README.md,sha256=eL1I4iLkxaw7hJi9S9d18_XcRl-R8lUSjqBVJJzvXmg,2656
|
127
127
|
airbyte_cdk/sources/declarative/requesters/__init__.py,sha256=d7a3OoHbqaJDyyPli3nqqJ2yAW_SLX6XDaBAKOwvpxw,364
|
128
128
|
airbyte_cdk/sources/declarative/requesters/error_handlers/__init__.py,sha256=SkEDcJxlT1683rNx93K9whoS0OyUukkuOfToGtgpF58,776
|
@@ -168,9 +168,9 @@ airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=ix9m1dkR69DcXCXUKC
|
|
168
168
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=kX9ltelK2xLIBWDJBK2ucrvVe5tc5xmhdbVbgsjvlxY,3696
|
169
169
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
170
170
|
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=kgnhVQxRlFqJs2-rDu2-QH-p-GzQU3nKmSp6_aq8u0s,24550
|
171
|
-
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=
|
171
|
+
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
|
172
172
|
airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=KTACrIE23a83wsm3Rd9Eb4K6-20lrGqYxTHNp9yxsso,1820
|
173
|
-
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=
|
173
|
+
airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py,sha256=d8tfDiDcJiunvN_Yalyfx5ISY5A-iIW3HbPwX2Hagh4,10702
|
174
174
|
airbyte_cdk/sources/declarative/schema/inline_schema_loader.py,sha256=bVETE10hRsatRJq3R3BeyRR0wIoK3gcP1gcpVRQ_P5U,464
|
175
175
|
airbyte_cdk/sources/declarative/schema/json_file_schema_loader.py,sha256=5Wl-fqW-pVf_dxJ4yGHMAFfC4JjKHYJhqFJT1xA57F4,4177
|
176
176
|
airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLnrDLxf1PJKdUqvQq2RVnAOAzNSY,379
|
@@ -201,21 +201,20 @@ airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=ddKQfUmk
|
|
201
201
|
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py,sha256=01Nd4b7ERAbp-OZo_8rrAzFXWPTMwr02SnWiN17nx8Q,2363
|
202
202
|
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py,sha256=j9T5TimfWFUz7nqsaj-83G3xWmDpsmeSbDnaUNmz0UM,5849
|
203
203
|
airbyte_cdk/sources/file_based/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
204
|
-
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=
|
204
|
+
airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py,sha256=gXlZwnEKLWknnK_n7j14lANgR6vkqhlLJ-G3rRu-ox4,6897
|
205
205
|
airbyte_cdk/sources/file_based/config/avro_format.py,sha256=NxTF96ewzn6HuhgodsY7Rpb-ybr1ZEWW5d4Vid64g5A,716
|
206
206
|
airbyte_cdk/sources/file_based/config/csv_format.py,sha256=NWekkyT8dTwiVK0mwa_krQD4FJPHSDfILo8kPAg3-Vs,8006
|
207
207
|
airbyte_cdk/sources/file_based/config/excel_format.py,sha256=9qAmTsT6SoVzNfNv0oBVkVCmiyqQuVAbfRKajjoa7Js,378
|
208
208
|
airbyte_cdk/sources/file_based/config/file_based_stream_config.py,sha256=rkTuHpz9G8o2YEnCkOZJM2vJZt_hEE4zklHivRfx43s,4647
|
209
209
|
airbyte_cdk/sources/file_based/config/jsonl_format.py,sha256=cxtpz4t9_ERQyj_1Bx4DjOxuYLykWt0B02S4dWW5BgM,378
|
210
210
|
airbyte_cdk/sources/file_based/config/parquet_format.py,sha256=XOp-7nmm_WcbGI8SjKH2fs3Mkf1H4RAOYSWeUFYAz3w,741
|
211
|
-
airbyte_cdk/sources/file_based/config/permissions.py,sha256=CmXKilhNQOfm4NFlXVBFF2pz3hIUrt3JFp5bPVerE_8,781
|
212
211
|
airbyte_cdk/sources/file_based/config/unstructured_format.py,sha256=tIbB9Pn1HqU67ju7hEZ9dBstRrb2eojUNMsdckzbj58,3565
|
213
212
|
airbyte_cdk/sources/file_based/discovery_policy/__init__.py,sha256=gl3ey6mZbyfraB9P3pFhf9UJp2JeTZ1SUFAopy2iBvY,301
|
214
213
|
airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py,sha256=dCfXX529Rd5rtopg4VeEgTPJjFtqjtjzPq6LCw18Wt0,605
|
215
214
|
airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha256=-xujTidtrq6HC00WKbjQh1CZdT5LMuzkp5BLjqDmfTY,1007
|
216
215
|
airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
|
217
|
-
airbyte_cdk/sources/file_based/file_based_source.py,sha256=
|
218
|
-
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=
|
216
|
+
airbyte_cdk/sources/file_based/file_based_source.py,sha256=Biv2QufYQtHZQCBZs4iCUpqTd82rk7xo8SDYkEeau3k,17616
|
217
|
+
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=e1KhgTh7mzvkBOz9DjLwzOsDwevrTmbxSYIcvhgWgGM,6856
|
219
218
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
|
220
219
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=XNx-JC-sgzH9u3nOJ2M59FxBXvtig8LN6BIkeDOavZA,10858
|
221
220
|
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
|
@@ -226,11 +225,11 @@ airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-Xp
|
|
226
225
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
|
227
226
|
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=2TYOQl62FQPCa8otLbkDIk_j01EP3oWaKSfXGhCjCHg,19492
|
228
227
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
|
229
|
-
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=
|
228
|
+
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=Cf8FH1bDFP0qCDDfEYir_WjP4exXUnikz8hZ40y1Ek0,9601
|
230
229
|
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
|
231
230
|
airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=kjvX7nOmUALYd7HuZHilUzgJPZ-MnZ08mtvuBnt2tQ0,618
|
232
231
|
airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=vjTlmYT_nqzY3DbT5xem7X-bwgA9RyXHoKFqiMO2URk,1728
|
233
|
-
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=
|
232
|
+
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaFS_3pIwm-chEabZHiPJi0,265
|
234
233
|
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=9pQh3BHYcxm8CRC8XawfmBxL8O9HggpWwCCbX_ncINE,7509
|
235
234
|
airbyte_cdk/sources/file_based/stream/concurrent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
236
235
|
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=WZ5q2uovgohauJgwfxq_LFeZ92WMZd0LoH6c5QQURPo,13931
|
@@ -241,8 +240,7 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
|
|
241
240
|
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
242
241
|
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
|
243
242
|
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
|
244
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
245
|
-
airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=kHFaBn4Wsqi8PYI2z7_aGsjMPA5A4UoPrSMnKfxP4SA,3644
|
243
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=XLU5cNqQ-5mj243gNzMyXtm_oCtg1ORyoqbCsUo9Dn4,18044
|
246
244
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
247
245
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
248
246
|
airbyte_cdk/sources/http_logger.py,sha256=l_1fk5YwdonZ1wvAsTwjj6d36fj2WrVraIAMj5jTQdM,1575
|
@@ -352,8 +350,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
|
|
352
350
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
353
351
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
354
352
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
355
|
-
airbyte_cdk-6.26.
|
356
|
-
airbyte_cdk-6.26.
|
357
|
-
airbyte_cdk-6.26.
|
358
|
-
airbyte_cdk-6.26.
|
359
|
-
airbyte_cdk-6.26.
|
353
|
+
airbyte_cdk-6.26.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
354
|
+
airbyte_cdk-6.26.1.dist-info/METADATA,sha256=wZfNnYb9jDrsSmSBh1QX2mHCQ2t_OhtdRaZ1jVeWnKY,5996
|
355
|
+
airbyte_cdk-6.26.1.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
|
356
|
+
airbyte_cdk-6.26.1.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
357
|
+
airbyte_cdk-6.26.1.dist-info/RECORD,,
|
@@ -1,34 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import uuid
|
6
|
-
from datetime import datetime
|
7
|
-
from enum import Enum
|
8
|
-
|
9
|
-
from pydantic.v1 import BaseModel
|
10
|
-
|
11
|
-
|
12
|
-
class RemoteFileIdentityType(Enum):
|
13
|
-
USER = "user"
|
14
|
-
GROUP = "group"
|
15
|
-
|
16
|
-
|
17
|
-
class RemoteFileIdentity(BaseModel):
|
18
|
-
id: uuid.UUID
|
19
|
-
remote_id: str
|
20
|
-
parent_id: str | None = None
|
21
|
-
name: str | None = None
|
22
|
-
description: str | None = None
|
23
|
-
email_address: str | None = None
|
24
|
-
member_email_addresses: list[str] | None = None
|
25
|
-
type: RemoteFileIdentityType
|
26
|
-
modified_at: datetime
|
27
|
-
|
28
|
-
|
29
|
-
class RemoteFilePermissions(BaseModel):
|
30
|
-
id: str
|
31
|
-
file_path: str
|
32
|
-
allowed_identity_remote_ids: list[str] | None = None
|
33
|
-
denied_identity_remote_ids: list[str] | None = None
|
34
|
-
publicly_accessible: bool = False
|
@@ -1,96 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import traceback
|
6
|
-
from functools import cache
|
7
|
-
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional
|
8
|
-
|
9
|
-
from airbyte_protocol_dataclasses.models import SyncMode
|
10
|
-
|
11
|
-
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level
|
12
|
-
from airbyte_cdk.models import Type as MessageType
|
13
|
-
from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
|
14
|
-
from airbyte_cdk.sources.file_based.discovery_policy import AbstractDiscoveryPolicy
|
15
|
-
from airbyte_cdk.sources.file_based.exceptions import FileBasedErrorsCollector, FileBasedSourceError
|
16
|
-
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
17
|
-
from airbyte_cdk.sources.file_based.schema_helpers import remote_file_identity_schema
|
18
|
-
from airbyte_cdk.sources.file_based.types import StreamSlice
|
19
|
-
from airbyte_cdk.sources.streams import Stream
|
20
|
-
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
21
|
-
from airbyte_cdk.sources.streams.core import JsonSchema
|
22
|
-
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
23
|
-
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
24
|
-
|
25
|
-
IDENTITIES_STREAM_NAME = "identities"
|
26
|
-
|
27
|
-
|
28
|
-
class IdentitiesStream(Stream):
|
29
|
-
"""
|
30
|
-
The identities stream. A full refresh stream to sync identities from a certain domain.
|
31
|
-
The stream reader manage the logic to get such data, which is implemented on connector side.
|
32
|
-
"""
|
33
|
-
|
34
|
-
is_resumable = False
|
35
|
-
|
36
|
-
def __init__(
|
37
|
-
self,
|
38
|
-
catalog_schema: Optional[Mapping[str, Any]],
|
39
|
-
stream_reader: AbstractFileBasedStreamReader,
|
40
|
-
discovery_policy: AbstractDiscoveryPolicy,
|
41
|
-
errors_collector: FileBasedErrorsCollector,
|
42
|
-
):
|
43
|
-
super().__init__()
|
44
|
-
self.catalog_schema = catalog_schema
|
45
|
-
self.stream_reader = stream_reader
|
46
|
-
self._discovery_policy = discovery_policy
|
47
|
-
self.errors_collector = errors_collector
|
48
|
-
self._cursor: MutableMapping[str, Any] = {}
|
49
|
-
|
50
|
-
@property
|
51
|
-
def state(self) -> MutableMapping[str, Any]:
|
52
|
-
return self._cursor
|
53
|
-
|
54
|
-
@state.setter
|
55
|
-
def state(self, value: MutableMapping[str, Any]) -> None:
|
56
|
-
"""State setter, accept state serialized by state getter."""
|
57
|
-
self._cursor = value
|
58
|
-
|
59
|
-
@property
|
60
|
-
def primary_key(self) -> PrimaryKeyType:
|
61
|
-
return None
|
62
|
-
|
63
|
-
def read_records(
|
64
|
-
self,
|
65
|
-
sync_mode: SyncMode,
|
66
|
-
cursor_field: Optional[List[str]] = None,
|
67
|
-
stream_slice: Optional[StreamSlice] = None,
|
68
|
-
stream_state: Optional[Mapping[str, Any]] = None,
|
69
|
-
) -> Iterable[Mapping[str, Any] | AirbyteMessage]:
|
70
|
-
try:
|
71
|
-
identity_groups = self.stream_reader.load_identity_groups(logger=self.logger)
|
72
|
-
for record in identity_groups:
|
73
|
-
yield stream_data_to_airbyte_message(self.name, record)
|
74
|
-
except AirbyteTracedException as exc:
|
75
|
-
# Re-raise the exception to stop the whole sync immediately as this is a fatal error
|
76
|
-
raise exc
|
77
|
-
except Exception:
|
78
|
-
yield AirbyteMessage(
|
79
|
-
type=MessageType.LOG,
|
80
|
-
log=AirbyteLogMessage(
|
81
|
-
level=Level.ERROR,
|
82
|
-
message=f"{FileBasedSourceError.ERROR_PARSING_RECORD.value} stream={self.name}",
|
83
|
-
stack_trace=traceback.format_exc(),
|
84
|
-
),
|
85
|
-
)
|
86
|
-
|
87
|
-
@cache
|
88
|
-
def get_json_schema(self) -> JsonSchema:
|
89
|
-
return remote_file_identity_schema
|
90
|
-
|
91
|
-
@property
|
92
|
-
def name(self) -> str:
|
93
|
-
return IDENTITIES_STREAM_NAME
|
94
|
-
|
95
|
-
def get_cursor(self) -> Optional[Cursor]:
|
96
|
-
return None
|
File without changes
|
File without changes
|
File without changes
|