airbyte-cdk 6.45.4.post52.dev14501809740__py3-none-any.whl → 6.45.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/models/__init__.py +0 -1
- airbyte_cdk/models/airbyte_protocol.py +3 -1
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/auth/oauth.py +2 -2
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +6 -10
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -36
- airbyte_cdk/sources/declarative/extractors/record_selector.py +1 -6
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +0 -31
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +4 -40
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -9
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -38
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +15 -8
- airbyte_cdk/sources/file_based/schema_helpers.py +1 -11
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +12 -3
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +38 -15
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +3 -1
- airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -27
- airbyte_cdk/sources/types.py +2 -11
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- airbyte_cdk/test/mock_http/response_builder.py +0 -8
- {airbyte_cdk-6.45.4.post52.dev14501809740.dist-info → airbyte_cdk-6.45.5.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.45.4.post52.dev14501809740.dist-info → airbyte_cdk-6.45.5.dist-info}/RECORD +29 -31
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -89
- airbyte_cdk/sources/file_based/file_record_data.py +0 -23
- airbyte_cdk/sources/utils/files_directory.py +0 -15
- {airbyte_cdk-6.45.4.post52.dev14501809740.dist-info → airbyte_cdk-6.45.5.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.4.post52.dev14501809740.dist-info → airbyte_cdk-6.45.5.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.4.post52.dev14501809740.dist-info → airbyte_cdk-6.45.5.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.4.post52.dev14501809740.dist-info → airbyte_cdk-6.45.5.dist-info}/entry_points.txt +0 -0
airbyte_cdk/models/__init__.py
CHANGED
@@ -8,6 +8,8 @@ from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
|
|
8
8
|
from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
|
9
9
|
from serpyco_rs.metadata import Alias
|
10
10
|
|
11
|
+
from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
|
12
|
+
|
11
13
|
# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
|
12
14
|
|
13
15
|
|
@@ -82,7 +84,7 @@ class AirbyteMessage:
|
|
82
84
|
spec: Optional[ConnectorSpecification] = None # type: ignore [name-defined]
|
83
85
|
connectionStatus: Optional[AirbyteConnectionStatus] = None # type: ignore [name-defined]
|
84
86
|
catalog: Optional[AirbyteCatalog] = None # type: ignore [name-defined]
|
85
|
-
record: Optional[AirbyteRecordMessage] = None # type: ignore [name-defined]
|
87
|
+
record: Optional[Union[AirbyteFileTransferRecordMessage, AirbyteRecordMessage]] = None # type: ignore [name-defined]
|
86
88
|
state: Optional[AirbyteStateMessage] = None
|
87
89
|
trace: Optional[AirbyteTraceMessage] = None # type: ignore [name-defined]
|
88
90
|
control: Optional[AirbyteControlMessage] = None # type: ignore [name-defined]
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
from dataclasses import dataclass
|
4
|
+
from typing import Any, Dict, Optional
|
5
|
+
|
6
|
+
|
7
|
+
@dataclass
|
8
|
+
class AirbyteFileTransferRecordMessage:
|
9
|
+
stream: str
|
10
|
+
file: Dict[str, Any]
|
11
|
+
emitted_at: int
|
12
|
+
namespace: Optional[str] = None
|
13
|
+
data: Optional[Dict[str, Any]] = None
|
@@ -149,7 +149,7 @@ class ConcurrentReadProcessor:
|
|
149
149
|
message = stream_data_to_airbyte_message(
|
150
150
|
stream_name=record.stream_name,
|
151
151
|
data_or_message=record.data,
|
152
|
-
|
152
|
+
is_file_transfer_message=record.is_file_transfer_message,
|
153
153
|
)
|
154
154
|
stream = self._stream_name_to_instance[record.stream_name]
|
155
155
|
|
@@ -239,8 +239,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
|
|
239
239
|
def _has_access_token_been_initialized(self) -> bool:
|
240
240
|
return self._access_token is not None
|
241
241
|
|
242
|
-
def set_token_expiry_date(self, value:
|
243
|
-
self._token_expiry_date =
|
242
|
+
def set_token_expiry_date(self, value: AirbyteDateTime) -> None:
|
243
|
+
self._token_expiry_date = value
|
244
244
|
|
245
245
|
def get_assertion_name(self) -> str:
|
246
246
|
return self.assertion_name
|
@@ -19,13 +19,15 @@ from airbyte_cdk.sources.declarative.extractors import RecordSelector
|
|
19
19
|
from airbyte_cdk.sources.declarative.extractors.record_filter import (
|
20
20
|
ClientSideIncrementalRecordFilterDecorator,
|
21
21
|
)
|
22
|
-
from airbyte_cdk.sources.declarative.incremental import
|
22
|
+
from airbyte_cdk.sources.declarative.incremental import (
|
23
|
+
ConcurrentPerPartitionCursor,
|
24
|
+
GlobalSubstreamCursor,
|
25
|
+
)
|
23
26
|
from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
|
24
27
|
from airbyte_cdk.sources.declarative.incremental.per_partition_with_global import (
|
25
28
|
PerPartitionWithGlobalCursor,
|
26
29
|
)
|
27
30
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
28
|
-
from airbyte_cdk.sources.declarative.models import FileUploader
|
29
31
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
30
32
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
31
33
|
)
|
@@ -207,10 +209,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
207
209
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
208
210
|
# so we need to treat them as synchronous
|
209
211
|
|
210
|
-
supports_file_transfer = (
|
211
|
-
"file_uploader" in name_to_stream_mapping[declarative_stream.name]
|
212
|
-
)
|
213
|
-
|
214
212
|
if (
|
215
213
|
isinstance(declarative_stream, DeclarativeStream)
|
216
214
|
and name_to_stream_mapping[declarative_stream.name]["type"]
|
@@ -327,7 +325,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
327
325
|
else None,
|
328
326
|
logger=self.logger,
|
329
327
|
cursor=cursor,
|
330
|
-
supports_file_transfer=supports_file_transfer,
|
331
328
|
)
|
332
329
|
)
|
333
330
|
elif (
|
@@ -359,7 +356,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
359
356
|
cursor_field=None,
|
360
357
|
logger=self.logger,
|
361
358
|
cursor=final_state_cursor,
|
362
|
-
supports_file_transfer=supports_file_transfer,
|
363
359
|
)
|
364
360
|
)
|
365
361
|
elif (
|
@@ -368,7 +364,8 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
368
364
|
== DatetimeBasedCursorModel.__name__
|
369
365
|
and hasattr(declarative_stream.retriever, "stream_slicer")
|
370
366
|
and isinstance(
|
371
|
-
declarative_stream.retriever.stream_slicer,
|
367
|
+
declarative_stream.retriever.stream_slicer,
|
368
|
+
(GlobalSubstreamCursor, PerPartitionWithGlobalCursor),
|
372
369
|
)
|
373
370
|
):
|
374
371
|
stream_state = self._connector_state_manager.get_stream_state(
|
@@ -413,7 +410,6 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
413
410
|
cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
|
414
411
|
logger=self.logger,
|
415
412
|
cursor=perpartition_cursor,
|
416
|
-
supports_file_transfer=supports_file_transfer,
|
417
413
|
)
|
418
414
|
)
|
419
415
|
else:
|
@@ -1448,42 +1448,6 @@ definitions:
|
|
1448
1448
|
- "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
|
1449
1449
|
- "$ref": "#/definitions/CustomStateMigration"
|
1450
1450
|
default: []
|
1451
|
-
file_uploader:
|
1452
|
-
title: File Uploader
|
1453
|
-
description: (experimental) Describes how to fetch a file
|
1454
|
-
type: object
|
1455
|
-
required:
|
1456
|
-
- type
|
1457
|
-
- requester
|
1458
|
-
- download_target_extractor
|
1459
|
-
properties:
|
1460
|
-
type:
|
1461
|
-
type: string
|
1462
|
-
enum: [ FileUploader ]
|
1463
|
-
requester:
|
1464
|
-
description: Requester component that describes how to prepare HTTP requests to send to the source API.
|
1465
|
-
anyOf:
|
1466
|
-
- "$ref": "#/definitions/CustomRequester"
|
1467
|
-
- "$ref": "#/definitions/HttpRequester"
|
1468
|
-
download_target_extractor:
|
1469
|
-
description: Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response
|
1470
|
-
anyOf:
|
1471
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
1472
|
-
- "$ref": "#/definitions/DpathExtractor"
|
1473
|
-
file_extractor:
|
1474
|
-
description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
|
1475
|
-
anyOf:
|
1476
|
-
- "$ref": "#/definitions/CustomRecordExtractor"
|
1477
|
-
- "$ref": "#/definitions/DpathExtractor"
|
1478
|
-
filename_extractor:
|
1479
|
-
description: Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.
|
1480
|
-
type: string
|
1481
|
-
interpolation_context:
|
1482
|
-
- config
|
1483
|
-
- record
|
1484
|
-
examples:
|
1485
|
-
- "{{ record.id }}/{{ record.file_name }}/"
|
1486
|
-
- "{{ record.id }}_{{ record.file_name }}/"
|
1487
1451
|
$parameters:
|
1488
1452
|
type: object
|
1489
1453
|
additional_properties: true
|
@@ -15,7 +15,6 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
|
|
15
15
|
)
|
16
16
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
17
17
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
18
|
-
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
19
18
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
20
19
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
21
20
|
from airbyte_cdk.sources.utils.transform import TypeTransformer
|
@@ -43,7 +42,6 @@ class RecordSelector(HttpSelector):
|
|
43
42
|
record_filter: Optional[RecordFilter] = None
|
44
43
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
45
44
|
transform_before_filtering: bool = False
|
46
|
-
file_uploader: Optional[FileUploader] = None
|
47
45
|
|
48
46
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
49
47
|
self._parameters = parameters
|
@@ -119,10 +117,7 @@ class RecordSelector(HttpSelector):
|
|
119
117
|
transformed_filtered_data, schema=records_schema
|
120
118
|
)
|
121
119
|
for data in normalized_data:
|
122
|
-
|
123
|
-
if self.file_uploader:
|
124
|
-
self.file_uploader.upload(record)
|
125
|
-
yield record
|
120
|
+
yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
126
121
|
|
127
122
|
def _normalize_by_schema(
|
128
123
|
self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
|
@@ -2066,31 +2066,6 @@ class SelectiveAuthenticator(BaseModel):
|
|
2066
2066
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2067
2067
|
|
2068
2068
|
|
2069
|
-
class FileUploader(BaseModel):
|
2070
|
-
type: Literal["FileUploader"]
|
2071
|
-
requester: Union[CustomRequester, HttpRequester] = Field(
|
2072
|
-
...,
|
2073
|
-
description="Requester component that describes how to prepare HTTP requests to send to the source API.",
|
2074
|
-
)
|
2075
|
-
download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
|
2076
|
-
...,
|
2077
|
-
description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
|
2078
|
-
)
|
2079
|
-
file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
|
2080
|
-
None,
|
2081
|
-
description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
|
2082
|
-
)
|
2083
|
-
filename_extractor: Optional[str] = Field(
|
2084
|
-
None,
|
2085
|
-
description="Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.",
|
2086
|
-
examples=[
|
2087
|
-
"{{ record.id }}/{{ record.file_name }}/",
|
2088
|
-
"{{ record.id }}_{{ record.file_name }}/",
|
2089
|
-
],
|
2090
|
-
)
|
2091
|
-
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2092
|
-
|
2093
|
-
|
2094
2069
|
class DeclarativeStream(BaseModel):
|
2095
2070
|
class Config:
|
2096
2071
|
extra = Extra.allow
|
@@ -2149,11 +2124,6 @@ class DeclarativeStream(BaseModel):
|
|
2149
2124
|
description="Array of state migrations to be applied on the input state",
|
2150
2125
|
title="State Migrations",
|
2151
2126
|
)
|
2152
|
-
file_uploader: Optional[FileUploader] = Field(
|
2153
|
-
None,
|
2154
|
-
description="(experimental) Describes how to fetch a file",
|
2155
|
-
title="File Uploader",
|
2156
|
-
)
|
2157
2127
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2158
2128
|
|
2159
2129
|
|
@@ -2647,7 +2617,6 @@ CompositeErrorHandler.update_forward_refs()
|
|
2647
2617
|
DeclarativeSource1.update_forward_refs()
|
2648
2618
|
DeclarativeSource2.update_forward_refs()
|
2649
2619
|
SelectiveAuthenticator.update_forward_refs()
|
2650
|
-
FileUploader.update_forward_refs()
|
2651
2620
|
DeclarativeStream.update_forward_refs()
|
2652
2621
|
SessionTokenAuthenticator.update_forward_refs()
|
2653
2622
|
DynamicSchemaLoader.update_forward_refs()
|
@@ -106,6 +106,7 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
106
106
|
)
|
107
107
|
from airbyte_cdk.sources.declarative.models import (
|
108
108
|
CustomStateMigration,
|
109
|
+
GzipDecoder,
|
109
110
|
)
|
110
111
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
111
112
|
AddedFieldDefinition as AddedFieldDefinitionModel,
|
@@ -227,9 +228,6 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
227
228
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
228
229
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
229
230
|
)
|
230
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
|
-
FileUploader as FileUploaderModel,
|
232
|
-
)
|
233
231
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
234
232
|
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
235
233
|
)
|
@@ -481,7 +479,6 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
481
479
|
SimpleRetriever,
|
482
480
|
SimpleRetrieverTestReadDecorator,
|
483
481
|
)
|
484
|
-
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
485
482
|
from airbyte_cdk.sources.declarative.schema import (
|
486
483
|
ComplexFieldType,
|
487
484
|
DefaultSchemaLoader,
|
@@ -679,7 +676,6 @@ class ModelToComponentFactory:
|
|
679
676
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
680
677
|
ZipfileDecoderModel: self.create_zipfile_decoder,
|
681
678
|
HTTPAPIBudgetModel: self.create_http_api_budget,
|
682
|
-
FileUploaderModel: self.create_file_uploader,
|
683
679
|
FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
|
684
680
|
MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
|
685
681
|
UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
|
@@ -1443,7 +1439,9 @@ class ModelToComponentFactory:
|
|
1443
1439
|
stream_state = self.apply_stream_state_migrations(stream_state_migrations, stream_state)
|
1444
1440
|
|
1445
1441
|
# Per-partition state doesn't make sense for GroupingPartitionRouter, so force the global state
|
1446
|
-
use_global_cursor = isinstance(
|
1442
|
+
use_global_cursor = isinstance(
|
1443
|
+
partition_router, GroupingPartitionRouter
|
1444
|
+
) or component_definition.get("global_substream_cursor", False)
|
1447
1445
|
|
1448
1446
|
# Return the concurrent cursor and state converter
|
1449
1447
|
return ConcurrentPerPartitionCursor(
|
@@ -1842,11 +1840,6 @@ class ModelToComponentFactory:
|
|
1842
1840
|
transformations.append(
|
1843
1841
|
self._create_component_from_model(model=transformation_model, config=config)
|
1844
1842
|
)
|
1845
|
-
file_uploader = None
|
1846
|
-
if model.file_uploader:
|
1847
|
-
file_uploader = self._create_component_from_model(
|
1848
|
-
model=model.file_uploader, config=config
|
1849
|
-
)
|
1850
1843
|
|
1851
1844
|
retriever = self._create_component_from_model(
|
1852
1845
|
model=model.retriever,
|
@@ -1858,7 +1851,6 @@ class ModelToComponentFactory:
|
|
1858
1851
|
stop_condition_on_cursor=stop_condition_on_cursor,
|
1859
1852
|
client_side_incremental_sync=client_side_incremental_sync,
|
1860
1853
|
transformations=transformations,
|
1861
|
-
file_uploader=file_uploader,
|
1862
1854
|
incremental_sync=model.incremental_sync,
|
1863
1855
|
)
|
1864
1856
|
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
@@ -2804,7 +2796,6 @@ class ModelToComponentFactory:
|
|
2804
2796
|
transformations: List[RecordTransformation] | None = None,
|
2805
2797
|
decoder: Decoder | None = None,
|
2806
2798
|
client_side_incremental_sync: Dict[str, Any] | None = None,
|
2807
|
-
file_uploader: Optional[FileUploader] = None,
|
2808
2799
|
**kwargs: Any,
|
2809
2800
|
) -> RecordSelector:
|
2810
2801
|
extractor = self._create_component_from_model(
|
@@ -2842,7 +2833,6 @@ class ModelToComponentFactory:
|
|
2842
2833
|
config=config,
|
2843
2834
|
record_filter=record_filter,
|
2844
2835
|
transformations=transformations or [],
|
2845
|
-
file_uploader=file_uploader,
|
2846
2836
|
schema_normalization=schema_normalization,
|
2847
2837
|
parameters=model.parameters or {},
|
2848
2838
|
transform_before_filtering=transform_before_filtering,
|
@@ -2900,7 +2890,6 @@ class ModelToComponentFactory:
|
|
2900
2890
|
stop_condition_on_cursor: bool = False,
|
2901
2891
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
2902
2892
|
transformations: List[RecordTransformation],
|
2903
|
-
file_uploader: Optional[FileUploader] = None,
|
2904
2893
|
incremental_sync: Optional[
|
2905
2894
|
Union[
|
2906
2895
|
IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
|
@@ -2921,7 +2910,6 @@ class ModelToComponentFactory:
|
|
2921
2910
|
decoder=decoder,
|
2922
2911
|
transformations=transformations,
|
2923
2912
|
client_side_incremental_sync=client_side_incremental_sync,
|
2924
|
-
file_uploader=file_uploader,
|
2925
2913
|
)
|
2926
2914
|
|
2927
2915
|
query_properties: Optional[QueryProperties] = None
|
@@ -3588,30 +3576,6 @@ class ModelToComponentFactory:
|
|
3588
3576
|
matchers=matchers,
|
3589
3577
|
)
|
3590
3578
|
|
3591
|
-
def create_file_uploader(
|
3592
|
-
self, model: FileUploaderModel, config: Config, **kwargs: Any
|
3593
|
-
) -> FileUploader:
|
3594
|
-
name = "File Uploader"
|
3595
|
-
requester = self._create_component_from_model(
|
3596
|
-
model=model.requester,
|
3597
|
-
config=config,
|
3598
|
-
name=name,
|
3599
|
-
**kwargs,
|
3600
|
-
)
|
3601
|
-
download_target_extractor = self._create_component_from_model(
|
3602
|
-
model=model.download_target_extractor,
|
3603
|
-
config=config,
|
3604
|
-
name=name,
|
3605
|
-
**kwargs,
|
3606
|
-
)
|
3607
|
-
return FileUploader(
|
3608
|
-
requester=requester,
|
3609
|
-
download_target_extractor=download_target_extractor,
|
3610
|
-
config=config,
|
3611
|
-
parameters=model.parameters or {},
|
3612
|
-
filename_extractor=model.filename_extractor if model.filename_extractor else None,
|
3613
|
-
)
|
3614
|
-
|
3615
3579
|
def create_moving_window_call_rate_policy(
|
3616
3580
|
self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
3617
3581
|
) -> MovingWindowCallRatePolicy:
|
@@ -58,16 +58,11 @@ class DeclarativePartition(Partition):
|
|
58
58
|
def read(self) -> Iterable[Record]:
|
59
59
|
for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
|
60
60
|
if isinstance(stream_data, Mapping):
|
61
|
-
|
62
|
-
stream_data
|
63
|
-
|
64
|
-
|
65
|
-
data=stream_data,
|
66
|
-
stream_name=self.stream_name(),
|
67
|
-
associated_slice=self._stream_slice,
|
68
|
-
)
|
61
|
+
yield Record(
|
62
|
+
data=stream_data,
|
63
|
+
stream_name=self.stream_name(),
|
64
|
+
associated_slice=self._stream_slice,
|
69
65
|
)
|
70
|
-
yield record
|
71
66
|
else:
|
72
67
|
self._message_repository.emit_message(stream_data)
|
73
68
|
|
@@ -8,18 +8,16 @@ from datetime import datetime
|
|
8
8
|
from enum import Enum
|
9
9
|
from io import IOBase
|
10
10
|
from os import makedirs, path
|
11
|
-
from typing import Any,
|
11
|
+
from typing import Any, Dict, Iterable, List, Optional, Set
|
12
12
|
|
13
13
|
from wcmatch.glob import GLOBSTAR, globmatch
|
14
14
|
|
15
|
-
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
16
15
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
17
16
|
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
|
18
17
|
include_identities_stream,
|
19
18
|
preserve_directory_structure,
|
20
19
|
use_file_transfer,
|
21
20
|
)
|
22
|
-
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
23
21
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
24
22
|
|
25
23
|
|
@@ -30,10 +28,6 @@ class FileReadMode(Enum):
|
|
30
28
|
|
31
29
|
class AbstractFileBasedStreamReader(ABC):
|
32
30
|
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
33
|
-
FILE_RELATIVE_PATH = "file_relative_path"
|
34
|
-
FILE_NAME = "file_name"
|
35
|
-
LOCAL_FILE_PATH = "local_file_path"
|
36
|
-
FILE_FOLDER = "file_folder"
|
37
31
|
|
38
32
|
def __init__(self) -> None:
|
39
33
|
self._config = None
|
@@ -154,9 +148,9 @@ class AbstractFileBasedStreamReader(ABC):
|
|
154
148
|
return False
|
155
149
|
|
156
150
|
@abstractmethod
|
157
|
-
def
|
151
|
+
def get_file(
|
158
152
|
self, file: RemoteFile, local_directory: str, logger: logging.Logger
|
159
|
-
) ->
|
153
|
+
) -> Dict[str, Any]:
|
160
154
|
"""
|
161
155
|
This is required for connectors that will support writing to
|
162
156
|
files. It will handle the logic to download,get,read,acquire or
|
@@ -168,41 +162,25 @@ class AbstractFileBasedStreamReader(ABC):
|
|
168
162
|
logger (logging.Logger): Logger for logging information and errors.
|
169
163
|
|
170
164
|
Returns:
|
171
|
-
|
172
|
-
-
|
173
|
-
-
|
174
|
-
-
|
165
|
+
dict: A dictionary containing the following:
|
166
|
+
- "file_url" (str): The absolute path of the downloaded file.
|
167
|
+
- "bytes" (int): The file size in bytes.
|
168
|
+
- "file_relative_path" (str): The relative path of the file for local storage. Is relative to local_directory as
|
169
|
+
this is a mounted volume in the pod container.
|
170
|
+
|
175
171
|
"""
|
176
172
|
...
|
177
173
|
|
178
|
-
def _get_file_transfer_paths(
|
179
|
-
self, source_file_relative_path: str, staging_directory: str
|
180
|
-
) -> MutableMapping[str, Any]:
|
181
|
-
"""
|
182
|
-
This method is used to get the file transfer paths for a given source file relative path and local directory.
|
183
|
-
It returns a dictionary with the following keys:
|
184
|
-
- FILE_RELATIVE_PATH: The relative path to file in reference to the staging directory.
|
185
|
-
- LOCAL_FILE_PATH: The absolute path to the file.
|
186
|
-
- FILE_NAME: The name of the referenced file.
|
187
|
-
- FILE_FOLDER: The folder of the referenced file.
|
188
|
-
"""
|
174
|
+
def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
|
189
175
|
preserve_directory_structure = self.preserve_directory_structure()
|
190
|
-
|
191
|
-
file_name = path.basename(source_file_relative_path)
|
192
|
-
file_folder = path.dirname(source_file_relative_path)
|
193
176
|
if preserve_directory_structure:
|
194
177
|
# Remove left slashes from source path format to make relative path for writing locally
|
195
|
-
file_relative_path =
|
178
|
+
file_relative_path = file.uri.lstrip("/")
|
196
179
|
else:
|
197
|
-
file_relative_path =
|
198
|
-
local_file_path = path.join(
|
180
|
+
file_relative_path = path.basename(file.uri)
|
181
|
+
local_file_path = path.join(local_directory, file_relative_path)
|
182
|
+
|
199
183
|
# Ensure the local directory exists
|
200
184
|
makedirs(path.dirname(local_file_path), exist_ok=True)
|
201
|
-
|
202
|
-
|
203
|
-
self.FILE_RELATIVE_PATH: file_relative_path,
|
204
|
-
self.LOCAL_FILE_PATH: local_file_path,
|
205
|
-
self.FILE_NAME: file_name,
|
206
|
-
self.FILE_FOLDER: file_folder,
|
207
|
-
}
|
208
|
-
return file_paths
|
185
|
+
absolute_file_path = path.abspath(local_file_path)
|
186
|
+
return [file_relative_path, local_file_path, absolute_file_path]
|
@@ -2,27 +2,34 @@
|
|
2
2
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
import logging
|
5
|
-
|
5
|
+
import os
|
6
|
+
from typing import Any, Dict, Iterable
|
6
7
|
|
7
|
-
from airbyte_cdk.
|
8
|
+
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
8
9
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
9
|
-
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
10
10
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
11
|
-
|
11
|
+
|
12
|
+
AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
|
13
|
+
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
|
12
14
|
|
13
15
|
|
14
16
|
class FileTransfer:
|
15
17
|
def __init__(self) -> None:
|
16
|
-
self._local_directory =
|
18
|
+
self._local_directory = (
|
19
|
+
AIRBYTE_STAGING_DIRECTORY
|
20
|
+
if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
|
21
|
+
else DEFAULT_LOCAL_DIRECTORY
|
22
|
+
)
|
17
23
|
|
18
|
-
def
|
24
|
+
def get_file(
|
19
25
|
self,
|
26
|
+
config: FileBasedStreamConfig,
|
20
27
|
file: RemoteFile,
|
21
28
|
stream_reader: AbstractFileBasedStreamReader,
|
22
29
|
logger: logging.Logger,
|
23
|
-
) -> Iterable[
|
30
|
+
) -> Iterable[Dict[str, Any]]:
|
24
31
|
try:
|
25
|
-
yield stream_reader.
|
32
|
+
yield stream_reader.get_file(
|
26
33
|
file=file, local_directory=self._local_directory, logger=logger
|
27
34
|
)
|
28
35
|
except Exception as ex:
|
@@ -18,19 +18,9 @@ JsonSchemaSupportedType = Union[List[str], Literal["string"], str]
|
|
18
18
|
SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]]
|
19
19
|
|
20
20
|
schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}}
|
21
|
-
|
22
21
|
file_transfer_schema = {
|
23
22
|
"type": "object",
|
24
|
-
"properties": {
|
25
|
-
"folder": {"type": "string"},
|
26
|
-
"file_name": {"type": "string"},
|
27
|
-
"source_uri": {"type": "string"},
|
28
|
-
"bytes": {"type": "integer"},
|
29
|
-
"id": {"type": ["null", "string"]},
|
30
|
-
"created_at": {"type": ["null", "string"]},
|
31
|
-
"updated_at": {"type": ["null", "string"]},
|
32
|
-
"mime_type": {"type": ["null", "string"]},
|
33
|
-
},
|
23
|
+
"properties": {"data": {"type": "object"}, "file": {"type": "object"}},
|
34
24
|
}
|
35
25
|
|
36
26
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
import copy
|
6
6
|
import logging
|
7
|
-
from functools import lru_cache
|
7
|
+
from functools import cache, lru_cache
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
10
|
from typing_extensions import deprecated
|
@@ -258,14 +258,19 @@ class FileBasedStreamPartition(Partition):
|
|
258
258
|
and record_data.record is not None
|
259
259
|
):
|
260
260
|
# `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
|
261
|
-
|
261
|
+
# If stream is flagged for file_transfer the record should data in file key
|
262
|
+
record_message_data = (
|
263
|
+
record_data.record.file
|
264
|
+
if self._use_file_transfer()
|
265
|
+
else record_data.record.data
|
266
|
+
)
|
262
267
|
if not record_message_data:
|
263
268
|
raise ExceptionWithDisplayMessage("A record without data was found")
|
264
269
|
else:
|
265
270
|
yield Record(
|
266
271
|
data=record_message_data,
|
267
272
|
stream_name=self.stream_name(),
|
268
|
-
|
273
|
+
is_file_transfer_message=self._use_file_transfer(),
|
269
274
|
)
|
270
275
|
else:
|
271
276
|
self._message_repository.emit_message(record_data)
|
@@ -301,6 +306,10 @@ class FileBasedStreamPartition(Partition):
|
|
301
306
|
def stream_name(self) -> str:
|
302
307
|
return self._stream.name
|
303
308
|
|
309
|
+
@cache
|
310
|
+
def _use_file_transfer(self) -> bool:
|
311
|
+
return hasattr(self._stream, "use_file_transfer") and self._stream.use_file_transfer
|
312
|
+
|
304
313
|
def __repr__(self) -> str:
|
305
314
|
return f"FileBasedStreamPartition({self._stream.name}, {self._slice})"
|
306
315
|
|