airbyte-cdk 6.45.1.post46.dev14423672753__py3-none-any.whl → 6.45.1.post47.dev14456468218__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +3 -12
- airbyte_cdk/connector_builder/test_reader/reader.py +0 -2
- airbyte_cdk/models/__init__.py +1 -0
- airbyte_cdk/models/airbyte_protocol.py +1 -3
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +36 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +31 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +39 -1
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +89 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +38 -16
- airbyte_cdk/sources/file_based/file_record_data.py +22 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +10 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -12
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -38
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
- airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
- airbyte_cdk/sources/types.py +11 -2
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- airbyte_cdk/test/entrypoint_wrapper.py +0 -4
- airbyte_cdk/test/mock_http/response_builder.py +8 -0
- {airbyte_cdk-6.45.1.post46.dev14423672753.dist-info → airbyte_cdk-6.45.1.post47.dev14456468218.dist-info}/METADATA +2 -3
- {airbyte_cdk-6.45.1.post46.dev14423672753.dist-info → airbyte_cdk-6.45.1.post47.dev14456468218.dist-info}/RECORD +31 -42
- airbyte_cdk/models/file_transfer_record_message.py +0 -13
- airbyte_cdk/test/declarative/__init__.py +0 -6
- airbyte_cdk/test/declarative/models/__init__.py +0 -7
- airbyte_cdk/test/declarative/models/scenario.py +0 -74
- airbyte_cdk/test/declarative/test_suites/__init__.py +0 -25
- airbyte_cdk/test/declarative/test_suites/connector_base.py +0 -223
- airbyte_cdk/test/declarative/test_suites/declarative_sources.py +0 -74
- airbyte_cdk/test/declarative/test_suites/destination_base.py +0 -12
- airbyte_cdk/test/declarative/test_suites/source_base.py +0 -128
- airbyte_cdk/test/declarative/utils/__init__.py +0 -0
- airbyte_cdk/test/declarative/utils/job_runner.py +0 -150
- airbyte_cdk/test/fixtures/__init__.py +0 -0
- airbyte_cdk/test/fixtures/auto.py +0 -14
- airbyte_cdk/test/pytest_config/plugin.py +0 -46
- {airbyte_cdk-6.45.1.post46.dev14423672753.dist-info → airbyte_cdk-6.45.1.post47.dev14456468218.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.1.post46.dev14423672753.dist-info → airbyte_cdk-6.45.1.post47.dev14456468218.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.1.post46.dev14423672753.dist-info → airbyte_cdk-6.45.1.post47.dev14456468218.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.1.post46.dev14423672753.dist-info → airbyte_cdk-6.45.1.post47.dev14456468218.dist-info}/entry_points.txt +0 -0
@@ -35,10 +35,8 @@ MAX_RECORDS_KEY = "max_records"
|
|
35
35
|
MAX_STREAMS_KEY = "max_streams"
|
36
36
|
|
37
37
|
|
38
|
-
@dataclass
|
38
|
+
@dataclass
|
39
39
|
class TestLimits:
|
40
|
-
__test__: bool = False # Prevent pytest from treating this as a test case, despite its name
|
41
|
-
|
42
40
|
max_records: int = field(default=DEFAULT_MAXIMUM_RECORDS)
|
43
41
|
max_pages_per_slice: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE)
|
44
42
|
max_slices: int = field(default=DEFAULT_MAXIMUM_NUMBER_OF_SLICES)
|
@@ -53,12 +51,7 @@ def get_limits(config: Mapping[str, Any]) -> TestLimits:
|
|
53
51
|
max_slices = command_config.get(MAX_SLICES_KEY) or DEFAULT_MAXIMUM_NUMBER_OF_SLICES
|
54
52
|
max_records = command_config.get(MAX_RECORDS_KEY) or DEFAULT_MAXIMUM_RECORDS
|
55
53
|
max_streams = command_config.get(MAX_STREAMS_KEY) or DEFAULT_MAXIMUM_STREAMS
|
56
|
-
return TestLimits(
|
57
|
-
max_records=max_records,
|
58
|
-
max_pages_per_slice=max_pages_per_slice,
|
59
|
-
max_slices=max_slices,
|
60
|
-
max_streams=max_streams,
|
61
|
-
)
|
54
|
+
return TestLimits(max_records, max_pages_per_slice, max_slices, max_streams)
|
62
55
|
|
63
56
|
|
64
57
|
def create_source(config: Mapping[str, Any], limits: TestLimits) -> ManifestDeclarativeSource:
|
@@ -86,9 +79,7 @@ def read_stream(
|
|
86
79
|
) -> AirbyteMessage:
|
87
80
|
try:
|
88
81
|
test_read_handler = TestReader(
|
89
|
-
|
90
|
-
max_slices=limits.max_slices,
|
91
|
-
max_record_limit=limits.max_records,
|
82
|
+
limits.max_pages_per_slice, limits.max_slices, limits.max_records
|
92
83
|
)
|
93
84
|
# The connector builder only supports a single stream
|
94
85
|
stream_name = configured_catalog.streams[0].stream.name
|
airbyte_cdk/models/__init__.py
CHANGED
@@ -8,8 +8,6 @@ from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
|
|
8
8
|
from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
|
9
9
|
from serpyco_rs.metadata import Alias
|
10
10
|
|
11
|
-
from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
|
12
|
-
|
13
11
|
# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
|
14
12
|
|
15
13
|
|
@@ -84,7 +82,7 @@ class AirbyteMessage:
|
|
84
82
|
spec: Optional[ConnectorSpecification] = None # type: ignore [name-defined]
|
85
83
|
connectionStatus: Optional[AirbyteConnectionStatus] = None # type: ignore [name-defined]
|
86
84
|
catalog: Optional[AirbyteCatalog] = None # type: ignore [name-defined]
|
87
|
-
record: Optional[
|
85
|
+
record: Optional[AirbyteRecordMessage] = None # type: ignore [name-defined]
|
88
86
|
state: Optional[AirbyteStateMessage] = None
|
89
87
|
trace: Optional[AirbyteTraceMessage] = None # type: ignore [name-defined]
|
90
88
|
control: Optional[AirbyteControlMessage] = None # type: ignore [name-defined]
|
@@ -149,7 +149,7 @@ class ConcurrentReadProcessor:
|
|
149
149
|
message = stream_data_to_airbyte_message(
|
150
150
|
stream_name=record.stream_name,
|
151
151
|
data_or_message=record.data,
|
152
|
-
|
152
|
+
file_reference=record.file_reference,
|
153
153
|
)
|
154
154
|
stream = self._stream_name_to_instance[record.stream_name]
|
155
155
|
|
@@ -25,6 +25,7 @@ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global impor
|
|
25
25
|
PerPartitionWithGlobalCursor,
|
26
26
|
)
|
27
27
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
28
|
+
from airbyte_cdk.sources.declarative.models import FileUploader
|
28
29
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29
30
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
30
31
|
)
|
@@ -206,6 +207,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
206
207
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
207
208
|
# so we need to treat them as synchronous
|
208
209
|
|
210
|
+
supports_file_transfer = (
|
211
|
+
"file_uploader" in name_to_stream_mapping[declarative_stream.name]
|
212
|
+
)
|
213
|
+
|
209
214
|
if (
|
210
215
|
isinstance(declarative_stream, DeclarativeStream)
|
211
216
|
and name_to_stream_mapping[declarative_stream.name]["type"]
|
@@ -322,6 +327,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
322
327
|
else None,
|
323
328
|
logger=self.logger,
|
324
329
|
cursor=cursor,
|
330
|
+
supports_file_transfer=supports_file_transfer,
|
325
331
|
)
|
326
332
|
)
|
327
333
|
elif (
|
@@ -353,6 +359,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
353
359
|
cursor_field=None,
|
354
360
|
logger=self.logger,
|
355
361
|
cursor=final_state_cursor,
|
362
|
+
supports_file_transfer=supports_file_transfer,
|
356
363
|
)
|
357
364
|
)
|
358
365
|
elif (
|
@@ -406,6 +413,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
406
413
|
cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
|
407
414
|
logger=self.logger,
|
408
415
|
cursor=perpartition_cursor,
|
416
|
+
supports_file_transfer=supports_file_transfer,
|
409
417
|
)
|
410
418
|
)
|
411
419
|
else:
|
@@ -1448,6 +1448,42 @@ definitions:
|
|
1448
1448
|
- "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
|
1449
1449
|
- "$ref": "#/definitions/CustomStateMigration"
|
1450
1450
|
default: []
|
1451
|
+
file_uploader:
|
1452
|
+
title: File Uploader
|
1453
|
+
description: (experimental) Describes how to fetch a file
|
1454
|
+
type: object
|
1455
|
+
required:
|
1456
|
+
- type
|
1457
|
+
- requester
|
1458
|
+
- download_target_extractor
|
1459
|
+
properties:
|
1460
|
+
type:
|
1461
|
+
type: string
|
1462
|
+
enum: [ FileUploader ]
|
1463
|
+
requester:
|
1464
|
+
description: Requester component that describes how to prepare HTTP requests to send to the source API.
|
1465
|
+
anyOf:
|
1466
|
+
- "$ref": "#/definitions/CustomRequester"
|
1467
|
+
- "$ref": "#/definitions/HttpRequester"
|
1468
|
+
download_target_extractor:
|
1469
|
+
description: Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response
|
1470
|
+
anyOf:
|
1471
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1472
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1473
|
+
file_extractor:
|
1474
|
+
description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
|
1475
|
+
anyOf:
|
1476
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1477
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1478
|
+
filename_extractor:
|
1479
|
+
description: Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.
|
1480
|
+
type: string
|
1481
|
+
interpolation_context:
|
1482
|
+
- config
|
1483
|
+
- record
|
1484
|
+
examples:
|
1485
|
+
- "{{ record.id }}/{{ record.file_name }}/"
|
1486
|
+
- "{{ record.id }}_{{ record.file_name }}/"
|
1451
1487
|
$parameters:
|
1452
1488
|
type: object
|
1453
1489
|
additional_properties: true
|
@@ -15,6 +15,7 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
|
|
15
15
|
)
|
16
16
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
17
17
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
18
|
+
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
18
19
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
19
20
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
20
21
|
from airbyte_cdk.sources.utils.transform import TypeTransformer
|
@@ -42,6 +43,7 @@ class RecordSelector(HttpSelector):
|
|
42
43
|
record_filter: Optional[RecordFilter] = None
|
43
44
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
44
45
|
transform_before_filtering: bool = False
|
46
|
+
file_uploader: Optional[FileUploader] = None
|
45
47
|
|
46
48
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
47
49
|
self._parameters = parameters
|
@@ -117,7 +119,10 @@ class RecordSelector(HttpSelector):
|
|
117
119
|
transformed_filtered_data, schema=records_schema
|
118
120
|
)
|
119
121
|
for data in normalized_data:
|
120
|
-
|
122
|
+
record = Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
123
|
+
if self.file_uploader:
|
124
|
+
self.file_uploader.upload(record)
|
125
|
+
yield record
|
121
126
|
|
122
127
|
def _normalize_by_schema(
|
123
128
|
self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
|
@@ -2042,6 +2042,31 @@ class SelectiveAuthenticator(BaseModel):
|
|
2042
2042
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2043
2043
|
|
2044
2044
|
|
2045
|
+
class FileUploader(BaseModel):
|
2046
|
+
type: Literal["FileUploader"]
|
2047
|
+
requester: Union[CustomRequester, HttpRequester] = Field(
|
2048
|
+
...,
|
2049
|
+
description="Requester component that describes how to prepare HTTP requests to send to the source API.",
|
2050
|
+
)
|
2051
|
+
download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
|
2052
|
+
...,
|
2053
|
+
description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
|
2054
|
+
)
|
2055
|
+
file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
|
2056
|
+
None,
|
2057
|
+
description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
|
2058
|
+
)
|
2059
|
+
filename_extractor: Optional[str] = Field(
|
2060
|
+
None,
|
2061
|
+
description="Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.",
|
2062
|
+
examples=[
|
2063
|
+
"{{ record.id }}/{{ record.file_name }}/",
|
2064
|
+
"{{ record.id }}_{{ record.file_name }}/",
|
2065
|
+
],
|
2066
|
+
)
|
2067
|
+
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2068
|
+
|
2069
|
+
|
2045
2070
|
class DeclarativeStream(BaseModel):
|
2046
2071
|
class Config:
|
2047
2072
|
extra = Extra.allow
|
@@ -2100,6 +2125,11 @@ class DeclarativeStream(BaseModel):
|
|
2100
2125
|
description="Array of state migrations to be applied on the input state",
|
2101
2126
|
title="State Migrations",
|
2102
2127
|
)
|
2128
|
+
file_uploader: Optional[FileUploader] = Field(
|
2129
|
+
None,
|
2130
|
+
description="(experimental) Describes how to fetch a file",
|
2131
|
+
title="File Uploader",
|
2132
|
+
)
|
2103
2133
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2104
2134
|
|
2105
2135
|
|
@@ -2593,6 +2623,7 @@ CompositeErrorHandler.update_forward_refs()
|
|
2593
2623
|
DeclarativeSource1.update_forward_refs()
|
2594
2624
|
DeclarativeSource2.update_forward_refs()
|
2595
2625
|
SelectiveAuthenticator.update_forward_refs()
|
2626
|
+
FileUploader.update_forward_refs()
|
2596
2627
|
DeclarativeStream.update_forward_refs()
|
2597
2628
|
SessionTokenAuthenticator.update_forward_refs()
|
2598
2629
|
DynamicSchemaLoader.update_forward_refs()
|
@@ -106,7 +106,6 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
106
106
|
)
|
107
107
|
from airbyte_cdk.sources.declarative.models import (
|
108
108
|
CustomStateMigration,
|
109
|
-
GzipDecoder,
|
110
109
|
)
|
111
110
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
112
111
|
AddedFieldDefinition as AddedFieldDefinitionModel,
|
@@ -228,6 +227,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
228
227
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
229
228
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
230
229
|
)
|
230
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
|
+
FileUploader as FileUploaderModel,
|
232
|
+
)
|
231
233
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
232
234
|
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
233
235
|
)
|
@@ -479,6 +481,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
479
481
|
SimpleRetriever,
|
480
482
|
SimpleRetrieverTestReadDecorator,
|
481
483
|
)
|
484
|
+
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
482
485
|
from airbyte_cdk.sources.declarative.schema import (
|
483
486
|
ComplexFieldType,
|
484
487
|
DefaultSchemaLoader,
|
@@ -675,6 +678,7 @@ class ModelToComponentFactory:
|
|
675
678
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
676
679
|
ZipfileDecoderModel: self.create_zipfile_decoder,
|
677
680
|
HTTPAPIBudgetModel: self.create_http_api_budget,
|
681
|
+
FileUploaderModel: self.create_file_uploader,
|
678
682
|
FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
|
679
683
|
MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
|
680
684
|
UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
|
@@ -1826,6 +1830,11 @@ class ModelToComponentFactory:
|
|
1826
1830
|
transformations.append(
|
1827
1831
|
self._create_component_from_model(model=transformation_model, config=config)
|
1828
1832
|
)
|
1833
|
+
file_uploader = None
|
1834
|
+
if model.file_uploader:
|
1835
|
+
file_uploader = self._create_component_from_model(
|
1836
|
+
model=model.file_uploader, config=config
|
1837
|
+
)
|
1829
1838
|
|
1830
1839
|
retriever = self._create_component_from_model(
|
1831
1840
|
model=model.retriever,
|
@@ -1837,6 +1846,7 @@ class ModelToComponentFactory:
|
|
1837
1846
|
stop_condition_on_cursor=stop_condition_on_cursor,
|
1838
1847
|
client_side_incremental_sync=client_side_incremental_sync,
|
1839
1848
|
transformations=transformations,
|
1849
|
+
file_uploader=file_uploader,
|
1840
1850
|
incremental_sync=model.incremental_sync,
|
1841
1851
|
)
|
1842
1852
|
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
@@ -2759,6 +2769,7 @@ class ModelToComponentFactory:
|
|
2759
2769
|
transformations: List[RecordTransformation] | None = None,
|
2760
2770
|
decoder: Decoder | None = None,
|
2761
2771
|
client_side_incremental_sync: Dict[str, Any] | None = None,
|
2772
|
+
file_uploader: Optional[FileUploader] = None,
|
2762
2773
|
**kwargs: Any,
|
2763
2774
|
) -> RecordSelector:
|
2764
2775
|
extractor = self._create_component_from_model(
|
@@ -2796,6 +2807,7 @@ class ModelToComponentFactory:
|
|
2796
2807
|
config=config,
|
2797
2808
|
record_filter=record_filter,
|
2798
2809
|
transformations=transformations or [],
|
2810
|
+
file_uploader=file_uploader,
|
2799
2811
|
schema_normalization=schema_normalization,
|
2800
2812
|
parameters=model.parameters or {},
|
2801
2813
|
transform_before_filtering=transform_before_filtering,
|
@@ -2853,6 +2865,7 @@ class ModelToComponentFactory:
|
|
2853
2865
|
stop_condition_on_cursor: bool = False,
|
2854
2866
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
2855
2867
|
transformations: List[RecordTransformation],
|
2868
|
+
file_uploader: Optional[FileUploader] = None,
|
2856
2869
|
incremental_sync: Optional[
|
2857
2870
|
Union[
|
2858
2871
|
IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
|
@@ -2873,6 +2886,7 @@ class ModelToComponentFactory:
|
|
2873
2886
|
decoder=decoder,
|
2874
2887
|
transformations=transformations,
|
2875
2888
|
client_side_incremental_sync=client_side_incremental_sync,
|
2889
|
+
file_uploader=file_uploader,
|
2876
2890
|
)
|
2877
2891
|
|
2878
2892
|
query_properties: Optional[QueryProperties] = None
|
@@ -3538,6 +3552,30 @@ class ModelToComponentFactory:
|
|
3538
3552
|
matchers=matchers,
|
3539
3553
|
)
|
3540
3554
|
|
3555
|
+
def create_file_uploader(
|
3556
|
+
self, model: FileUploaderModel, config: Config, **kwargs: Any
|
3557
|
+
) -> FileUploader:
|
3558
|
+
name = "File Uploader"
|
3559
|
+
requester = self._create_component_from_model(
|
3560
|
+
model=model.requester,
|
3561
|
+
config=config,
|
3562
|
+
name=name,
|
3563
|
+
**kwargs,
|
3564
|
+
)
|
3565
|
+
download_target_extractor = self._create_component_from_model(
|
3566
|
+
model=model.download_target_extractor,
|
3567
|
+
config=config,
|
3568
|
+
name=name,
|
3569
|
+
**kwargs,
|
3570
|
+
)
|
3571
|
+
return FileUploader(
|
3572
|
+
requester=requester,
|
3573
|
+
download_target_extractor=download_target_extractor,
|
3574
|
+
config=config,
|
3575
|
+
parameters=model.parameters or {},
|
3576
|
+
filename_extractor=model.filename_extractor if model.filename_extractor else None,
|
3577
|
+
)
|
3578
|
+
|
3541
3579
|
def create_moving_window_call_rate_policy(
|
3542
3580
|
self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
3543
3581
|
) -> MovingWindowCallRatePolicy:
|
@@ -0,0 +1,89 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
import uuid
|
8
|
+
from dataclasses import InitVar, dataclass, field
|
9
|
+
from pathlib import Path
|
10
|
+
from typing import Any, Mapping, Optional, Union
|
11
|
+
|
12
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
13
|
+
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
14
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
|
15
|
+
InterpolatedString,
|
16
|
+
)
|
17
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
18
|
+
SafeResponse,
|
19
|
+
)
|
20
|
+
from airbyte_cdk.sources.declarative.requesters import Requester
|
21
|
+
from airbyte_cdk.sources.declarative.types import Record, StreamSlice
|
22
|
+
from airbyte_cdk.sources.types import Config
|
23
|
+
from airbyte_cdk.sources.utils.files_directory import get_files_directory
|
24
|
+
|
25
|
+
logger = logging.getLogger("airbyte")
|
26
|
+
|
27
|
+
|
28
|
+
@dataclass
|
29
|
+
class FileUploader:
|
30
|
+
requester: Requester
|
31
|
+
download_target_extractor: RecordExtractor
|
32
|
+
config: Config
|
33
|
+
parameters: InitVar[Mapping[str, Any]]
|
34
|
+
|
35
|
+
filename_extractor: Optional[Union[InterpolatedString, str]] = None
|
36
|
+
content_extractor: Optional[RecordExtractor] = None
|
37
|
+
|
38
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
39
|
+
if self.filename_extractor:
|
40
|
+
self.filename_extractor = InterpolatedString.create(
|
41
|
+
self.filename_extractor,
|
42
|
+
parameters=parameters,
|
43
|
+
)
|
44
|
+
|
45
|
+
def upload(self, record: Record) -> None:
|
46
|
+
mocked_response = SafeResponse()
|
47
|
+
mocked_response.content = json.dumps(record.data).encode()
|
48
|
+
download_target = list(self.download_target_extractor.extract_records(mocked_response))[0]
|
49
|
+
if not isinstance(download_target, str):
|
50
|
+
raise ValueError(
|
51
|
+
f"download_target is expected to be a str but was {type(download_target)}: {download_target}"
|
52
|
+
)
|
53
|
+
|
54
|
+
response = self.requester.send_request(
|
55
|
+
stream_slice=StreamSlice(
|
56
|
+
partition={}, cursor_slice={}, extra_fields={"download_target": download_target}
|
57
|
+
),
|
58
|
+
)
|
59
|
+
|
60
|
+
if self.content_extractor:
|
61
|
+
raise NotImplementedError("TODO")
|
62
|
+
else:
|
63
|
+
files_directory = Path(get_files_directory())
|
64
|
+
|
65
|
+
file_name = (
|
66
|
+
self.filename_extractor.eval(self.config, record=record)
|
67
|
+
if self.filename_extractor
|
68
|
+
else str(uuid.uuid4())
|
69
|
+
)
|
70
|
+
file_name = file_name.lstrip("/")
|
71
|
+
file_relative_path = Path(record.stream_name) / Path(file_name)
|
72
|
+
|
73
|
+
full_path = files_directory / file_relative_path
|
74
|
+
full_path.parent.mkdir(parents=True, exist_ok=True)
|
75
|
+
|
76
|
+
with open(str(full_path), "wb") as f:
|
77
|
+
f.write(response.content)
|
78
|
+
file_size_bytes = full_path.stat().st_size
|
79
|
+
|
80
|
+
logger.info("File uploaded successfully")
|
81
|
+
logger.info(f"File url: {str(full_path)}")
|
82
|
+
logger.info(f"File size: {file_size_bytes / 1024} KB")
|
83
|
+
logger.info(f"File relative path: {str(file_relative_path)}")
|
84
|
+
|
85
|
+
record.file_reference = AirbyteRecordMessageFileReference(
|
86
|
+
staging_file_url=str(full_path),
|
87
|
+
source_file_relative_path=str(file_relative_path),
|
88
|
+
file_size_bytes=file_size_bytes,
|
89
|
+
)
|
@@ -58,11 +58,16 @@ class DeclarativePartition(Partition):
|
|
58
58
|
def read(self) -> Iterable[Record]:
|
59
59
|
for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
|
60
60
|
if isinstance(stream_data, Mapping):
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
61
|
+
record = (
|
62
|
+
stream_data
|
63
|
+
if isinstance(stream_data, Record)
|
64
|
+
else Record(
|
65
|
+
data=stream_data,
|
66
|
+
stream_name=self.stream_name(),
|
67
|
+
associated_slice=self._stream_slice,
|
68
|
+
)
|
65
69
|
)
|
70
|
+
yield record
|
66
71
|
else:
|
67
72
|
self._message_repository.emit_message(stream_data)
|
68
73
|
|
@@ -8,16 +8,18 @@ from datetime import datetime
|
|
8
8
|
from enum import Enum
|
9
9
|
from io import IOBase
|
10
10
|
from os import makedirs, path
|
11
|
-
from typing import Any,
|
11
|
+
from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple
|
12
12
|
|
13
13
|
from wcmatch.glob import GLOBSTAR, globmatch
|
14
14
|
|
15
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
15
16
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
16
17
|
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
|
17
18
|
include_identities_stream,
|
18
19
|
preserve_directory_structure,
|
19
20
|
use_file_transfer,
|
20
21
|
)
|
22
|
+
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
21
23
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
22
24
|
|
23
25
|
|
@@ -28,6 +30,10 @@ class FileReadMode(Enum):
|
|
28
30
|
|
29
31
|
class AbstractFileBasedStreamReader(ABC):
|
30
32
|
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
33
|
+
FILE_RELATIVE_PATH = "file_relative_path"
|
34
|
+
FILE_NAME = "file_name"
|
35
|
+
LOCAL_FILE_PATH = "local_file_path"
|
36
|
+
FILE_FOLDER = "file_folder"
|
31
37
|
|
32
38
|
def __init__(self) -> None:
|
33
39
|
self._config = None
|
@@ -148,9 +154,9 @@ class AbstractFileBasedStreamReader(ABC):
|
|
148
154
|
return False
|
149
155
|
|
150
156
|
@abstractmethod
|
151
|
-
def
|
157
|
+
def upload(
|
152
158
|
self, file: RemoteFile, local_directory: str, logger: logging.Logger
|
153
|
-
) ->
|
159
|
+
) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
|
154
160
|
"""
|
155
161
|
This is required for connectors that will support writing to
|
156
162
|
files. It will handle the logic to download,get,read,acquire or
|
@@ -162,25 +168,41 @@ class AbstractFileBasedStreamReader(ABC):
|
|
162
168
|
logger (logging.Logger): Logger for logging information and errors.
|
163
169
|
|
164
170
|
Returns:
|
165
|
-
|
166
|
-
-
|
167
|
-
-
|
168
|
-
-
|
169
|
-
this a mounted volume in the pod container.
|
170
|
-
|
171
|
+
AirbyteRecordMessageFileReference: A file reference object containing:
|
172
|
+
- staging_file_url (str): The absolute path to the referenced file in the staging area.
|
173
|
+
- file_size_bytes (int): The size of the referenced file in bytes.
|
174
|
+
- source_file_relative_path (str): The relative path to the referenced file in source.
|
171
175
|
"""
|
172
176
|
...
|
173
177
|
|
174
|
-
def _get_file_transfer_paths(
|
178
|
+
def _get_file_transfer_paths(
|
179
|
+
self, source_file_relative_path: str, staging_directory: str
|
180
|
+
) -> MutableMapping[str, Any]:
|
181
|
+
"""
|
182
|
+
This method is used to get the file transfer paths for a given source file relative path and local directory.
|
183
|
+
It returns a dictionary with the following keys:
|
184
|
+
- FILE_RELATIVE_PATH: The relative path to file in reference to the staging directory.
|
185
|
+
- LOCAL_FILE_PATH: The absolute path to the file.
|
186
|
+
- FILE_NAME: The name of the referenced file.
|
187
|
+
- FILE_FOLDER: The folder of the referenced file.
|
188
|
+
"""
|
175
189
|
preserve_directory_structure = self.preserve_directory_structure()
|
190
|
+
|
191
|
+
file_name = path.basename(source_file_relative_path)
|
192
|
+
file_folder = path.dirname(source_file_relative_path)
|
176
193
|
if preserve_directory_structure:
|
177
194
|
# Remove left slashes from source path format to make relative path for writing locally
|
178
|
-
file_relative_path =
|
195
|
+
file_relative_path = source_file_relative_path.lstrip("/")
|
179
196
|
else:
|
180
|
-
file_relative_path =
|
181
|
-
local_file_path = path.join(
|
182
|
-
|
197
|
+
file_relative_path = file_name
|
198
|
+
local_file_path = path.join(staging_directory, file_relative_path)
|
183
199
|
# Ensure the local directory exists
|
184
200
|
makedirs(path.dirname(local_file_path), exist_ok=True)
|
185
|
-
|
186
|
-
|
201
|
+
|
202
|
+
file_paths = {
|
203
|
+
self.FILE_RELATIVE_PATH: file_relative_path,
|
204
|
+
self.LOCAL_FILE_PATH: local_file_path,
|
205
|
+
self.FILE_NAME: file_name,
|
206
|
+
self.FILE_FOLDER: file_folder,
|
207
|
+
}
|
208
|
+
return file_paths
|
@@ -0,0 +1,22 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from datetime import datetime
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
from pydantic.v1 import BaseModel
|
9
|
+
|
10
|
+
|
11
|
+
class FileRecordData(BaseModel):
|
12
|
+
"""
|
13
|
+
A record in a file-based stream.
|
14
|
+
"""
|
15
|
+
|
16
|
+
folder: str
|
17
|
+
filename: str
|
18
|
+
bytes: int
|
19
|
+
source_uri: str
|
20
|
+
id: Optional[str] = None
|
21
|
+
updated_at: Optional[str] = None
|
22
|
+
mime_type: Optional[str] = None
|
@@ -2,34 +2,27 @@
|
|
2
2
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
import logging
|
5
|
-
import
|
6
|
-
from typing import Any, Dict, Iterable
|
5
|
+
from typing import Iterable, Tuple
|
7
6
|
|
8
|
-
from airbyte_cdk.
|
7
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
9
8
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
9
|
+
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
10
10
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
11
|
-
|
12
|
-
AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
|
13
|
-
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
|
11
|
+
from airbyte_cdk.sources.utils.files_directory import get_files_directory
|
14
12
|
|
15
13
|
|
16
14
|
class FileTransfer:
|
17
15
|
def __init__(self) -> None:
|
18
|
-
self._local_directory = (
|
19
|
-
AIRBYTE_STAGING_DIRECTORY
|
20
|
-
if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
|
21
|
-
else DEFAULT_LOCAL_DIRECTORY
|
22
|
-
)
|
16
|
+
self._local_directory = get_files_directory()
|
23
17
|
|
24
|
-
def
|
18
|
+
def upload(
|
25
19
|
self,
|
26
|
-
config: FileBasedStreamConfig,
|
27
20
|
file: RemoteFile,
|
28
21
|
stream_reader: AbstractFileBasedStreamReader,
|
29
22
|
logger: logging.Logger,
|
30
|
-
) -> Iterable[
|
23
|
+
) -> Iterable[Tuple[FileRecordData, AirbyteRecordMessageFileReference]]:
|
31
24
|
try:
|
32
|
-
yield stream_reader.
|
25
|
+
yield stream_reader.upload(
|
33
26
|
file=file, local_directory=self._local_directory, logger=logger
|
34
27
|
)
|
35
28
|
except Exception as ex:
|
@@ -18,9 +18,18 @@ JsonSchemaSupportedType = Union[List[str], Literal["string"], str]
|
|
18
18
|
SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]]
|
19
19
|
|
20
20
|
schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}}
|
21
|
+
|
21
22
|
file_transfer_schema = {
|
22
23
|
"type": "object",
|
23
|
-
"properties": {
|
24
|
+
"properties": {
|
25
|
+
"folder": {"type": "string"},
|
26
|
+
"file_name": {"type": "string"},
|
27
|
+
"source_uri": {"type": "string"},
|
28
|
+
"bytes": {"type": "integer"},
|
29
|
+
"id": {"type": ["null", "string"]},
|
30
|
+
"updated_at": {"type": ["null", "string"]},
|
31
|
+
"mime_type": {"type": ["null", "string"]},
|
32
|
+
},
|
24
33
|
}
|
25
34
|
|
26
35
|
|