airbyte-cdk 6.45.7.post11.dev14564503945__py3-none-any.whl → 6.45.8.post2.dev14604759065__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/models/__init__.py +1 -0
- airbyte_cdk/models/airbyte_protocol.py +1 -3
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +36 -0
- airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +31 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +39 -1
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +93 -0
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +38 -16
- airbyte_cdk/sources/file_based/file_record_data.py +23 -0
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +11 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -12
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -38
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
- airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
- airbyte_cdk/sources/types.py +11 -2
- airbyte_cdk/sources/utils/files_directory.py +15 -0
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- airbyte_cdk/test/mock_http/response_builder.py +8 -0
- {airbyte_cdk-6.45.7.post11.dev14564503945.dist-info → airbyte_cdk-6.45.8.post2.dev14604759065.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.45.7.post11.dev14564503945.dist-info → airbyte_cdk-6.45.8.post2.dev14604759065.dist-info}/RECORD +28 -27
- airbyte_cdk/cli/README.md +0 -63
- airbyte_cdk/models/file_transfer_record_message.py +0 -13
- {airbyte_cdk-6.45.7.post11.dev14564503945.dist-info → airbyte_cdk-6.45.8.post2.dev14604759065.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.7.post11.dev14564503945.dist-info → airbyte_cdk-6.45.8.post2.dev14604759065.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.7.post11.dev14564503945.dist-info → airbyte_cdk-6.45.8.post2.dev14604759065.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.7.post11.dev14564503945.dist-info → airbyte_cdk-6.45.8.post2.dev14604759065.dist-info}/entry_points.txt +0 -0
airbyte_cdk/models/__init__.py
CHANGED
@@ -8,8 +8,6 @@ from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
|
|
8
8
|
from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
|
9
9
|
from serpyco_rs.metadata import Alias
|
10
10
|
|
11
|
-
from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
|
12
|
-
|
13
11
|
# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
|
14
12
|
|
15
13
|
|
@@ -84,7 +82,7 @@ class AirbyteMessage:
|
|
84
82
|
spec: Optional[ConnectorSpecification] = None # type: ignore [name-defined]
|
85
83
|
connectionStatus: Optional[AirbyteConnectionStatus] = None # type: ignore [name-defined]
|
86
84
|
catalog: Optional[AirbyteCatalog] = None # type: ignore [name-defined]
|
87
|
-
record: Optional[
|
85
|
+
record: Optional[AirbyteRecordMessage] = None # type: ignore [name-defined]
|
88
86
|
state: Optional[AirbyteStateMessage] = None
|
89
87
|
trace: Optional[AirbyteTraceMessage] = None # type: ignore [name-defined]
|
90
88
|
control: Optional[AirbyteControlMessage] = None # type: ignore [name-defined]
|
@@ -149,7 +149,7 @@ class ConcurrentReadProcessor:
|
|
149
149
|
message = stream_data_to_airbyte_message(
|
150
150
|
stream_name=record.stream_name,
|
151
151
|
data_or_message=record.data,
|
152
|
-
|
152
|
+
file_reference=record.file_reference,
|
153
153
|
)
|
154
154
|
stream = self._stream_name_to_instance[record.stream_name]
|
155
155
|
|
@@ -28,6 +28,7 @@ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global impor
|
|
28
28
|
PerPartitionWithGlobalCursor,
|
29
29
|
)
|
30
30
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
31
|
+
from airbyte_cdk.sources.declarative.models import FileUploader
|
31
32
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
32
33
|
ConcurrencyLevel as ConcurrencyLevelModel,
|
33
34
|
)
|
@@ -209,6 +210,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
209
210
|
# these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
|
210
211
|
# so we need to treat them as synchronous
|
211
212
|
|
213
|
+
supports_file_transfer = (
|
214
|
+
"file_uploader" in name_to_stream_mapping[declarative_stream.name]
|
215
|
+
)
|
216
|
+
|
212
217
|
if (
|
213
218
|
isinstance(declarative_stream, DeclarativeStream)
|
214
219
|
and name_to_stream_mapping[declarative_stream.name]["type"]
|
@@ -325,6 +330,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
325
330
|
else None,
|
326
331
|
logger=self.logger,
|
327
332
|
cursor=cursor,
|
333
|
+
supports_file_transfer=supports_file_transfer,
|
328
334
|
)
|
329
335
|
)
|
330
336
|
elif (
|
@@ -356,6 +362,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
356
362
|
cursor_field=None,
|
357
363
|
logger=self.logger,
|
358
364
|
cursor=final_state_cursor,
|
365
|
+
supports_file_transfer=supports_file_transfer,
|
359
366
|
)
|
360
367
|
)
|
361
368
|
elif (
|
@@ -410,6 +417,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
|
410
417
|
cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
|
411
418
|
logger=self.logger,
|
412
419
|
cursor=perpartition_cursor,
|
420
|
+
supports_file_transfer=supports_file_transfer,
|
413
421
|
)
|
414
422
|
)
|
415
423
|
else:
|
@@ -1448,6 +1448,42 @@ definitions:
|
|
1448
1448
|
- "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
|
1449
1449
|
- "$ref": "#/definitions/CustomStateMigration"
|
1450
1450
|
default: []
|
1451
|
+
file_uploader:
|
1452
|
+
title: File Uploader
|
1453
|
+
description: (experimental) Describes how to fetch a file
|
1454
|
+
type: object
|
1455
|
+
required:
|
1456
|
+
- type
|
1457
|
+
- requester
|
1458
|
+
- download_target_extractor
|
1459
|
+
properties:
|
1460
|
+
type:
|
1461
|
+
type: string
|
1462
|
+
enum: [ FileUploader ]
|
1463
|
+
requester:
|
1464
|
+
description: Requester component that describes how to prepare HTTP requests to send to the source API.
|
1465
|
+
anyOf:
|
1466
|
+
- "$ref": "#/definitions/CustomRequester"
|
1467
|
+
- "$ref": "#/definitions/HttpRequester"
|
1468
|
+
download_target_extractor:
|
1469
|
+
description: Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response
|
1470
|
+
anyOf:
|
1471
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1472
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1473
|
+
file_extractor:
|
1474
|
+
description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
|
1475
|
+
anyOf:
|
1476
|
+
- "$ref": "#/definitions/CustomRecordExtractor"
|
1477
|
+
- "$ref": "#/definitions/DpathExtractor"
|
1478
|
+
filename_extractor:
|
1479
|
+
description: Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.
|
1480
|
+
type: string
|
1481
|
+
interpolation_context:
|
1482
|
+
- config
|
1483
|
+
- record
|
1484
|
+
examples:
|
1485
|
+
- "{{ record.id }}/{{ record.file_name }}/"
|
1486
|
+
- "{{ record.id }}_{{ record.file_name }}/"
|
1451
1487
|
$parameters:
|
1452
1488
|
type: object
|
1453
1489
|
additional_properties: true
|
@@ -15,6 +15,7 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
|
|
15
15
|
)
|
16
16
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
17
17
|
from airbyte_cdk.sources.declarative.models import SchemaNormalization
|
18
|
+
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
18
19
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
19
20
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
20
21
|
from airbyte_cdk.sources.utils.transform import TypeTransformer
|
@@ -42,6 +43,7 @@ class RecordSelector(HttpSelector):
|
|
42
43
|
record_filter: Optional[RecordFilter] = None
|
43
44
|
transformations: List[RecordTransformation] = field(default_factory=lambda: [])
|
44
45
|
transform_before_filtering: bool = False
|
46
|
+
file_uploader: Optional[FileUploader] = None
|
45
47
|
|
46
48
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
47
49
|
self._parameters = parameters
|
@@ -117,7 +119,10 @@ class RecordSelector(HttpSelector):
|
|
117
119
|
transformed_filtered_data, schema=records_schema
|
118
120
|
)
|
119
121
|
for data in normalized_data:
|
120
|
-
|
122
|
+
record = Record(data=data, stream_name=self.name, associated_slice=stream_slice)
|
123
|
+
if self.file_uploader:
|
124
|
+
self.file_uploader.upload(record)
|
125
|
+
yield record
|
121
126
|
|
122
127
|
def _normalize_by_schema(
|
123
128
|
self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
|
@@ -2066,6 +2066,31 @@ class SelectiveAuthenticator(BaseModel):
|
|
2066
2066
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2067
2067
|
|
2068
2068
|
|
2069
|
+
class FileUploader(BaseModel):
    # Manifest-side model of the (experimental) `file_uploader` component that a
    # DeclarativeStream may carry. Field order and `description`/`examples` text
    # are part of the generated schema, so they are reproduced verbatim.

    # Discriminator: must be the literal string "FileUploader".
    type: Literal["FileUploader"]

    # Required: how the HTTP request for the file is built and sent.
    requester: Union[CustomRequester, HttpRequester] = Field(
        ...,
        description="Requester component that describes how to prepare HTTP requests to send to the source API.",
    )

    # Required: extractor run against each record (not the HTTP response) to
    # find where the file lives.
    download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
        ...,
        description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
    )

    # Optional: extractor for the file content; when omitted the whole response
    # body is taken as the file content (per the description below).
    file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
        None,
        description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
    )

    # Optional: interpolated template for the stored file name.
    filename_extractor: Optional[str] = Field(
        None,
        description="Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.",
        examples=[
            "{{ record.id }}/{{ record.file_name }}/",
            "{{ record.id }}_{{ record.file_name }}/",
        ],
    )

    # Free-form parameters, supplied in the manifest under the `$parameters` key.
    parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2092
|
+
|
2093
|
+
|
2069
2094
|
class DeclarativeStream(BaseModel):
|
2070
2095
|
class Config:
|
2071
2096
|
extra = Extra.allow
|
@@ -2124,6 +2149,11 @@ class DeclarativeStream(BaseModel):
|
|
2124
2149
|
description="Array of state migrations to be applied on the input state",
|
2125
2150
|
title="State Migrations",
|
2126
2151
|
)
|
2152
|
+
file_uploader: Optional[FileUploader] = Field(
|
2153
|
+
None,
|
2154
|
+
description="(experimental) Describes how to fetch a file",
|
2155
|
+
title="File Uploader",
|
2156
|
+
)
|
2127
2157
|
parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
|
2128
2158
|
|
2129
2159
|
|
@@ -2617,6 +2647,7 @@ CompositeErrorHandler.update_forward_refs()
|
|
2617
2647
|
DeclarativeSource1.update_forward_refs()
|
2618
2648
|
DeclarativeSource2.update_forward_refs()
|
2619
2649
|
SelectiveAuthenticator.update_forward_refs()
|
2650
|
+
FileUploader.update_forward_refs()
|
2620
2651
|
DeclarativeStream.update_forward_refs()
|
2621
2652
|
SessionTokenAuthenticator.update_forward_refs()
|
2622
2653
|
DynamicSchemaLoader.update_forward_refs()
|
@@ -106,7 +106,6 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
|
|
106
106
|
)
|
107
107
|
from airbyte_cdk.sources.declarative.models import (
|
108
108
|
CustomStateMigration,
|
109
|
-
GzipDecoder,
|
110
109
|
)
|
111
110
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
112
111
|
AddedFieldDefinition as AddedFieldDefinitionModel,
|
@@ -228,6 +227,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
228
227
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
229
228
|
ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
|
230
229
|
)
|
230
|
+
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
231
|
+
FileUploader as FileUploaderModel,
|
232
|
+
)
|
231
233
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
232
234
|
FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
|
233
235
|
)
|
@@ -479,6 +481,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
|
|
479
481
|
SimpleRetriever,
|
480
482
|
SimpleRetrieverTestReadDecorator,
|
481
483
|
)
|
484
|
+
from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
|
482
485
|
from airbyte_cdk.sources.declarative.schema import (
|
483
486
|
ComplexFieldType,
|
484
487
|
DefaultSchemaLoader,
|
@@ -676,6 +679,7 @@ class ModelToComponentFactory:
|
|
676
679
|
ComponentMappingDefinitionModel: self.create_components_mapping_definition,
|
677
680
|
ZipfileDecoderModel: self.create_zipfile_decoder,
|
678
681
|
HTTPAPIBudgetModel: self.create_http_api_budget,
|
682
|
+
FileUploaderModel: self.create_file_uploader,
|
679
683
|
FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
|
680
684
|
MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
|
681
685
|
UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
|
@@ -1840,6 +1844,11 @@ class ModelToComponentFactory:
|
|
1840
1844
|
transformations.append(
|
1841
1845
|
self._create_component_from_model(model=transformation_model, config=config)
|
1842
1846
|
)
|
1847
|
+
file_uploader = None
|
1848
|
+
if model.file_uploader:
|
1849
|
+
file_uploader = self._create_component_from_model(
|
1850
|
+
model=model.file_uploader, config=config
|
1851
|
+
)
|
1843
1852
|
|
1844
1853
|
retriever = self._create_component_from_model(
|
1845
1854
|
model=model.retriever,
|
@@ -1851,6 +1860,7 @@ class ModelToComponentFactory:
|
|
1851
1860
|
stop_condition_on_cursor=stop_condition_on_cursor,
|
1852
1861
|
client_side_incremental_sync=client_side_incremental_sync,
|
1853
1862
|
transformations=transformations,
|
1863
|
+
file_uploader=file_uploader,
|
1854
1864
|
incremental_sync=model.incremental_sync,
|
1855
1865
|
)
|
1856
1866
|
cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
|
@@ -2796,6 +2806,7 @@ class ModelToComponentFactory:
|
|
2796
2806
|
transformations: List[RecordTransformation] | None = None,
|
2797
2807
|
decoder: Decoder | None = None,
|
2798
2808
|
client_side_incremental_sync: Dict[str, Any] | None = None,
|
2809
|
+
file_uploader: Optional[FileUploader] = None,
|
2799
2810
|
**kwargs: Any,
|
2800
2811
|
) -> RecordSelector:
|
2801
2812
|
extractor = self._create_component_from_model(
|
@@ -2833,6 +2844,7 @@ class ModelToComponentFactory:
|
|
2833
2844
|
config=config,
|
2834
2845
|
record_filter=record_filter,
|
2835
2846
|
transformations=transformations or [],
|
2847
|
+
file_uploader=file_uploader,
|
2836
2848
|
schema_normalization=schema_normalization,
|
2837
2849
|
parameters=model.parameters or {},
|
2838
2850
|
transform_before_filtering=transform_before_filtering,
|
@@ -2890,6 +2902,7 @@ class ModelToComponentFactory:
|
|
2890
2902
|
stop_condition_on_cursor: bool = False,
|
2891
2903
|
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
|
2892
2904
|
transformations: List[RecordTransformation],
|
2905
|
+
file_uploader: Optional[FileUploader] = None,
|
2893
2906
|
incremental_sync: Optional[
|
2894
2907
|
Union[
|
2895
2908
|
IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
|
@@ -2910,6 +2923,7 @@ class ModelToComponentFactory:
|
|
2910
2923
|
decoder=decoder,
|
2911
2924
|
transformations=transformations,
|
2912
2925
|
client_side_incremental_sync=client_side_incremental_sync,
|
2926
|
+
file_uploader=file_uploader,
|
2913
2927
|
)
|
2914
2928
|
|
2915
2929
|
query_properties: Optional[QueryProperties] = None
|
@@ -3576,6 +3590,30 @@ class ModelToComponentFactory:
|
|
3576
3590
|
matchers=matchers,
|
3577
3591
|
)
|
3578
3592
|
|
3593
|
+
def create_file_uploader(
    self, model: FileUploaderModel, config: Config, **kwargs: Any
) -> FileUploader:
    """Build the runtime ``FileUploader`` component from its manifest model.

    The requester and the download-target extractor are both created through
    ``_create_component_from_model`` with the fixed component name
    ``"File Uploader"`` and any extra ``kwargs`` forwarded unchanged.

    Note: ``model.file_extractor`` is not read here, so the resulting
    ``FileUploader`` is always created without a content extractor.

    Args:
        model: Parsed ``FileUploader`` manifest model.
        config: Connector configuration handed to every sub-component.
        **kwargs: Extra keyword arguments forwarded to the sub-component factories.

    Returns:
        The assembled ``FileUploader``.
    """
    component_name = "File Uploader"

    def build(sub_model: Any) -> Any:
        # Shared construction path for both nested components.
        return self._create_component_from_model(
            model=sub_model,
            config=config,
            name=component_name,
            **kwargs,
        )

    # Keyword arguments are evaluated left to right, so the requester is still
    # built before the download-target extractor.
    return FileUploader(
        requester=build(model.requester),
        download_target_extractor=build(model.download_target_extractor),
        config=config,
        parameters=model.parameters or {},
        # Any falsy value (None, "") is normalised to None.
        filename_extractor=model.filename_extractor or None,
    )
|
3616
|
+
|
3579
3617
|
def create_moving_window_call_rate_policy(
|
3580
3618
|
self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
|
3581
3619
|
) -> MovingWindowCallRatePolicy:
|
@@ -0,0 +1,93 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
import json
|
6
|
+
import logging
|
7
|
+
import uuid
|
8
|
+
from dataclasses import InitVar, dataclass, field
|
9
|
+
from pathlib import Path
|
10
|
+
from typing import Any, Mapping, Optional, Union
|
11
|
+
|
12
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
13
|
+
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
14
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
|
15
|
+
InterpolatedString,
|
16
|
+
)
|
17
|
+
from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
|
18
|
+
SafeResponse,
|
19
|
+
)
|
20
|
+
from airbyte_cdk.sources.declarative.requesters import Requester
|
21
|
+
from airbyte_cdk.sources.declarative.types import Record, StreamSlice
|
22
|
+
from airbyte_cdk.sources.types import Config
|
23
|
+
from airbyte_cdk.sources.utils.files_directory import get_files_directory
|
24
|
+
|
25
|
+
logger = logging.getLogger("airbyte")
|
26
|
+
|
27
|
+
|
28
|
+
@dataclass
class FileUploader:
    """Download the file a record points to and attach a file reference to the record.

    Attributes are documented inline below. ``parameters`` is an ``InitVar``: it is
    only used in ``__post_init__`` to build the file-name interpolation.
    """

    # Sends the HTTP request that fetches the file.
    requester: Requester
    # Run against the record (serialised as a fake response) to find the download target.
    download_target_extractor: RecordExtractor
    # Connector configuration, passed to the file-name interpolation.
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    # Optional file-name template; a plain string is wrapped in ``__post_init__``.
    filename_extractor: Optional[Union[InterpolatedString, str]] = None
    # Not supported yet: ``upload`` raises ``NotImplementedError`` when this is set.
    content_extractor: Optional[RecordExtractor] = None

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """Wrap a configured file-name template in an ``InterpolatedString``."""
        if self.filename_extractor:
            self.filename_extractor = InterpolatedString.create(
                self.filename_extractor,
                parameters=parameters,
            )

    def upload(self, record: Record) -> None:
        """Fetch the file for ``record``, write it to disk and set ``record.file_reference``.

        The file is written to ``<files directory>/<stream name>/<file name>``, where
        the file name is the evaluated ``filename_extractor`` or a random UUID.

        Args:
            record: Record whose data contains the download target. It is mutated:
                ``file_reference`` is set once the file has been written.

        Raises:
            ValueError: If no download target is found, if the first target is not a
                ``str``, or if the computed path would fall outside the files directory.
            NotImplementedError: If ``content_extractor`` is configured.
        """
        # The extractor API works on responses, so the record is presented as one.
        mocked_response = SafeResponse()
        mocked_response.content = json.dumps(record.data).encode()
        download_targets = list(self.download_target_extractor.extract_records(mocked_response))
        if not download_targets:
            raise ValueError("No download targets found")

        download_target = download_targets[0]  # we just expect one download target
        if not isinstance(download_target, str):
            raise ValueError(
                f"download_target is expected to be a str but was {type(download_target)}: {download_target}"
            )

        # Reject the unsupported configuration *before* downloading anything; the
        # previous order performed the HTTP request and then raised unconditionally.
        if self.content_extractor:
            raise NotImplementedError("TODO")

        response = self.requester.send_request(
            stream_slice=StreamSlice(
                partition={}, cursor_slice={}, extra_fields={"download_target": download_target}
            ),
        )

        files_directory = Path(get_files_directory())

        file_name = (
            self.filename_extractor.eval(self.config, record=record)
            if self.filename_extractor
            else str(uuid.uuid4())
        )
        # NOTE(review): the interpolation result is not guaranteed to be a str here
        # (e.g. a purely numeric id) - coerce so the string handling below cannot fail.
        file_name = str(file_name).lstrip("/")
        file_relative_path = Path(record.stream_name) / Path(file_name)

        full_path = files_directory / file_relative_path

        # The file name is derived from record data, so make sure ".." segments
        # cannot move the write outside of the files directory.
        try:
            full_path.resolve().relative_to(files_directory.resolve())
        except ValueError as exc:
            raise ValueError(
                f"File path {full_path} is outside of the files directory {files_directory}"
            ) from exc

        full_path.parent.mkdir(parents=True, exist_ok=True)
        full_path.write_bytes(response.content)
        file_size_bytes = full_path.stat().st_size

        # Same messages as before, with formatting deferred to the logging framework.
        logger.info("File uploaded successfully")
        logger.info("File url: %s", full_path)
        logger.info("File size: %s KB", file_size_bytes / 1024)
        logger.info("File relative path: %s", file_relative_path)

        record.file_reference = AirbyteRecordMessageFileReference(
            staging_file_url=str(full_path),
            source_file_relative_path=str(file_relative_path),
            file_size_bytes=file_size_bytes,
        )
|
@@ -58,11 +58,16 @@ class DeclarativePartition(Partition):
|
|
58
58
|
def read(self) -> Iterable[Record]:
    """Yield the records the retriever produces for this partition's slice.

    Items that are not mappings are not yielded; they are handed to the
    message repository instead. Mapping items that are already ``Record``
    instances are passed through untouched, while any other mapping is wrapped
    in a ``Record`` tagged with this partition's stream name and slice.
    """
    for item in self._retriever.read_records(self._json_schema, self._stream_slice):
        if not isinstance(item, Mapping):
            # Non-record output (e.g. a message) goes to the repository.
            self._message_repository.emit_message(item)
            continue

        if isinstance(item, Record):
            # Already a Record: keep it as is so nothing attached to it is lost.
            yield item
        else:
            yield Record(
                data=item,
                stream_name=self.stream_name(),
                associated_slice=self._stream_slice,
            )
|
68
73
|
|
@@ -8,16 +8,18 @@ from datetime import datetime
|
|
8
8
|
from enum import Enum
|
9
9
|
from io import IOBase
|
10
10
|
from os import makedirs, path
|
11
|
-
from typing import Any,
|
11
|
+
from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple
|
12
12
|
|
13
13
|
from wcmatch.glob import GLOBSTAR, globmatch
|
14
14
|
|
15
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
15
16
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
16
17
|
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
|
17
18
|
include_identities_stream,
|
18
19
|
preserve_directory_structure,
|
19
20
|
use_file_transfer,
|
20
21
|
)
|
22
|
+
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
21
23
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
22
24
|
|
23
25
|
|
@@ -28,6 +30,10 @@ class FileReadMode(Enum):
|
|
28
30
|
|
29
31
|
class AbstractFileBasedStreamReader(ABC):
|
30
32
|
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
33
|
+
FILE_RELATIVE_PATH = "file_relative_path"
|
34
|
+
FILE_NAME = "file_name"
|
35
|
+
LOCAL_FILE_PATH = "local_file_path"
|
36
|
+
FILE_FOLDER = "file_folder"
|
31
37
|
|
32
38
|
def __init__(self) -> None:
|
33
39
|
self._config = None
|
@@ -148,9 +154,9 @@ class AbstractFileBasedStreamReader(ABC):
|
|
148
154
|
return False
|
149
155
|
|
150
156
|
@abstractmethod
|
151
|
-
def
|
157
|
+
def upload(
|
152
158
|
self, file: RemoteFile, local_directory: str, logger: logging.Logger
|
153
|
-
) ->
|
159
|
+
) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
|
154
160
|
"""
|
155
161
|
This is required for connectors that will support writing to
|
156
162
|
files. It will handle the logic to download,get,read,acquire or
|
@@ -162,25 +168,41 @@ class AbstractFileBasedStreamReader(ABC):
|
|
162
168
|
logger (logging.Logger): Logger for logging information and errors.
|
163
169
|
|
164
170
|
Returns:
|
165
|
-
|
166
|
-
-
|
167
|
-
-
|
168
|
-
-
|
169
|
-
this a mounted volume in the pod container.
|
170
|
-
|
171
|
+
AirbyteRecordMessageFileReference: A file reference object containing:
|
172
|
+
- staging_file_url (str): The absolute path to the referenced file in the staging area.
|
173
|
+
- file_size_bytes (int): The size of the referenced file in bytes.
|
174
|
+
- source_file_relative_path (str): The relative path to the referenced file in source.
|
171
175
|
"""
|
172
176
|
...
|
173
177
|
|
174
|
-
def _get_file_transfer_paths(
|
178
|
+
def _get_file_transfer_paths(
    self, source_file_relative_path: str, staging_directory: str
) -> MutableMapping[str, Any]:
    """
    Compute where a source file is written under the staging directory and make
    sure the destination folder exists.

    The returned dictionary has the following keys:
        - FILE_RELATIVE_PATH: The path of the file relative to the staging directory
          (the source path without leading slashes when the directory structure is
          preserved, otherwise just the file name).
        - LOCAL_FILE_PATH: The staging directory joined with that relative path.
        - FILE_NAME: The base name of the source path.
        - FILE_FOLDER: The directory part of the source path.
    """
    keep_directory_structure = self.preserve_directory_structure()

    # path.split yields exactly (dirname, basename) of the source path.
    source_folder, base_name = path.split(source_file_relative_path)

    # Leading slashes are dropped so the result is relative and can be joined
    # under the staging directory.
    relative_target = (
        source_file_relative_path.lstrip("/") if keep_directory_structure else base_name
    )
    local_target = path.join(staging_directory, relative_target)

    # Ensure the local directory exists
    makedirs(path.dirname(local_target), exist_ok=True)

    return {
        self.FILE_RELATIVE_PATH: relative_target,
        self.LOCAL_FILE_PATH: local_target,
        self.FILE_NAME: base_name,
        self.FILE_FOLDER: source_folder,
    }
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from datetime import datetime
|
6
|
+
from typing import Optional
|
7
|
+
|
8
|
+
from pydantic.v1 import BaseModel
|
9
|
+
|
10
|
+
|
11
|
+
class FileRecordData(BaseModel):
    """
    A record in a file-based stream.
    """

    # Required fields: a value must be supplied for each of them.
    folder: str
    file_name: str
    # presumably the size of the file in bytes - TODO confirm with the stream readers
    bytes: int
    source_uri: str

    # Optional fields: each one defaults to None when not supplied.
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
    mime_type: Optional[str] = None
|
@@ -2,34 +2,27 @@
|
|
2
2
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
import logging
|
5
|
-
import
|
6
|
-
from typing import Any, Dict, Iterable
|
5
|
+
from typing import Iterable, Tuple
|
7
6
|
|
8
|
-
from airbyte_cdk.
|
7
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
9
8
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
9
|
+
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
10
10
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
11
|
-
|
12
|
-
AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
|
13
|
-
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
|
11
|
+
from airbyte_cdk.sources.utils.files_directory import get_files_directory
|
14
12
|
|
15
13
|
|
16
14
|
class FileTransfer:
|
17
15
|
def __init__(self) -> None:
|
18
|
-
self._local_directory = (
|
19
|
-
AIRBYTE_STAGING_DIRECTORY
|
20
|
-
if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
|
21
|
-
else DEFAULT_LOCAL_DIRECTORY
|
22
|
-
)
|
16
|
+
self._local_directory = get_files_directory()
|
23
17
|
|
24
|
-
def
|
18
|
+
def upload(
|
25
19
|
self,
|
26
|
-
config: FileBasedStreamConfig,
|
27
20
|
file: RemoteFile,
|
28
21
|
stream_reader: AbstractFileBasedStreamReader,
|
29
22
|
logger: logging.Logger,
|
30
|
-
) -> Iterable[
|
23
|
+
) -> Iterable[Tuple[FileRecordData, AirbyteRecordMessageFileReference]]:
|
31
24
|
try:
|
32
|
-
yield stream_reader.
|
25
|
+
yield stream_reader.upload(
|
33
26
|
file=file, local_directory=self._local_directory, logger=logger
|
34
27
|
)
|
35
28
|
except Exception as ex:
|
@@ -18,9 +18,19 @@ JsonSchemaSupportedType = Union[List[str], Literal["string"], str]
|
|
18
18
|
SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]]
|
19
19
|
|
20
20
|
schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}}
|
21
|
+
|
21
22
|
file_transfer_schema = {
|
22
23
|
"type": "object",
|
23
|
-
"properties": {
|
24
|
+
"properties": {
|
25
|
+
"folder": {"type": "string"},
|
26
|
+
"file_name": {"type": "string"},
|
27
|
+
"source_uri": {"type": "string"},
|
28
|
+
"bytes": {"type": "integer"},
|
29
|
+
"id": {"type": ["null", "string"]},
|
30
|
+
"created_at": {"type": ["null", "string"]},
|
31
|
+
"updated_at": {"type": ["null", "string"]},
|
32
|
+
"mime_type": {"type": ["null", "string"]},
|
33
|
+
},
|
24
34
|
}
|
25
35
|
|
26
36
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
import copy
|
6
6
|
import logging
|
7
|
-
from functools import
|
7
|
+
from functools import lru_cache
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
10
|
from typing_extensions import deprecated
|
@@ -258,19 +258,14 @@ class FileBasedStreamPartition(Partition):
|
|
258
258
|
and record_data.record is not None
|
259
259
|
):
|
260
260
|
# `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
|
261
|
-
|
262
|
-
record_message_data = (
|
263
|
-
record_data.record.file
|
264
|
-
if self._use_file_transfer()
|
265
|
-
else record_data.record.data
|
266
|
-
)
|
261
|
+
record_message_data = record_data.record.data
|
267
262
|
if not record_message_data:
|
268
263
|
raise ExceptionWithDisplayMessage("A record without data was found")
|
269
264
|
else:
|
270
265
|
yield Record(
|
271
266
|
data=record_message_data,
|
272
267
|
stream_name=self.stream_name(),
|
273
|
-
|
268
|
+
file_reference=record_data.record.file_reference,
|
274
269
|
)
|
275
270
|
else:
|
276
271
|
self._message_repository.emit_message(record_data)
|
@@ -306,10 +301,6 @@ class FileBasedStreamPartition(Partition):
|
|
306
301
|
def stream_name(self) -> str:
|
307
302
|
return self._stream.name
|
308
303
|
|
309
|
-
@cache
|
310
|
-
def _use_file_transfer(self) -> bool:
|
311
|
-
return hasattr(self._stream, "use_file_transfer") and self._stream.use_file_transfer
|
312
|
-
|
313
304
|
def __repr__(self) -> str:
|
314
305
|
return f"FileBasedStreamPartition({self._stream.name}, {self._slice})"
|
315
306
|
|
@@ -11,7 +11,7 @@ from functools import cache
|
|
11
11
|
from os import path
|
12
12
|
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union
|
13
13
|
|
14
|
-
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level
|
14
|
+
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteStream, FailureType, Level
|
15
15
|
from airbyte_cdk.models import Type as MessageType
|
16
16
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
|
17
17
|
from airbyte_cdk.sources.file_based.exceptions import (
|
@@ -56,6 +56,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
56
56
|
airbyte_columns = [ab_last_mod_col, ab_file_name_col]
|
57
57
|
use_file_transfer = False
|
58
58
|
preserve_directory_structure = True
|
59
|
+
_file_transfer = FileTransfer()
|
59
60
|
|
60
61
|
def __init__(self, **kwargs: Any):
|
61
62
|
if self.FILE_TRANSFER_KW in kwargs:
|
@@ -93,21 +94,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
93
94
|
self.config
|
94
95
|
)
|
95
96
|
|
96
|
-
def _filter_schema_invalid_properties(
|
97
|
-
self, configured_catalog_json_schema: Dict[str, Any]
|
98
|
-
) -> Dict[str, Any]:
|
99
|
-
if self.use_file_transfer:
|
100
|
-
return {
|
101
|
-
"type": "object",
|
102
|
-
"properties": {
|
103
|
-
"file_path": {"type": "string"},
|
104
|
-
"file_size": {"type": "string"},
|
105
|
-
self.ab_file_name_col: {"type": "string"},
|
106
|
-
},
|
107
|
-
}
|
108
|
-
else:
|
109
|
-
return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
|
110
|
-
|
111
97
|
def _duplicated_files_names(
|
112
98
|
self, slices: List[dict[str, List[RemoteFile]]]
|
113
99
|
) -> List[dict[str, List[str]]]:
|
@@ -145,14 +131,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
145
131
|
record[self.ab_file_name_col] = file.uri
|
146
132
|
return record
|
147
133
|
|
148
|
-
def transform_record_for_file_transfer(
|
149
|
-
self, record: dict[str, Any], file: RemoteFile
|
150
|
-
) -> dict[str, Any]:
|
151
|
-
# timstamp() returns a float representing the number of seconds since the unix epoch
|
152
|
-
record[self.modified] = int(file.last_modified.timestamp()) * 1000
|
153
|
-
record[self.source_file_url] = file.uri
|
154
|
-
return record
|
155
|
-
|
156
134
|
def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
|
157
135
|
"""
|
158
136
|
Yield all records from all remote files in `list_files_for_this_sync`.
|
@@ -173,19 +151,13 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
173
151
|
|
174
152
|
try:
|
175
153
|
if self.use_file_transfer:
|
176
|
-
self.
|
177
|
-
|
178
|
-
file_transfer = FileTransfer()
|
179
|
-
for record in file_transfer.get_file(
|
180
|
-
self.config, file, self.stream_reader, self.logger
|
154
|
+
for file_record_data, file_reference in self._file_transfer.upload(
|
155
|
+
file=file, stream_reader=self.stream_reader, logger=self.logger
|
181
156
|
):
|
182
|
-
line_no += 1
|
183
|
-
if not self.record_passes_validation_policy(record):
|
184
|
-
n_skipped += 1
|
185
|
-
continue
|
186
|
-
record = self.transform_record_for_file_transfer(record, file)
|
187
157
|
yield stream_data_to_airbyte_message(
|
188
|
-
self.name,
|
158
|
+
self.name,
|
159
|
+
file_record_data.dict(exclude_none=True),
|
160
|
+
file_reference=file_reference,
|
189
161
|
)
|
190
162
|
else:
|
191
163
|
for record in parser.parse_records(
|
@@ -259,6 +231,8 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
259
231
|
|
260
232
|
@cache
|
261
233
|
def get_json_schema(self) -> JsonSchema:
|
234
|
+
if self.use_file_transfer:
|
235
|
+
return file_transfer_schema
|
262
236
|
extra_fields = {
|
263
237
|
self.ab_last_mod_col: {"type": "string"},
|
264
238
|
self.ab_file_name_col: {"type": "string"},
|
@@ -282,9 +256,7 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
282
256
|
return {"type": "object", "properties": {**extra_fields, **schema["properties"]}}
|
283
257
|
|
284
258
|
def _get_raw_json_schema(self) -> JsonSchema:
|
285
|
-
if self.
|
286
|
-
return file_transfer_schema
|
287
|
-
elif self.config.input_schema:
|
259
|
+
if self.config.input_schema:
|
288
260
|
return self.config.get_input_schema() # type: ignore
|
289
261
|
elif self.config.schemaless:
|
290
262
|
return schemaless_schema
|
@@ -341,6 +313,11 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
341
313
|
self.config.globs or [], self.config.legacy_prefix, self.logger
|
342
314
|
)
|
343
315
|
|
316
|
+
def as_airbyte_stream(self) -> AirbyteStream:
|
317
|
+
file_stream = super().as_airbyte_stream()
|
318
|
+
file_stream.is_file_based = self.use_file_transfer
|
319
|
+
return file_stream
|
320
|
+
|
344
321
|
def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
|
345
322
|
loop = asyncio.get_event_loop()
|
346
323
|
schema = loop.run_until_complete(self._infer_schema(files))
|
@@ -61,9 +61,7 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
|
|
61
61
|
permissions_record = self.transform_record(
|
62
62
|
permissions_record, file, file_datetime_string
|
63
63
|
)
|
64
|
-
yield stream_data_to_airbyte_message(
|
65
|
-
self.name, permissions_record, is_file_transfer_message=False
|
66
|
-
)
|
64
|
+
yield stream_data_to_airbyte_message(self.name, permissions_record)
|
67
65
|
except Exception as e:
|
68
66
|
self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
|
69
67
|
yield AirbyteMessage(
|
@@ -29,6 +29,7 @@ class DefaultStream(AbstractStream):
|
|
29
29
|
logger: Logger,
|
30
30
|
cursor: Cursor,
|
31
31
|
namespace: Optional[str] = None,
|
32
|
+
supports_file_transfer: bool = False,
|
32
33
|
) -> None:
|
33
34
|
self._stream_partition_generator = partition_generator
|
34
35
|
self._name = name
|
@@ -39,6 +40,7 @@ class DefaultStream(AbstractStream):
|
|
39
40
|
self._logger = logger
|
40
41
|
self._cursor = cursor
|
41
42
|
self._namespace = namespace
|
43
|
+
self._supports_file_transfer = supports_file_transfer
|
42
44
|
|
43
45
|
def generate_partitions(self) -> Iterable[Partition]:
|
44
46
|
yield from self._stream_partition_generator.generate()
|
@@ -68,6 +70,7 @@ class DefaultStream(AbstractStream):
|
|
68
70
|
json_schema=dict(self._json_schema),
|
69
71
|
supported_sync_modes=[SyncMode.full_refresh],
|
70
72
|
is_resumable=False,
|
73
|
+
is_file_based=self._supports_file_transfer,
|
71
74
|
)
|
72
75
|
|
73
76
|
if self._namespace:
|
airbyte_cdk/sources/types.py
CHANGED
@@ -6,6 +6,7 @@ from __future__ import annotations
|
|
6
6
|
|
7
7
|
from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
|
8
8
|
|
9
|
+
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
9
10
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
10
11
|
|
11
12
|
# A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
|
@@ -23,12 +24,12 @@ class Record(Mapping[str, Any]):
|
|
23
24
|
data: Mapping[str, Any],
|
24
25
|
stream_name: str,
|
25
26
|
associated_slice: Optional[StreamSlice] = None,
|
26
|
-
|
27
|
+
file_reference: Optional[AirbyteRecordMessageFileReference] = None,
|
27
28
|
):
|
28
29
|
self._data = data
|
29
30
|
self._associated_slice = associated_slice
|
30
31
|
self.stream_name = stream_name
|
31
|
-
self.
|
32
|
+
self._file_reference = file_reference
|
32
33
|
|
33
34
|
@property
|
34
35
|
def data(self) -> Mapping[str, Any]:
|
@@ -38,6 +39,14 @@ class Record(Mapping[str, Any]):
|
|
38
39
|
def associated_slice(self) -> Optional[StreamSlice]:
|
39
40
|
return self._associated_slice
|
40
41
|
|
42
|
+
@property
|
43
|
+
def file_reference(self) -> AirbyteRecordMessageFileReference:
|
44
|
+
return self._file_reference
|
45
|
+
|
46
|
+
@file_reference.setter
|
47
|
+
def file_reference(self, value: AirbyteRecordMessageFileReference) -> None:
|
48
|
+
self._file_reference = value
|
49
|
+
|
41
50
|
def __repr__(self) -> str:
|
42
51
|
return repr(self._data)
|
43
52
|
|
@@ -0,0 +1,15 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2025 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
import os
|
5
|
+
|
6
|
+
AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
|
7
|
+
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
|
8
|
+
|
9
|
+
|
10
|
+
def get_files_directory() -> str:
|
11
|
+
return (
|
12
|
+
AIRBYTE_STAGING_DIRECTORY
|
13
|
+
if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
|
14
|
+
else DEFAULT_LOCAL_DIRECTORY
|
15
|
+
)
|
@@ -9,10 +9,10 @@ from airbyte_cdk.models import (
|
|
9
9
|
AirbyteLogMessage,
|
10
10
|
AirbyteMessage,
|
11
11
|
AirbyteRecordMessage,
|
12
|
+
AirbyteRecordMessageFileReference,
|
12
13
|
AirbyteTraceMessage,
|
13
14
|
)
|
14
15
|
from airbyte_cdk.models import Type as MessageType
|
15
|
-
from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
|
16
16
|
from airbyte_cdk.sources.streams.core import StreamData
|
17
17
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
18
18
|
|
@@ -22,7 +22,7 @@ def stream_data_to_airbyte_message(
|
|
22
22
|
data_or_message: StreamData,
|
23
23
|
transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform),
|
24
24
|
schema: Optional[Mapping[str, Any]] = None,
|
25
|
-
|
25
|
+
file_reference: Optional[AirbyteRecordMessageFileReference] = None,
|
26
26
|
) -> AirbyteMessage:
|
27
27
|
if schema is None:
|
28
28
|
schema = {}
|
@@ -36,12 +36,12 @@ def stream_data_to_airbyte_message(
|
|
36
36
|
# taken unless configured. See
|
37
37
|
# docs/connector-development/cdk-python/schemas.md for details.
|
38
38
|
transformer.transform(data, schema)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
39
|
+
message = AirbyteRecordMessage(
|
40
|
+
stream=stream_name,
|
41
|
+
data=data,
|
42
|
+
emitted_at=now_millis,
|
43
|
+
file_reference=file_reference,
|
44
|
+
)
|
45
45
|
return AirbyteMessage(type=MessageType.RECORD, record=message)
|
46
46
|
case AirbyteTraceMessage():
|
47
47
|
return AirbyteMessage(type=MessageType.TRACE, trace=data_or_message)
|
@@ -198,6 +198,14 @@ def find_template(resource: str, execution_folder: str) -> Dict[str, Any]:
|
|
198
198
|
return json.load(template_file) # type: ignore # we assume the dev correctly set up the resource file
|
199
199
|
|
200
200
|
|
201
|
+
def find_binary_response(resource: str, execution_folder: str) -> bytes:
|
202
|
+
response_filepath = str(
|
203
|
+
get_unit_test_folder(execution_folder) / "resource" / "http" / "response" / f"{resource}"
|
204
|
+
)
|
205
|
+
with open(response_filepath, "rb") as response_file:
|
206
|
+
return response_file.read() # type: ignore # we assume the dev correctly set up the resource file
|
207
|
+
|
208
|
+
|
201
209
|
def create_record_builder(
|
202
210
|
response_template: Dict[str, Any],
|
203
211
|
records_path: Union[FieldPath, NestedPath],
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.45.
|
3
|
+
Version: 6.45.8.post2.dev14604759065
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -22,7 +22,7 @@ Provides-Extra: sql
|
|
22
22
|
Provides-Extra: vector-db-based
|
23
23
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
24
24
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
25
|
-
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.
|
25
|
+
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.15,<0.16)
|
26
26
|
Requires-Dist: anyascii (>=0.3.2,<0.4.0)
|
27
27
|
Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
|
28
28
|
Requires-Dist: backoff
|
@@ -1,5 +1,4 @@
|
|
1
1
|
airbyte_cdk/__init__.py,sha256=52uncJvDQNHvwKxaqzXgnMYTptIl65LDJr2fvlk8-DU,11707
|
2
|
-
airbyte_cdk/cli/README.md,sha256=yEZyrSfnExsogR6eXgjx7bnRaDUlNzLflMDz8tD4Avo,1796
|
3
2
|
airbyte_cdk/cli/__init__.py,sha256=Hu-1XT2KDoYjDF7-_ziDwv5bY3PueGjANOCbzeOegDg,57
|
4
3
|
airbyte_cdk/cli/source_declarative_manifest/__init__.py,sha256=-0ST722Nj65bgRokzpzPkD1NBBW5CytEHFUe38cB86Q,91
|
5
4
|
airbyte_cdk/cli/source_declarative_manifest/_run.py,sha256=9qtbjt-I_stGWzWX6yVUKO_eE-Ga7g-uTuibML9qLBs,8330
|
@@ -30,16 +29,15 @@ airbyte_cdk/destinations/vector_db_based/writer.py,sha256=nZ00xPiohElJmYktEZZIhr
|
|
30
29
|
airbyte_cdk/entrypoint.py,sha256=NRJv5BNZRSUEVTmNBa9N7ih6fW5sg4DwL0nkB9kI99Y,18570
|
31
30
|
airbyte_cdk/exception_handler.py,sha256=D_doVl3Dt60ASXlJsfviOCswxGyKF2q0RL6rif3fNks,2013
|
32
31
|
airbyte_cdk/logger.py,sha256=1cURbvawbunCAV178q-XhTHcbAQZTSf07WhU7U9AXWU,3744
|
33
|
-
airbyte_cdk/models/__init__.py,sha256=
|
34
|
-
airbyte_cdk/models/airbyte_protocol.py,sha256=
|
32
|
+
airbyte_cdk/models/__init__.py,sha256=Et9wJWs5VOWynGbb-3aJRhsdAHAiLkNNLxdwqJAuqkw,2114
|
33
|
+
airbyte_cdk/models/airbyte_protocol.py,sha256=oZdKsZ7yPjUt9hvxdWNpxCtgjSV2RWhf4R9Np03sqyY,3613
|
35
34
|
airbyte_cdk/models/airbyte_protocol_serializers.py,sha256=s6SaFB2CMrG_7jTQGn_fhFbQ1FUxhCxf5kq2RWGHMVI,1749
|
36
|
-
airbyte_cdk/models/file_transfer_record_message.py,sha256=J-E-43KOmUFdpsjeKlEfNnnZRSB-Gb5AGZjonR25Drc,323
|
37
35
|
airbyte_cdk/models/well_known_types.py,sha256=EquepbisGPuCSrs_D7YVVnMR9-ShhUr21wnFz3COiJs,156
|
38
36
|
airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
39
37
|
airbyte_cdk/sources/__init__.py,sha256=45J83QsFH3Wky3sVapZWg4C58R_i1thm61M06t2c1AQ,1156
|
40
38
|
airbyte_cdk/sources/abstract_source.py,sha256=50vxEBRByiNhT4WJkiFvgM-C6PWqKSJgvuNC_aeg2cw,15547
|
41
39
|
airbyte_cdk/sources/concurrent_source/__init__.py,sha256=3D_RJsxQfiLboSCDdNei1Iv-msRp3DXsas6E9kl7dXc,386
|
42
|
-
airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py,sha256=
|
40
|
+
airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py,sha256=P_GA5QayzehCf0ksUbEbGoNixBnauzsepv-0ICzhH4w,12691
|
43
41
|
airbyte_cdk/sources/concurrent_source/concurrent_source.py,sha256=P8B6EcLKaSstfAD9kDZsTJ0q8vRmdFrxLt-zOA5_By0,7737
|
44
42
|
airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py,sha256=f9PIRPWn2tXu0-bxVeYHL2vYdqCzZ_kgpHg5_Ep-cfQ,6103
|
45
43
|
airbyte_cdk/sources/concurrent_source/partition_generation_completed_sentinel.py,sha256=z1t-rAZBsqVidv2fpUlPHE9JgyXsITuGk4AMu96mXSQ,696
|
@@ -68,11 +66,11 @@ airbyte_cdk/sources/declarative/checks/check_stream.py,sha256=QeExVmpSYjr_CnghHu
|
|
68
66
|
airbyte_cdk/sources/declarative/checks/connection_checker.py,sha256=MBRJo6WJlZQHpIfOGaNOkkHUmgUl_4wDM6VPo41z5Ss,1383
|
69
67
|
airbyte_cdk/sources/declarative/concurrency_level/__init__.py,sha256=5XUqrmlstYlMM0j6crktlKQwALek0uiz2D3WdM46MyA,191
|
70
68
|
airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py,sha256=YIwCTCpOr_QSNW4ltQK0yUGWInI8PKNY216HOOegYLk,2101
|
71
|
-
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=
|
69
|
+
airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=sEyakwDKLlREsr3RPq5q54hgqqc-MCQgtJzHAULmsHQ,28199
|
72
70
|
airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
73
71
|
airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=_zGNGq31RNy_0QBLt_EcTvgPyhj7urPdx6oA3M5-r3o,3150
|
74
72
|
airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
|
75
|
-
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=
|
73
|
+
airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=Vs1Pa_o1oOS7hlL412PDwNruIfvHsv9u4ttF47VDMYY,160754
|
76
74
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
77
75
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=dCRlddBUSaJmBNBz1pSO1r2rTw8AP5d2_vlmIeGs2gg,10767
|
78
76
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
|
@@ -90,7 +88,7 @@ airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt
|
|
90
88
|
airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8VwkjS0xD_u350Km3SiYP7hpOOgiLg5o,1169
|
91
89
|
airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
|
92
90
|
airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
|
93
|
-
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=
|
91
|
+
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=Xg4e0exJ5Tq8I346uD2-HjpCsAUFLgPPcNKa0UoHjV8,7178
|
94
92
|
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=WJyA2OYIEgFpVP5Y3o0tIj69AV6IKkn9B16MeXaEItI,6513
|
95
93
|
airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
|
96
94
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
|
@@ -115,13 +113,13 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
|
|
115
113
|
airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
|
116
114
|
airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
|
117
115
|
airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
|
118
|
-
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=
|
116
|
+
airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=x4oO-U4Ftfqi0jnOEw4lKU60Cb2x5eDiHSapesNr5iw,114266
|
119
117
|
airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
120
118
|
airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=nlVvHC511NUyDEEIRBkoeDTAvLqKNp-hRy8D19z8tdk,5941
|
121
119
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
122
120
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=4C15MKV-zOrMVQAm4FyohDsrJUBCSpMv5tZw0SK3aeI,9685
|
123
121
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
124
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
122
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=tcpUKJnBlJ3dJmPwb2ukFWlXMADV4BopzTXjUi7KoU0,161821
|
125
123
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
|
126
124
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
127
125
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -180,6 +178,7 @@ airbyte_cdk/sources/declarative/resolvers/config_components_resolver.py,sha256=d
|
|
180
178
|
airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py,sha256=AiojNs8wItJFrENZBFUaDvau3sgwudO6Wkra36upSPo,4639
|
181
179
|
airbyte_cdk/sources/declarative/retrievers/__init__.py,sha256=nQepwG_RfW53sgwvK5dLPqfCx0VjsQ83nYoPjBMAaLM,527
|
182
180
|
airbyte_cdk/sources/declarative/retrievers/async_retriever.py,sha256=6oZtnCHm9NdDvjTSrVwPQOXGSdETSIR7eWH2vFjM7jI,4855
|
181
|
+
airbyte_cdk/sources/declarative/retrievers/file_uploader.py,sha256=J-OjleRStGAGaoPylttyFQwVGHvtFTqdfnS0JYcYUtg,3537
|
183
182
|
airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=XPLs593Xv8c5cKMc37XzUAYmzlXd1a7eSsspM-CMuWA,1696
|
184
183
|
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=O7qpM71L1_ATIbEKa8y658jdiSJSPw0KmuGKgnaruQU,31008
|
185
184
|
airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
|
@@ -191,7 +190,7 @@ airbyte_cdk/sources/declarative/schema/schema_loader.py,sha256=kjt8v0N5wWKA5zyLn
|
|
191
190
|
airbyte_cdk/sources/declarative/spec/__init__.py,sha256=H0UwoRhgucbKBIzg85AXrifybVmfpwWpPdy22vZKVuo,141
|
192
191
|
airbyte_cdk/sources/declarative/spec/spec.py,sha256=ODSNUgkDOhnLQnwLjgSaME6R3kNeywjROvbNrWEnsgU,1876
|
193
192
|
airbyte_cdk/sources/declarative/stream_slicers/__init__.py,sha256=sI9vhc95RwJYOnA0VKjcbtKgFcmAbWjhdWBXFbAijOs,176
|
194
|
-
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=
|
193
|
+
airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py,sha256=cjKGm4r438dd1GxrFHJ4aYrdzG2bkncnwaWxAwlXR3M,3585
|
195
194
|
airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py,sha256=SOkIPBi2Wu7yxIvA15yFzUAB95a3IzA8LPq5DEqHQQc,725
|
196
195
|
airbyte_cdk/sources/declarative/transformations/__init__.py,sha256=CPJ8TlMpiUmvG3624VYu_NfTzxwKcfBjM2Q2wJ7fkSA,919
|
197
196
|
airbyte_cdk/sources/declarative/transformations/add_fields.py,sha256=Eg1jQtRObgzxbtySTQs5uEZIjEklsoHFxYSPf78x6Ng,5420
|
@@ -225,25 +224,26 @@ airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha2
|
|
225
224
|
airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
|
226
225
|
airbyte_cdk/sources/file_based/file_based_source.py,sha256=Xg8OYWnGc-OcVBglvS08uwAWGWHBhEqsBnyODIkOK-4,20051
|
227
226
|
airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py,sha256=4e7FXqQ9hueacexC0SyrZyjF8oREYHza8pKF9CgKbD8,5050
|
228
|
-
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=
|
227
|
+
airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=rwz8AhEIqYB9gBF7uW9eR--eUiHOntzuwLH8jFHNacE,7854
|
228
|
+
airbyte_cdk/sources/file_based/file_record_data.py,sha256=Vkr5AyZzlsOezjVCLhFrm_WpymlQdolWCnFAwqLJ9Iw,453
|
229
229
|
airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
|
230
230
|
airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=USEYqiICXBWpDV443VtNOCmUA-GINzY_Zah74_5w3qQ,10860
|
231
231
|
airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
|
232
232
|
airbyte_cdk/sources/file_based/file_types/excel_parser.py,sha256=BeplCq0hmojELU6bZCvvpRLpQ9us81TqbGYwrhd3INo,7188
|
233
|
-
airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=
|
233
|
+
airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=5l2Jo6bp6neDmgM427PrZMZeqU0hCIZVWnzUZ_7BT10,1100
|
234
234
|
airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
|
235
235
|
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
|
236
236
|
airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
|
237
237
|
airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=2TYOQl62FQPCa8otLbkDIk_j01EP3oWaKSfXGhCjCHg,19492
|
238
238
|
airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
|
239
|
-
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=
|
239
|
+
airbyte_cdk/sources/file_based/schema_helpers.py,sha256=dKXAOTmMI3YmC5u7PeHC9AaZmlL6ft7CYSFQKCg0sXw,9911
|
240
240
|
airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
|
241
241
|
airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=kjvX7nOmUALYd7HuZHilUzgJPZ-MnZ08mtvuBnt2tQ0,618
|
242
242
|
airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py,sha256=vjTlmYT_nqzY3DbT5xem7X-bwgA9RyXHoKFqiMO2URk,1728
|
243
243
|
airbyte_cdk/sources/file_based/stream/__init__.py,sha256=q_zmeOHHg0JK5j1YNSOIsyXGz-wlTl_0E8z5GKVAcVM,543
|
244
244
|
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=9pQh3BHYcxm8CRC8XawfmBxL8O9HggpWwCCbX_ncINE,7509
|
245
245
|
airbyte_cdk/sources/file_based/stream/concurrent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
246
|
-
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=
|
246
|
+
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=1AIuAOHa_M6zN9l0eAWBHwhKl4fdP4-KlUMOMzTv11U,13525
|
247
247
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py,sha256=Rx7TwjH8B7e0eee83Tlqxv1bWn-BVXOmlUAH7auM1uM,344
|
248
248
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py,sha256=5dYZMLBEbvCyrCT89lCYdm2FdrLPLuxjdpQSVGP5o0w,1856
|
249
249
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py,sha256=gRTL-9I3ejjQOpLKd6ixe9rB3kGlubCdhUt9ri6AdAI,14880
|
@@ -251,9 +251,9 @@ airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_c
|
|
251
251
|
airbyte_cdk/sources/file_based/stream/cursor/__init__.py,sha256=MhFB5hOo8sjwvCh8gangaymdg3EJWYt_72brFOZt068,191
|
252
252
|
airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py,sha256=om-x3gZFPgWDpi15S9RxZmR36VHnk8sytgN6LlBQhAw,1934
|
253
253
|
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py,sha256=VGV7xLyBribuBMVrXtO1xqkWJD86bl7yhXtjnwLMohM,7051
|
254
|
-
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=
|
254
|
+
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=gzSN4cLywwzo_U1OdFS3Of_-4DRkUcX_j7Mv30MrxQs,17154
|
255
255
|
airbyte_cdk/sources/file_based/stream/identities_stream.py,sha256=FZH83Geoy3K3nwUk2VVNJERFcXUTnl-4XljjucUM23s,1893
|
256
|
-
airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=
|
256
|
+
airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py,sha256=6KxqdD3-VvwxDTk7TtZ0M32fga4CI3qZ9IKdAkySpx0,3844
|
257
257
|
airbyte_cdk/sources/file_based/types.py,sha256=INxG7OPnkdUP69oYNKMAbwhvV1AGvLRHs1J6pIia2FI,218
|
258
258
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
259
259
|
airbyte_cdk/sources/http_logger.py,sha256=H93kPAujHhPmXNX0JSFG3D-SL6yEFA5PtKot9Hu3TYA,1690
|
@@ -279,7 +279,7 @@ airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2Uff
|
|
279
279
|
airbyte_cdk/sources/streams/concurrent/clamping.py,sha256=i26GVyui2ScEXSP-IP_61K2HaTp1-6lTlYHsZVYpuZA,3240
|
280
280
|
airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=LFXbKBEMtNSVz_kZs9qydS9fPvzTU5wdgXRagRRJeHo,21388
|
281
281
|
airbyte_cdk/sources/streams/concurrent/cursor_types.py,sha256=ZyWLPpeLX1qXcP5MwS-wxK11IBMsnVPCw9zx8gA2_Ro,843
|
282
|
-
airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=
|
282
|
+
airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=3SBjFa1z955pSE_2qt1C7mAky-RKjOZeQDePbZkWYYs,3371
|
283
283
|
airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
|
284
284
|
airbyte_cdk/sources/streams/concurrent/helpers.py,sha256=S6AW8TgIASCZ2UuUcQLE8OzgYUHWt2-KPOvNPwnQf-Q,1596
|
285
285
|
airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py,sha256=2t64b_z9cEPmlHZnjSiMTO8PEtEdiAJDG0JcYOtUqAE,3363
|
@@ -316,10 +316,11 @@ airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py,sha256=gVLo7nU-OR
|
|
316
316
|
airbyte_cdk/sources/streams/http/requests_native_auth/token.py,sha256=h5PTzcdH-RQLeCg7xZ45w_484OPUDSwNWl_iMJQmZoI,2526
|
317
317
|
airbyte_cdk/sources/streams/permissions/identities_stream.py,sha256=9O9k6k18Xm3Zsiw_vnI_jsHXfMCQiek6V-jMkJJLxn8,2621
|
318
318
|
airbyte_cdk/sources/streams/utils/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
319
|
-
airbyte_cdk/sources/types.py,sha256=
|
319
|
+
airbyte_cdk/sources/types.py,sha256=1oerHQpidFrIluUhWsM3-4Xst4wwUDqzkgtgZY0jjZU,5485
|
320
320
|
airbyte_cdk/sources/utils/__init__.py,sha256=TTN6VUxVy6Is8BhYQZR5pxJGQh8yH4duXh4O1TiMiEY,118
|
321
321
|
airbyte_cdk/sources/utils/casing.py,sha256=QC-gV1O4e8DR4-bhdXieUPKm_JamzslVyfABLYYRSXA,256
|
322
|
-
airbyte_cdk/sources/utils/
|
322
|
+
airbyte_cdk/sources/utils/files_directory.py,sha256=z8Dmr-wkL1sAqdwCST4MBUFAyMHPD2cJIzVdAuCynp8,391
|
323
|
+
airbyte_cdk/sources/utils/record_helper.py,sha256=7wL-pDYrBpcmZHa8ORtiSOqBZJEZI5hdl2dA1RYiatk,2029
|
323
324
|
airbyte_cdk/sources/utils/schema_helpers.py,sha256=bR3I70-e11S6B8r6VK-pthQXtcYrXojgXFvuK7lRrpg,8545
|
324
325
|
airbyte_cdk/sources/utils/slice_logger.py,sha256=qWWeFLAvigFz0b4O1_O3QDM1cy8PqZAMMgVPR2hEeb8,1778
|
325
326
|
airbyte_cdk/sources/utils/transform.py,sha256=0LOvIJg1vmg_70AiAVe-YHMr-LHrqEuxg9cm1BnYPDM,11725
|
@@ -343,7 +344,7 @@ airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBET
|
|
343
344
|
airbyte_cdk/test/mock_http/mocker.py,sha256=XgsjMtVoeMpRELPyALgrkHFauH9H5irxrz1Kcxh2yFY,8013
|
344
345
|
airbyte_cdk/test/mock_http/request.py,sha256=tdB8cqk2vLgCDTOKffBKsM06llYs4ZecgtH6DKyx6yY,4112
|
345
346
|
airbyte_cdk/test/mock_http/response.py,sha256=s4-cQQqTtmeej0pQDWqmG0vUWpHS-93lIWMpW3zSVyU,662
|
346
|
-
airbyte_cdk/test/mock_http/response_builder.py,sha256=
|
347
|
+
airbyte_cdk/test/mock_http/response_builder.py,sha256=F-v7ebftqGj7YVIMLKdodmU9U8Dq8aIyllWGo2NGwHc,8331
|
347
348
|
airbyte_cdk/test/standard_tests/__init__.py,sha256=YS2bghoGmQ-4GNIbe6RuEmvV-V1kpM1OyxTpebrs0Ig,1338
|
348
349
|
airbyte_cdk/test/standard_tests/_job_runner.py,sha256=d2JkwxJilYIJNmyVH946YMn8x1pnP3JaNT865V8vZzQ,5820
|
349
350
|
airbyte_cdk/test/standard_tests/connector_base.py,sha256=HGdDqLq8cCdBJ5T2s92PdN5miD2Vs_HczWOUbojAebY,5618
|
@@ -376,9 +377,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
376
377
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
377
378
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
378
379
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
379
|
-
airbyte_cdk-6.45.
|
380
|
-
airbyte_cdk-6.45.
|
381
|
-
airbyte_cdk-6.45.
|
382
|
-
airbyte_cdk-6.45.
|
383
|
-
airbyte_cdk-6.45.
|
384
|
-
airbyte_cdk-6.45.
|
380
|
+
airbyte_cdk-6.45.8.post2.dev14604759065.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
381
|
+
airbyte_cdk-6.45.8.post2.dev14604759065.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
382
|
+
airbyte_cdk-6.45.8.post2.dev14604759065.dist-info/METADATA,sha256=AI1_qt0J-_k2u08yhSHEZ8iAff6q26cJO4EX7W7UYJI,6134
|
383
|
+
airbyte_cdk-6.45.8.post2.dev14604759065.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
384
|
+
airbyte_cdk-6.45.8.post2.dev14604759065.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
385
|
+
airbyte_cdk-6.45.8.post2.dev14604759065.dist-info/RECORD,,
|
airbyte_cdk/cli/README.md
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
# Source Declarative Manifest CLI Usage Guide
|
2
|
-
|
3
|
-
This guide explains how to install and use the Source Declarative Manifest (SDM) CLI tool for Airbyte connector development.
|
4
|
-
|
5
|
-
## Installation
|
6
|
-
|
7
|
-
### Standard Installation
|
8
|
-
|
9
|
-
```bash
|
10
|
-
pipx install airbyte-cdk
|
11
|
-
```
|
12
|
-
|
13
|
-
If you encounter an error related to a missing `distutils` module, verify that you are running Python version `<=3.11` and try running:
|
14
|
-
|
15
|
-
```bash
|
16
|
-
python -m pipx install airbyte-cdk
|
17
|
-
```
|
18
|
-
|
19
|
-
## Using the CLI
|
20
|
-
|
21
|
-
The SDM CLI follows standard Airbyte connector command patterns:
|
22
|
-
|
23
|
-
```bash
|
24
|
-
source-declarative-manifest [command] --config /path/to/config.json
|
25
|
-
```
|
26
|
-
|
27
|
-
Where [command] can be:
|
28
|
-
|
29
|
-
spec - Show connector specification
|
30
|
-
check - Verify connection to the source
|
31
|
-
discover - List available streams
|
32
|
-
read - Read data from streams
|
33
|
-
|
34
|
-
:::caution
|
35
|
-
When developing locally (outside a Docker container), the CLI operates in "remote manifest mode" and expects your manifest to be included in your configuration file.
|
36
|
-
:::
|
37
|
-
|
38
|
-
### Steps for Local Testing
|
39
|
-
|
40
|
-
1. Convert your manifest from YAML to JSON
|
41
|
-
|
42
|
-
Your manifest is defined in YAML, but must be converted to JSON for the config file. You can use an [online tool](https://onlineyamltools.com/convert-yaml-to-json) to do so.
|
43
|
-
|
44
|
-
Create a config file that includes both your config parameters AND the manifest. Add your entire manifest as a JSON object under the `__injected_declarative_manifest` key
|
45
|
-
|
46
|
-
Example:
|
47
|
-
|
48
|
-
```json
|
49
|
-
{
|
50
|
-
"api_key": "very_secret_key",
|
51
|
-
"start_time": "04:20",
|
52
|
-
"__injected_declarative_manifest": {
|
53
|
-
// Insert the JSON version of your manifest here
|
54
|
-
}
|
55
|
-
}
|
56
|
-
```
|
57
|
-
|
58
|
-
2. Run the command against your config file
|
59
|
-
|
60
|
-
```bash
|
61
|
-
source-declarative-manifest check --config /relative/path/to/config.json
|
62
|
-
source-declarative-manifest read --config /relative/path/to/config.json --catalog /relative/path/to/catalog.json
|
63
|
-
```
|
@@ -1,13 +0,0 @@
|
|
1
|
-
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
-
|
3
|
-
from dataclasses import dataclass
|
4
|
-
from typing import Any, Dict, Optional
|
5
|
-
|
6
|
-
|
7
|
-
@dataclass
|
8
|
-
class AirbyteFileTransferRecordMessage:
|
9
|
-
stream: str
|
10
|
-
file: Dict[str, Any]
|
11
|
-
emitted_at: int
|
12
|
-
namespace: Optional[str] = None
|
13
|
-
data: Optional[Dict[str, Any]] = None
|
File without changes
|
File without changes
|
File without changes
|