airbyte-cdk 6.45.4__py3-none-any.whl → 6.45.4.post14.dev14544463167__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. airbyte_cdk/models/__init__.py +1 -0
  2. airbyte_cdk/models/airbyte_protocol.py +1 -3
  3. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
  4. airbyte_cdk/sources/declarative/auth/oauth.py +2 -2
  5. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -0
  6. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +36 -0
  7. airbyte_cdk/sources/declarative/extractors/record_selector.py +6 -1
  8. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +31 -0
  9. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +39 -1
  10. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +93 -0
  11. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +9 -4
  12. airbyte_cdk/sources/file_based/file_based_stream_reader.py +38 -16
  13. airbyte_cdk/sources/file_based/file_record_data.py +23 -0
  14. airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -15
  15. airbyte_cdk/sources/file_based/schema_helpers.py +11 -1
  16. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -12
  17. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +15 -38
  18. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +1 -3
  19. airbyte_cdk/sources/streams/concurrent/default_stream.py +3 -0
  20. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +28 -11
  21. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +4 -27
  22. airbyte_cdk/sources/types.py +11 -2
  23. airbyte_cdk/sources/utils/files_directory.py +15 -0
  24. airbyte_cdk/sources/utils/record_helper.py +8 -8
  25. airbyte_cdk/test/entrypoint_wrapper.py +4 -0
  26. airbyte_cdk/test/mock_http/response_builder.py +8 -0
  27. airbyte_cdk/test/standard_tests/__init__.py +46 -0
  28. airbyte_cdk/test/standard_tests/_job_runner.py +159 -0
  29. airbyte_cdk/test/standard_tests/connector_base.py +148 -0
  30. airbyte_cdk/test/standard_tests/declarative_sources.py +92 -0
  31. airbyte_cdk/test/standard_tests/destination_base.py +16 -0
  32. airbyte_cdk/test/standard_tests/models/__init__.py +7 -0
  33. airbyte_cdk/test/standard_tests/models/scenario.py +74 -0
  34. airbyte_cdk/test/standard_tests/pytest_hooks.py +61 -0
  35. airbyte_cdk/test/standard_tests/source_base.py +140 -0
  36. {airbyte_cdk-6.45.4.dist-info → airbyte_cdk-6.45.4.post14.dev14544463167.dist-info}/METADATA +3 -2
  37. {airbyte_cdk-6.45.4.dist-info → airbyte_cdk-6.45.4.post14.dev14544463167.dist-info}/RECORD +41 -30
  38. airbyte_cdk/models/file_transfer_record_message.py +0 -13
  39. {airbyte_cdk-6.45.4.dist-info → airbyte_cdk-6.45.4.post14.dev14544463167.dist-info}/LICENSE.txt +0 -0
  40. {airbyte_cdk-6.45.4.dist-info → airbyte_cdk-6.45.4.post14.dev14544463167.dist-info}/LICENSE_SHORT +0 -0
  41. {airbyte_cdk-6.45.4.dist-info → airbyte_cdk-6.45.4.post14.dev14544463167.dist-info}/WHEEL +0 -0
  42. {airbyte_cdk-6.45.4.dist-info → airbyte_cdk-6.45.4.post14.dev14544463167.dist-info}/entry_points.txt +0 -0
@@ -19,6 +19,7 @@ from .airbyte_protocol import (
19
19
  AirbyteMessage,
20
20
  AirbyteProtocol,
21
21
  AirbyteRecordMessage,
22
+ AirbyteRecordMessageFileReference,
22
23
  AirbyteStateBlob,
23
24
  AirbyteStateMessage,
24
25
  AirbyteStateStats,
@@ -8,8 +8,6 @@ from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
8
8
  from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
9
9
  from serpyco_rs.metadata import Alias
10
10
 
11
- from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
12
-
13
11
  # ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
14
12
 
15
13
 
@@ -84,7 +82,7 @@ class AirbyteMessage:
84
82
  spec: Optional[ConnectorSpecification] = None # type: ignore [name-defined]
85
83
  connectionStatus: Optional[AirbyteConnectionStatus] = None # type: ignore [name-defined]
86
84
  catalog: Optional[AirbyteCatalog] = None # type: ignore [name-defined]
87
- record: Optional[Union[AirbyteFileTransferRecordMessage, AirbyteRecordMessage]] = None # type: ignore [name-defined]
85
+ record: Optional[AirbyteRecordMessage] = None # type: ignore [name-defined]
88
86
  state: Optional[AirbyteStateMessage] = None
89
87
  trace: Optional[AirbyteTraceMessage] = None # type: ignore [name-defined]
90
88
  control: Optional[AirbyteControlMessage] = None # type: ignore [name-defined]
@@ -149,7 +149,7 @@ class ConcurrentReadProcessor:
149
149
  message = stream_data_to_airbyte_message(
150
150
  stream_name=record.stream_name,
151
151
  data_or_message=record.data,
152
- is_file_transfer_message=record.is_file_transfer_message,
152
+ file_reference=record.file_reference,
153
153
  )
154
154
  stream = self._stream_name_to_instance[record.stream_name]
155
155
 
@@ -239,8 +239,8 @@ class DeclarativeOauth2Authenticator(AbstractOauth2Authenticator, DeclarativeAut
239
239
  def _has_access_token_been_initialized(self) -> bool:
240
240
  return self._access_token is not None
241
241
 
242
- def set_token_expiry_date(self, value: Union[str, int]) -> None:
243
- self._token_expiry_date = self._parse_token_expiration_date(value)
242
+ def set_token_expiry_date(self, value: AirbyteDateTime) -> None:
243
+ self._token_expiry_date = value
244
244
 
245
245
  def get_assertion_name(self) -> str:
246
246
  return self.assertion_name
@@ -25,6 +25,7 @@ from airbyte_cdk.sources.declarative.incremental.per_partition_with_global impor
25
25
  PerPartitionWithGlobalCursor,
26
26
  )
27
27
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
28
+ from airbyte_cdk.sources.declarative.models import FileUploader
28
29
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
29
30
  ConcurrencyLevel as ConcurrencyLevelModel,
30
31
  )
@@ -206,6 +207,10 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
206
207
  # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
207
208
  # so we need to treat them as synchronous
208
209
 
210
+ supports_file_transfer = (
211
+ "file_uploader" in name_to_stream_mapping[declarative_stream.name]
212
+ )
213
+
209
214
  if (
210
215
  isinstance(declarative_stream, DeclarativeStream)
211
216
  and name_to_stream_mapping[declarative_stream.name]["type"]
@@ -322,6 +327,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
322
327
  else None,
323
328
  logger=self.logger,
324
329
  cursor=cursor,
330
+ supports_file_transfer=supports_file_transfer,
325
331
  )
326
332
  )
327
333
  elif (
@@ -353,6 +359,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
353
359
  cursor_field=None,
354
360
  logger=self.logger,
355
361
  cursor=final_state_cursor,
362
+ supports_file_transfer=supports_file_transfer,
356
363
  )
357
364
  )
358
365
  elif (
@@ -406,6 +413,7 @@ class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
406
413
  cursor_field=perpartition_cursor.cursor_field.cursor_field_key,
407
414
  logger=self.logger,
408
415
  cursor=perpartition_cursor,
416
+ supports_file_transfer=supports_file_transfer,
409
417
  )
410
418
  )
411
419
  else:
@@ -1448,6 +1448,42 @@ definitions:
1448
1448
  - "$ref": "#/definitions/LegacyToPerPartitionStateMigration"
1449
1449
  - "$ref": "#/definitions/CustomStateMigration"
1450
1450
  default: []
1451
+ file_uploader:
1452
+ title: File Uploader
1453
+ description: (experimental) Describes how to fetch a file
1454
+ type: object
1455
+ required:
1456
+ - type
1457
+ - requester
1458
+ - download_target_extractor
1459
+ properties:
1460
+ type:
1461
+ type: string
1462
+ enum: [ FileUploader ]
1463
+ requester:
1464
+ description: Requester component that describes how to prepare HTTP requests to send to the source API.
1465
+ anyOf:
1466
+ - "$ref": "#/definitions/CustomRequester"
1467
+ - "$ref": "#/definitions/HttpRequester"
1468
+ download_target_extractor:
1469
+ description: Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response
1470
+ anyOf:
1471
+ - "$ref": "#/definitions/CustomRecordExtractor"
1472
+ - "$ref": "#/definitions/DpathExtractor"
1473
+ file_extractor:
1474
+ description: Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content
1475
+ anyOf:
1476
+ - "$ref": "#/definitions/CustomRecordExtractor"
1477
+ - "$ref": "#/definitions/DpathExtractor"
1478
+ filename_extractor:
1479
+ description: Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.
1480
+ type: string
1481
+ interpolation_context:
1482
+ - config
1483
+ - record
1484
+ examples:
1485
+ - "{{ record.id }}/{{ record.file_name }}/"
1486
+ - "{{ record.id }}_{{ record.file_name }}/"
1451
1487
  $parameters:
1452
1488
  type: object
1453
1489
  additional_properties: true
@@ -15,6 +15,7 @@ from airbyte_cdk.sources.declarative.extractors.type_transformer import (
15
15
  )
16
16
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
17
17
  from airbyte_cdk.sources.declarative.models import SchemaNormalization
18
+ from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
18
19
  from airbyte_cdk.sources.declarative.transformations import RecordTransformation
19
20
  from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
20
21
  from airbyte_cdk.sources.utils.transform import TypeTransformer
@@ -42,6 +43,7 @@ class RecordSelector(HttpSelector):
42
43
  record_filter: Optional[RecordFilter] = None
43
44
  transformations: List[RecordTransformation] = field(default_factory=lambda: [])
44
45
  transform_before_filtering: bool = False
46
+ file_uploader: Optional[FileUploader] = None
45
47
 
46
48
  def __post_init__(self, parameters: Mapping[str, Any]) -> None:
47
49
  self._parameters = parameters
@@ -117,7 +119,10 @@ class RecordSelector(HttpSelector):
117
119
  transformed_filtered_data, schema=records_schema
118
120
  )
119
121
  for data in normalized_data:
120
- yield Record(data=data, stream_name=self.name, associated_slice=stream_slice)
122
+ record = Record(data=data, stream_name=self.name, associated_slice=stream_slice)
123
+ if self.file_uploader:
124
+ self.file_uploader.upload(record)
125
+ yield record
121
126
 
122
127
  def _normalize_by_schema(
123
128
  self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
@@ -2066,6 +2066,31 @@ class SelectiveAuthenticator(BaseModel):
2066
2066
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2067
2067
 
2068
2068
 
2069
class FileUploader(BaseModel):
    # Declarative (manifest) model for the experimental `file_uploader`
    # component of a stream. Field names, descriptions and examples mirror the
    # `file_uploader` definition in declarative_component_schema.yaml.

    # Discriminator: must be exactly "FileUploader".
    type: Literal["FileUploader"]

    # Required: the requester used to perform the file download request.
    requester: Union[CustomRequester, HttpRequester] = Field(
        default=...,
        description="Requester component that describes how to prepare HTTP requests to send to the source API.",
    )

    # Required: extractor applied to each record to find the download target.
    download_target_extractor: Union[CustomRecordExtractor, DpathExtractor] = Field(
        default=...,
        description="Responsible for fetching the url where the file is located. This is applied on each records and not on the HTTP response",
    )

    # Optional: extractor for the file content inside the download response.
    file_extractor: Optional[Union[CustomRecordExtractor, DpathExtractor]] = Field(
        default=None,
        description="Responsible for fetching the content of the file. If not defined, the assumption is that the whole response body is the file content",
    )

    # Optional: interpolated template producing the name the file is stored under.
    filename_extractor: Optional[str] = Field(
        default=None,
        description="Defines the name to store the file. Stream name is automatically added to the file path. File unique ID can be used to avoid overwriting files. Random UUID will be used if the extractor is not provided.",
        examples=[
            "{{ record.id }}/{{ record.file_name }}/",
            "{{ record.id }}_{{ record.file_name }}/",
        ],
    )

    # Free-form `$parameters` mapping from the manifest.
    parameters: Optional[Dict[str, Any]] = Field(default=None, alias="$parameters")
2092
+
2093
+
2069
2094
  class DeclarativeStream(BaseModel):
2070
2095
  class Config:
2071
2096
  extra = Extra.allow
@@ -2124,6 +2149,11 @@ class DeclarativeStream(BaseModel):
2124
2149
  description="Array of state migrations to be applied on the input state",
2125
2150
  title="State Migrations",
2126
2151
  )
2152
+ file_uploader: Optional[FileUploader] = Field(
2153
+ None,
2154
+ description="(experimental) Describes how to fetch a file",
2155
+ title="File Uploader",
2156
+ )
2127
2157
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
2128
2158
 
2129
2159
 
@@ -2617,6 +2647,7 @@ CompositeErrorHandler.update_forward_refs()
2617
2647
  DeclarativeSource1.update_forward_refs()
2618
2648
  DeclarativeSource2.update_forward_refs()
2619
2649
  SelectiveAuthenticator.update_forward_refs()
2650
+ FileUploader.update_forward_refs()
2620
2651
  DeclarativeStream.update_forward_refs()
2621
2652
  SessionTokenAuthenticator.update_forward_refs()
2622
2653
  DynamicSchemaLoader.update_forward_refs()
@@ -106,7 +106,6 @@ from airbyte_cdk.sources.declarative.migrations.legacy_to_per_partition_state_mi
106
106
  )
107
107
  from airbyte_cdk.sources.declarative.models import (
108
108
  CustomStateMigration,
109
- GzipDecoder,
110
109
  )
111
110
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
112
111
  AddedFieldDefinition as AddedFieldDefinitionModel,
@@ -228,6 +227,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
228
227
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
229
228
  ExponentialBackoffStrategy as ExponentialBackoffStrategyModel,
230
229
  )
230
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
231
+ FileUploader as FileUploaderModel,
232
+ )
231
233
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
232
234
  FixedWindowCallRatePolicy as FixedWindowCallRatePolicyModel,
233
235
  )
@@ -479,6 +481,7 @@ from airbyte_cdk.sources.declarative.retrievers import (
479
481
  SimpleRetriever,
480
482
  SimpleRetrieverTestReadDecorator,
481
483
  )
484
+ from airbyte_cdk.sources.declarative.retrievers.file_uploader import FileUploader
482
485
  from airbyte_cdk.sources.declarative.schema import (
483
486
  ComplexFieldType,
484
487
  DefaultSchemaLoader,
@@ -676,6 +679,7 @@ class ModelToComponentFactory:
676
679
  ComponentMappingDefinitionModel: self.create_components_mapping_definition,
677
680
  ZipfileDecoderModel: self.create_zipfile_decoder,
678
681
  HTTPAPIBudgetModel: self.create_http_api_budget,
682
+ FileUploaderModel: self.create_file_uploader,
679
683
  FixedWindowCallRatePolicyModel: self.create_fixed_window_call_rate_policy,
680
684
  MovingWindowCallRatePolicyModel: self.create_moving_window_call_rate_policy,
681
685
  UnlimitedCallRatePolicyModel: self.create_unlimited_call_rate_policy,
@@ -1838,6 +1842,11 @@ class ModelToComponentFactory:
1838
1842
  transformations.append(
1839
1843
  self._create_component_from_model(model=transformation_model, config=config)
1840
1844
  )
1845
+ file_uploader = None
1846
+ if model.file_uploader:
1847
+ file_uploader = self._create_component_from_model(
1848
+ model=model.file_uploader, config=config
1849
+ )
1841
1850
 
1842
1851
  retriever = self._create_component_from_model(
1843
1852
  model=model.retriever,
@@ -1849,6 +1858,7 @@ class ModelToComponentFactory:
1849
1858
  stop_condition_on_cursor=stop_condition_on_cursor,
1850
1859
  client_side_incremental_sync=client_side_incremental_sync,
1851
1860
  transformations=transformations,
1861
+ file_uploader=file_uploader,
1852
1862
  incremental_sync=model.incremental_sync,
1853
1863
  )
1854
1864
  cursor_field = model.incremental_sync.cursor_field if model.incremental_sync else None
@@ -2794,6 +2804,7 @@ class ModelToComponentFactory:
2794
2804
  transformations: List[RecordTransformation] | None = None,
2795
2805
  decoder: Decoder | None = None,
2796
2806
  client_side_incremental_sync: Dict[str, Any] | None = None,
2807
+ file_uploader: Optional[FileUploader] = None,
2797
2808
  **kwargs: Any,
2798
2809
  ) -> RecordSelector:
2799
2810
  extractor = self._create_component_from_model(
@@ -2831,6 +2842,7 @@ class ModelToComponentFactory:
2831
2842
  config=config,
2832
2843
  record_filter=record_filter,
2833
2844
  transformations=transformations or [],
2845
+ file_uploader=file_uploader,
2834
2846
  schema_normalization=schema_normalization,
2835
2847
  parameters=model.parameters or {},
2836
2848
  transform_before_filtering=transform_before_filtering,
@@ -2888,6 +2900,7 @@ class ModelToComponentFactory:
2888
2900
  stop_condition_on_cursor: bool = False,
2889
2901
  client_side_incremental_sync: Optional[Dict[str, Any]] = None,
2890
2902
  transformations: List[RecordTransformation],
2903
+ file_uploader: Optional[FileUploader] = None,
2891
2904
  incremental_sync: Optional[
2892
2905
  Union[
2893
2906
  IncrementingCountCursorModel, DatetimeBasedCursorModel, CustomIncrementalSyncModel
@@ -2908,6 +2921,7 @@ class ModelToComponentFactory:
2908
2921
  decoder=decoder,
2909
2922
  transformations=transformations,
2910
2923
  client_side_incremental_sync=client_side_incremental_sync,
2924
+ file_uploader=file_uploader,
2911
2925
  )
2912
2926
 
2913
2927
  query_properties: Optional[QueryProperties] = None
@@ -3574,6 +3588,30 @@ class ModelToComponentFactory:
3574
3588
  matchers=matchers,
3575
3589
  )
3576
3590
 
3591
def create_file_uploader(
    self, model: FileUploaderModel, config: Config, **kwargs: Any
) -> FileUploader:
    """Build the runtime ``FileUploader`` component from its manifest model.

    The requester and the download-target extractor are created through
    ``_create_component_from_model``; both receive the fixed component name
    "File Uploader" plus any extra keyword arguments given to this method.

    Args:
        model: Parsed ``FileUploader`` manifest model.
        config: Connector configuration handed to the created components.
        **kwargs: Forwarded unchanged to the sub-component factories.

    Returns:
        A ``FileUploader`` wired with the created requester and extractor.
    """
    component_name = "File Uploader"

    def _build(sub_model: Any) -> Any:
        # Shared construction path for both sub-components.
        return self._create_component_from_model(
            model=sub_model,
            config=config,
            name=component_name,
            **kwargs,
        )

    # Requester first, then extractor: same construction order as before.
    http_requester = _build(model.requester)
    target_extractor = _build(model.download_target_extractor)

    # NOTE(review): `model.file_extractor` is not read here, so the uploader's
    # `content_extractor` is never set from the manifest - confirm whether
    # that is intentional while the feature is experimental.
    return FileUploader(
        requester=http_requester,
        download_target_extractor=target_extractor,
        config=config,
        parameters=model.parameters or {},
        # Falsy values (None or "") are normalised to None.
        filename_extractor=model.filename_extractor or None,
    )
3614
+
3577
3615
  def create_moving_window_call_rate_policy(
3578
3616
  self, model: MovingWindowCallRatePolicyModel, config: Config, **kwargs: Any
3579
3617
  ) -> MovingWindowCallRatePolicy:
@@ -0,0 +1,93 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import json
6
+ import logging
7
+ import uuid
8
+ from dataclasses import InitVar, dataclass, field
9
+ from pathlib import Path
10
+ from typing import Any, Mapping, Optional, Union
11
+
12
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
13
+ from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
14
+ from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
15
+ InterpolatedString,
16
+ )
17
+ from airbyte_cdk.sources.declarative.partition_routers.substream_partition_router import (
18
+ SafeResponse,
19
+ )
20
+ from airbyte_cdk.sources.declarative.requesters import Requester
21
+ from airbyte_cdk.sources.declarative.types import Record, StreamSlice
22
+ from airbyte_cdk.sources.types import Config
23
+ from airbyte_cdk.sources.utils.files_directory import get_files_directory
24
+
25
+ logger = logging.getLogger("airbyte")
26
+
27
+
28
@dataclass
class FileUploader:
    """Download the file referenced by a record and attach a file reference to it.

    For each record, the download target is extracted from the record data,
    fetched through ``requester``, written below the files directory returned by
    ``get_files_directory()`` and described on the record through
    ``record.file_reference``.
    """

    # Component used to send the download request.
    requester: Requester
    # Extractor run against the record's own data (not an HTTP response).
    download_target_extractor: RecordExtractor
    # Connector configuration, made available to filename interpolation.
    config: Config
    parameters: InitVar[Mapping[str, Any]]

    # Template for the stored file name; a random UUID is used when unset.
    filename_extractor: Optional[Union[InterpolatedString, str]] = None
    # Not supported yet: upload() raises NotImplementedError when this is set.
    content_extractor: Optional[RecordExtractor] = None

    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
        """Turn a configured ``filename_extractor`` into an ``InterpolatedString``."""
        if self.filename_extractor:
            self.filename_extractor = InterpolatedString.create(
                self.filename_extractor,
                parameters=parameters,
            )

    def upload(self, record: Record) -> None:
        """Download the file for ``record`` and set ``record.file_reference``.

        Args:
            record: Record whose data contains the download target. It is
                mutated in place: ``file_reference`` is assigned on success.

        Raises:
            ValueError: If no download target is extracted, if the target is
                not a string, or if the computed file path would fall outside
                the files directory.
            NotImplementedError: If ``content_extractor`` is configured.
        """
        # The extractor API works on responses, so the record data is wrapped
        # in a response-like object carrying its JSON encoding.
        mocked_response = SafeResponse()
        mocked_response.content = json.dumps(record.data).encode()
        download_targets = list(self.download_target_extractor.extract_records(mocked_response))
        if not download_targets:
            raise ValueError("No download targets found")

        download_target = download_targets[0]  # we just expect one download target
        if not isinstance(download_target, str):
            raise ValueError(
                f"download_target is expected to be a str but was {type(download_target)}: {download_target}"
            )

        # Fail before performing the download: content extraction is not
        # implemented, so the request's result could never be used.
        if self.content_extractor:
            raise NotImplementedError("TODO")

        response = self.requester.send_request(
            stream_slice=StreamSlice(
                partition={}, cursor_slice={}, extra_fields={"download_target": download_target}
            ),
        )

        files_directory = Path(get_files_directory())

        # Interpolation may yield a non-string value (for example a numeric
        # record id), so coerce to str before treating it as a path.
        file_name = (
            str(self.filename_extractor.eval(self.config, record=record))
            if self.filename_extractor
            else str(uuid.uuid4())
        )
        file_name = file_name.lstrip("/")
        file_relative_path = Path(record.stream_name) / Path(file_name)
        full_path = files_directory / file_relative_path

        # The file name is built from record data; refuse names whose ".."
        # segments would place the file outside of the files directory.
        staging_root = files_directory.resolve()
        resolved_target = full_path.resolve()
        if staging_root not in resolved_target.parents:
            raise ValueError(
                f"Resolved file path {resolved_target} is outside of the files directory {staging_root}"
            )

        full_path.parent.mkdir(parents=True, exist_ok=True)

        with open(str(full_path), "wb") as f:
            f.write(response.content)
        file_size_bytes = full_path.stat().st_size

        logger.info("File uploaded successfully")
        logger.info(f"File url: {str(full_path)}")
        logger.info(f"File size: {file_size_bytes / 1024} KB")
        logger.info(f"File relative path: {str(file_relative_path)}")

        record.file_reference = AirbyteRecordMessageFileReference(
            staging_file_url=str(full_path),
            source_file_relative_path=str(file_relative_path),
            file_size_bytes=file_size_bytes,
        )
@@ -58,11 +58,16 @@ class DeclarativePartition(Partition):
58
58
  def read(self) -> Iterable[Record]:
59
59
  for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
60
60
  if isinstance(stream_data, Mapping):
61
- yield Record(
62
- data=stream_data,
63
- stream_name=self.stream_name(),
64
- associated_slice=self._stream_slice,
61
+ record = (
62
+ stream_data
63
+ if isinstance(stream_data, Record)
64
+ else Record(
65
+ data=stream_data,
66
+ stream_name=self.stream_name(),
67
+ associated_slice=self._stream_slice,
68
+ )
65
69
  )
70
+ yield record
66
71
  else:
67
72
  self._message_repository.emit_message(stream_data)
68
73
 
@@ -8,16 +8,18 @@ from datetime import datetime
8
8
  from enum import Enum
9
9
  from io import IOBase
10
10
  from os import makedirs, path
11
- from typing import Any, Dict, Iterable, List, Optional, Set
11
+ from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple
12
12
 
13
13
  from wcmatch.glob import GLOBSTAR, globmatch
14
14
 
15
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
15
16
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
16
17
  from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
17
18
  include_identities_stream,
18
19
  preserve_directory_structure,
19
20
  use_file_transfer,
20
21
  )
22
+ from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
21
23
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
22
24
 
23
25
 
@@ -28,6 +30,10 @@ class FileReadMode(Enum):
28
30
 
29
31
  class AbstractFileBasedStreamReader(ABC):
30
32
  DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
33
+ FILE_RELATIVE_PATH = "file_relative_path"
34
+ FILE_NAME = "file_name"
35
+ LOCAL_FILE_PATH = "local_file_path"
36
+ FILE_FOLDER = "file_folder"
31
37
 
32
38
  def __init__(self) -> None:
33
39
  self._config = None
@@ -148,9 +154,9 @@ class AbstractFileBasedStreamReader(ABC):
148
154
  return False
149
155
 
150
156
  @abstractmethod
151
- def get_file(
157
+ def upload(
152
158
  self, file: RemoteFile, local_directory: str, logger: logging.Logger
153
- ) -> Dict[str, Any]:
159
+ ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
154
160
  """
155
161
  This is required for connectors that will support writing to
156
162
  files. It will handle the logic to download,get,read,acquire or
@@ -162,25 +168,41 @@ class AbstractFileBasedStreamReader(ABC):
162
168
  logger (logging.Logger): Logger for logging information and errors.
163
169
 
164
170
  Returns:
165
- dict: A dictionary containing the following:
166
- - "file_url" (str): The absolute path of the downloaded file.
167
- - "bytes" (int): The file size in bytes.
168
- - "file_relative_path" (str): The relative path of the file for local storage. Is relative to local_directory as
169
- this a mounted volume in the pod container.
170
-
171
+ AirbyteRecordMessageFileReference: A file reference object containing:
172
+ - staging_file_url (str): The absolute path to the referenced file in the staging area.
173
+ - file_size_bytes (int): The size of the referenced file in bytes.
174
+ - source_file_relative_path (str): The relative path to the referenced file in source.
171
175
  """
172
176
  ...
173
177
 
174
- def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
178
+ def _get_file_transfer_paths(
179
+ self, source_file_relative_path: str, staging_directory: str
180
+ ) -> MutableMapping[str, Any]:
181
+ """
182
+ This method is used to get the file transfer paths for a given source file relative path and local directory.
183
+ It returns a dictionary with the following keys:
184
+ - FILE_RELATIVE_PATH: The relative path to file in reference to the staging directory.
185
+ - LOCAL_FILE_PATH: The absolute path to the file.
186
+ - FILE_NAME: The name of the referenced file.
187
+ - FILE_FOLDER: The folder of the referenced file.
188
+ """
175
189
  preserve_directory_structure = self.preserve_directory_structure()
190
+
191
+ file_name = path.basename(source_file_relative_path)
192
+ file_folder = path.dirname(source_file_relative_path)
176
193
  if preserve_directory_structure:
177
194
  # Remove left slashes from source path format to make relative path for writing locally
178
- file_relative_path = file.uri.lstrip("/")
195
+ file_relative_path = source_file_relative_path.lstrip("/")
179
196
  else:
180
- file_relative_path = path.basename(file.uri)
181
- local_file_path = path.join(local_directory, file_relative_path)
182
-
197
+ file_relative_path = file_name
198
+ local_file_path = path.join(staging_directory, file_relative_path)
183
199
  # Ensure the local directory exists
184
200
  makedirs(path.dirname(local_file_path), exist_ok=True)
185
- absolute_file_path = path.abspath(local_file_path)
186
- return [file_relative_path, local_file_path, absolute_file_path]
201
+
202
+ file_paths = {
203
+ self.FILE_RELATIVE_PATH: file_relative_path,
204
+ self.LOCAL_FILE_PATH: local_file_path,
205
+ self.FILE_NAME: file_name,
206
+ self.FILE_FOLDER: file_folder,
207
+ }
208
+ return file_paths
@@ -0,0 +1,23 @@
1
+ #
2
+ # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ from datetime import datetime
6
+ from typing import Optional
7
+
8
+ from pydantic.v1 import BaseModel
9
+
10
+
11
class FileRecordData(BaseModel):
    """
    A record in a file-based stream.
    """

    # Required fields.
    # presumably the folder part of the source file path - TODO confirm against the stream readers
    folder: str
    # NOTE(review): file_transfer_schema in schema_helpers.py declares this
    # property as "file_name", while the field here is "filename" - confirm
    # which spelling is intended so record data matches the declared schema.
    filename: str
    # presumably the file size in bytes - TODO confirm
    bytes: int
    # presumably the URI of the file in the source system - TODO confirm
    source_uri: str

    # Optional metadata; every field below defaults to None.
    id: Optional[str] = None
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
    mime_type: Optional[str] = None
@@ -2,34 +2,27 @@
2
2
  # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
  import logging
5
- import os
6
- from typing import Any, Dict, Iterable
5
+ from typing import Iterable, Tuple
7
6
 
8
- from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
7
+ from airbyte_cdk.models import AirbyteRecordMessageFileReference
9
8
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
9
+ from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
10
10
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
11
-
12
- AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
13
- DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
11
+ from airbyte_cdk.sources.utils.files_directory import get_files_directory
14
12
 
15
13
 
16
14
  class FileTransfer:
17
15
  def __init__(self) -> None:
18
- self._local_directory = (
19
- AIRBYTE_STAGING_DIRECTORY
20
- if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
21
- else DEFAULT_LOCAL_DIRECTORY
22
- )
16
+ self._local_directory = get_files_directory()
23
17
 
24
- def get_file(
18
+ def upload(
25
19
  self,
26
- config: FileBasedStreamConfig,
27
20
  file: RemoteFile,
28
21
  stream_reader: AbstractFileBasedStreamReader,
29
22
  logger: logging.Logger,
30
- ) -> Iterable[Dict[str, Any]]:
23
+ ) -> Iterable[Tuple[FileRecordData, AirbyteRecordMessageFileReference]]:
31
24
  try:
32
- yield stream_reader.get_file(
25
+ yield stream_reader.upload(
33
26
  file=file, local_directory=self._local_directory, logger=logger
34
27
  )
35
28
  except Exception as ex:
@@ -18,9 +18,19 @@ JsonSchemaSupportedType = Union[List[str], Literal["string"], str]
18
18
  SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]]
19
19
 
20
20
  schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}}
21
+
21
22
  file_transfer_schema = {
22
23
  "type": "object",
23
- "properties": {"data": {"type": "object"}, "file": {"type": "object"}},
24
+ "properties": {
25
+ "folder": {"type": "string"},
26
+ "file_name": {"type": "string"},
27
+ "source_uri": {"type": "string"},
28
+ "bytes": {"type": "integer"},
29
+ "id": {"type": ["null", "string"]},
30
+ "created_at": {"type": ["null", "string"]},
31
+ "updated_at": {"type": ["null", "string"]},
32
+ "mime_type": {"type": ["null", "string"]},
33
+ },
24
34
  }
25
35
 
26
36