airbyte-cdk 6.45.0.dev4107__py3-none-any.whl → 6.45.0.post6.dev14369631849__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. airbyte_cdk/connector_builder/connector_builder_handler.py +45 -6
  2. airbyte_cdk/connector_builder/main.py +5 -2
  3. airbyte_cdk/models/__init__.py +0 -1
  4. airbyte_cdk/models/airbyte_protocol.py +3 -1
  5. airbyte_cdk/models/file_transfer_record_message.py +13 -0
  6. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
  7. airbyte_cdk/sources/declarative/async_job/job.py +6 -0
  8. airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
  9. airbyte_cdk/sources/declarative/async_job/job_tracker.py +22 -6
  10. airbyte_cdk/sources/declarative/checks/__init__.py +5 -2
  11. airbyte_cdk/sources/declarative/checks/check_stream.py +113 -11
  12. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -8
  13. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +210 -50
  14. airbyte_cdk/sources/declarative/extractors/record_selector.py +1 -6
  15. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +2 -1
  16. airbyte_cdk/sources/declarative/interpolation/macros.py +10 -4
  17. airbyte_cdk/sources/declarative/manifest_declarative_source.py +23 -2
  18. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +142 -43
  19. airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +16 -4
  20. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +263 -50
  21. airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
  22. airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +150 -0
  23. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +5 -1
  24. airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +13 -0
  25. airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
  26. airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +69 -0
  27. airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +58 -0
  28. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +10 -0
  29. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +33 -0
  30. airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +19 -0
  31. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
  32. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +101 -30
  33. airbyte_cdk/sources/declarative/schema/default_schema_loader.py +1 -1
  34. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -9
  35. airbyte_cdk/sources/declarative/transformations/add_fields.py +3 -1
  36. airbyte_cdk/sources/file_based/file_based_stream_reader.py +15 -38
  37. airbyte_cdk/sources/file_based/file_types/file_transfer.py +15 -8
  38. airbyte_cdk/sources/file_based/schema_helpers.py +1 -9
  39. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +12 -3
  40. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +31 -16
  41. airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +3 -1
  42. airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
  43. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +4 -0
  44. airbyte_cdk/sources/types.py +2 -11
  45. airbyte_cdk/sources/utils/record_helper.py +8 -8
  46. airbyte_cdk/test/mock_http/response_builder.py +0 -8
  47. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/METADATA +2 -2
  48. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/RECORD +52 -46
  49. airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -89
  50. airbyte_cdk/sources/file_based/file_record_data.py +0 -22
  51. airbyte_cdk/sources/utils/files_directory.py +0 -15
  52. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/LICENSE.txt +0 -0
  53. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/LICENSE_SHORT +0 -0
  54. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/WHEEL +0 -0
  55. {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/transformations/add_fields.py
@@ -139,7 +139,9 @@ class AddFields(RecordTransformation):
             valid_types = (parsed_field.value_type,) if parsed_field.value_type else None
             value = parsed_field.value.eval(config, valid_types=valid_types, **kwargs)
             is_empty_condition = not self.condition
-            if is_empty_condition or self._filter_interpolator.eval(config, value=value, **kwargs):
+            if is_empty_condition or self._filter_interpolator.eval(
+                config, value=value, path=parsed_field.path, **kwargs
+            ):
                 dpath.new(record, parsed_field.path, value)
 
     def __eq__(self, other: Any) -> bool:
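
A note on the change above: the condition of an AddFields transformation is now evaluated with the target path of each field in scope (via the new path=parsed_field.path argument), in addition to the evaluated value. Below is a minimal sketch of a transformation whose condition uses both variables; the exact constructor fields of AddedFieldDefinition/AddFields are assumptions, not shown in this diff:

    from airbyte_cdk.sources.declarative.transformations.add_fields import (
        AddedFieldDefinition,
        AddFields,
    )

    # Hypothetical transformation: copy `id` into `copied_id`, but only when the
    # evaluated value is not None. `path` in the condition is the field's target
    # path, made available by the `path=parsed_field.path` argument added above.
    add_fields = AddFields(
        fields=[
            AddedFieldDefinition(
                path=["copied_id"],
                value="{{ record['id'] }}",
                value_type=None,
                parameters={},
            )
        ],
        condition="{{ value is not none and path != ['internal_id'] }}",
        parameters={},
    )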
airbyte_cdk/sources/file_based/file_based_stream_reader.py
@@ -8,18 +8,16 @@ from datetime import datetime
 from enum import Enum
 from io import IOBase
 from os import makedirs, path
-from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple
+from typing import Any, Dict, Iterable, List, Optional, Set
 
 from wcmatch.glob import GLOBSTAR, globmatch
 
-from airbyte_cdk.models import AirbyteRecordMessageFileReference
 from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
 from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
     include_identities_stream,
     preserve_directory_structure,
     use_file_transfer,
 )
-from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
 
 
@@ -30,11 +28,6 @@ class FileReadMode(Enum):
 
 class AbstractFileBasedStreamReader(ABC):
     DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
-    FILE_RELATIVE_PATH = "file_relative_path"
-    FILE_NAME = "file_name"
-    LOCAL_FILE_PATH = "local_file_path"
-    SOURCE_FILE_URI = "source_file_relative_path"
-    FILE_FOLDER = "file_folder"
 
     def __init__(self) -> None:
         self._config = None
@@ -155,9 +148,9 @@ class AbstractFileBasedStreamReader(ABC):
         return False
 
     @abstractmethod
-    def upload(
+    def get_file(
         self, file: RemoteFile, local_directory: str, logger: logging.Logger
-    ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
+    ) -> Dict[str, Any]:
         """
         This is required for connectors that will support writing to
         files. It will handle the logic to download,get,read,acquire or
@@ -169,41 +162,25 @@ class AbstractFileBasedStreamReader(ABC):
             logger (logging.Logger): Logger for logging information and errors.
 
         Returns:
-            AirbyteRecordMessageFileReference: A file reference object containing:
-            - staging_file_url (str): The absolute path to the referenced file in the staging area.
-            - file_size_bytes (int): The size of the referenced file in bytes.
-            - source_file_relative_path (str): The relative path to the referenced file in source.
+            dict: A dictionary containing the following:
+                - "file_url" (str): The absolute path of the downloaded file.
+                - "bytes" (int): The file size in bytes.
+                - "file_relative_path" (str): The relative path of the file for local storage, relative to
+                  local_directory, which is a mounted volume in the pod container.
+
         """
         ...
 
-    def _get_file_transfer_paths(
-        self,
-        file: RemoteFile,
-        local_directory: str,
-        parse_file_path_from_uri: Optional[Callable[[str], str]] = None,
-    ) -> MutableMapping[str, Any]:
+    def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
         preserve_directory_structure = self.preserve_directory_structure()
-        if not parse_file_path_from_uri:
-            file_path = file.uri
-        else:
-            file_path = parse_file_path_from_uri(file.uri)
-
-        file_name = path.basename(file_path)
-        file_folder = path.dirname(file_path)
         if preserve_directory_structure:
             # Remove left slashes from source path format to make relative path for writing locally
-            file_relative_path = file_path.lstrip("/")
+            file_relative_path = file.uri.lstrip("/")
         else:
-            file_relative_path = file_name
+            file_relative_path = path.basename(file.uri)
         local_file_path = path.join(local_directory, file_relative_path)
+
         # Ensure the local directory exists
         makedirs(path.dirname(local_file_path), exist_ok=True)
-
-        file_paths = {
-            self.FILE_RELATIVE_PATH: file_relative_path,
-            self.LOCAL_FILE_PATH: local_file_path,
-            self.FILE_NAME: file_name,
-            self.FILE_FOLDER: file_folder,
-            self.SOURCE_FILE_URI: file.uri,
-        }
-        return file_paths
+        absolute_file_path = path.abspath(local_file_path)
+        return [file_relative_path, local_file_path, absolute_file_path]
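
For orientation, a hedged sketch of how a concrete stream reader might implement the get_file() contract above. The storage client and its download() call are hypothetical stand-ins for a connector's own SDK, and the other abstract members of AbstractFileBasedStreamReader are omitted; only the interplay with _get_file_transfer_paths() is illustrated:

    import logging
    import os
    from typing import Any, Dict

    from airbyte_cdk.sources.file_based.file_based_stream_reader import (
        AbstractFileBasedStreamReader,
    )
    from airbyte_cdk.sources.file_based.remote_file import RemoteFile


    class ExampleStreamReader(AbstractFileBasedStreamReader):
        # open_file, get_matching_files, config, etc. omitted for brevity.

        def get_file(
            self, file: RemoteFile, local_directory: str, logger: logging.Logger
        ) -> Dict[str, Any]:
            # _get_file_transfer_paths builds the relative/local/absolute paths and
            # creates the target directory (see the helper in the hunk above).
            file_relative_path, local_file_path, absolute_file_path = (
                self._get_file_transfer_paths(file, local_directory)
            )
            logger.info(f"Downloading {file.uri} to {local_file_path}")
            self._client.download(file.uri, local_file_path)  # hypothetical SDK call
            return {
                "file_url": absolute_file_path,
                "bytes": os.path.getsize(local_file_path),
                "file_relative_path": file_relative_path,
            }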
airbyte_cdk/sources/file_based/file_types/file_transfer.py
@@ -2,27 +2,34 @@
 # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
 #
 import logging
-from typing import Iterable, Tuple
+import os
+from typing import Any, Dict, Iterable
 
-from airbyte_cdk.models import AirbyteRecordMessageFileReference
+from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
-from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
 from airbyte_cdk.sources.file_based.remote_file import RemoteFile
-from airbyte_cdk.sources.utils.files_directory import get_files_directory
+
+AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
+DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
 
 
 class FileTransfer:
     def __init__(self) -> None:
-        self._local_directory = get_files_directory()
+        self._local_directory = (
+            AIRBYTE_STAGING_DIRECTORY
+            if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
+            else DEFAULT_LOCAL_DIRECTORY
+        )
 
-    def upload(
+    def get_file(
         self,
+        config: FileBasedStreamConfig,
         file: RemoteFile,
         stream_reader: AbstractFileBasedStreamReader,
         logger: logging.Logger,
-    ) -> Iterable[Tuple[FileRecordData, AirbyteRecordMessageFileReference]]:
+    ) -> Iterable[Dict[str, Any]]:
         try:
-            yield stream_reader.upload(
+            yield stream_reader.get_file(
                 file=file, local_directory=self._local_directory, logger=logger
             )
         except Exception as ex:
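
A hedged usage sketch of the reworked FileTransfer: the stream hands over its config, the remote file, its stream reader, and a logger, then consumes the yielded dictionaries. Here stream_config, remote_file, and stream_reader are assumed to come from the surrounding file-based stream (see DefaultFileBasedStream further down):

    import logging

    from airbyte_cdk.sources.file_based.file_types.file_transfer import FileTransfer

    logger = logging.getLogger("airbyte")
    file_transfer = FileTransfer()

    # Each yielded dict carries "file_url", "bytes", and "file_relative_path",
    # matching the contract of AbstractFileBasedStreamReader.get_file() above.
    for file_info in file_transfer.get_file(stream_config, remote_file, stream_reader, logger):
        logger.info(
            "downloaded %s (%s bytes)", file_info["file_relative_path"], file_info["bytes"]
        )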
airbyte_cdk/sources/file_based/schema_helpers.py
@@ -18,17 +18,9 @@ JsonSchemaSupportedType = Union[List[str], Literal["string"], str]
 SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]]
 
 schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}}
-
 file_transfer_schema = {
     "type": "object",
-    "properties": {
-        "folder": {"type": "string"},
-        "file_name": {"type": "string"},
-        "bytes": {"type": "integer"},
-        "id": {"type": ["null", "string"]},
-        "updated_at": {"type": ["null", "string"]},
-        "mime_type": {"type": ["null", "string"]},
-    },
+    "properties": {"data": {"type": "object"}, "file": {"type": "object"}},
 }
 
 
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py
@@ -4,7 +4,7 @@
 
 import copy
 import logging
-from functools import lru_cache
+from functools import cache, lru_cache
 from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
 
 from typing_extensions import deprecated
@@ -258,14 +258,19 @@ class FileBasedStreamPartition(Partition):
                 and record_data.record is not None
             ):
                 # `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
-                record_message_data = record_data.record.data
+                # If the stream is flagged for file_transfer, the record data lives under the `file` key
+                record_message_data = (
+                    record_data.record.file
+                    if self._use_file_transfer()
+                    else record_data.record.data
+                )
                 if not record_message_data:
                     raise ExceptionWithDisplayMessage("A record without data was found")
                 else:
                     yield Record(
                         data=record_message_data,
                         stream_name=self.stream_name(),
-                        file_reference=record_data.record.file_reference,
+                        is_file_transfer_message=self._use_file_transfer(),
                     )
             else:
                 self._message_repository.emit_message(record_data)
@@ -301,6 +306,10 @@ class FileBasedStreamPartition(Partition):
     def stream_name(self) -> str:
         return self._stream.name
 
+    @cache
+    def _use_file_transfer(self) -> bool:
+        return hasattr(self._stream, "use_file_transfer") and self._stream.use_file_transfer
+
     def __repr__(self) -> str:
         return f"FileBasedStreamPartition({self._stream.name}, {self._slice})"
 
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py
@@ -11,7 +11,7 @@ from functools import cache
 from os import path
 from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union
 
-from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteStream, FailureType, Level
+from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
 from airbyte_cdk.sources.file_based.exceptions import (
@@ -97,7 +97,14 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
         self, configured_catalog_json_schema: Dict[str, Any]
     ) -> Dict[str, Any]:
         if self.use_file_transfer:
-            return file_transfer_schema
+            return {
+                "type": "object",
+                "properties": {
+                    "file_path": {"type": "string"},
+                    "file_size": {"type": "string"},
+                    self.ab_file_name_col: {"type": "string"},
+                },
+            }
         else:
             return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
 
@@ -138,6 +145,14 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
         record[self.ab_file_name_col] = file.uri
         return record
 
+    def transform_record_for_file_transfer(
+        self, record: dict[str, Any], file: RemoteFile
+    ) -> dict[str, Any]:
+        # timestamp() returns a float representing the number of seconds since the unix epoch
+        record[self.modified] = int(file.last_modified.timestamp()) * 1000
+        record[self.source_file_url] = file.uri
+        return record
+
     def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
         """
         Yield all records from all remote files in `list_files_for_this_sync`.
@@ -151,7 +166,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
             raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
         # The stream only supports a single file type, so we can use the same parser for all files
         parser = self.get_parser()
-        file_transfer = FileTransfer()
         for file in stream_slice["files"]:
             # only serialize the datetime once
             file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
@@ -159,13 +173,19 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
 
             try:
                 if self.use_file_transfer:
-                    for file_record_data, file_reference in file_transfer.upload(
-                        file=file, stream_reader=self.stream_reader, logger=self.logger
+                    self.logger.info(f"{self.name}: {file} file-based syncing")
+                    # todo: complete here the code to not rely on local parser
+                    file_transfer = FileTransfer()
+                    for record in file_transfer.get_file(
+                        self.config, file, self.stream_reader, self.logger
                     ):
+                        line_no += 1
+                        if not self.record_passes_validation_policy(record):
+                            n_skipped += 1
+                            continue
+                        record = self.transform_record_for_file_transfer(record, file)
                         yield stream_data_to_airbyte_message(
-                            self.name,
-                            file_record_data.dict(exclude_none=True),
-                            file_reference=file_reference,
+                            self.name, record, is_file_transfer_message=True
                         )
                 else:
                     for record in parser.parse_records(
@@ -239,8 +259,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
 
     @cache
     def get_json_schema(self) -> JsonSchema:
-        if self.use_file_transfer:
-            return file_transfer_schema
         extra_fields = {
             self.ab_last_mod_col: {"type": "string"},
             self.ab_file_name_col: {"type": "string"},
@@ -264,7 +282,9 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
         return {"type": "object", "properties": {**extra_fields, **schema["properties"]}}
 
     def _get_raw_json_schema(self) -> JsonSchema:
-        if self.config.input_schema:
+        if self.use_file_transfer:
+            return file_transfer_schema
+        elif self.config.input_schema:
             return self.config.get_input_schema() # type: ignore
         elif self.config.schemaless:
             return schemaless_schema
@@ -321,11 +341,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
             self.config.globs or [], self.config.legacy_prefix, self.logger
         )
 
-    def as_airbyte_stream(self) -> AirbyteStream:
-        file_stream = super().as_airbyte_stream()
-        file_stream.is_file_based = self.use_file_transfer
-        return file_stream
-
     def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
         loop = asyncio.get_event_loop()
         schema = loop.run_until_complete(self._infer_schema(files))
airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py
@@ -61,7 +61,9 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
                     permissions_record = self.transform_record(
                         permissions_record, file, file_datetime_string
                     )
-                    yield stream_data_to_airbyte_message(self.name, permissions_record)
+                    yield stream_data_to_airbyte_message(
+                        self.name, permissions_record, is_file_transfer_message=False
+                    )
                 except Exception as e:
                     self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
                     yield AirbyteMessage(
airbyte_cdk/sources/streams/concurrent/default_stream.py
@@ -29,7 +29,6 @@ class DefaultStream(AbstractStream):
         logger: Logger,
         cursor: Cursor,
         namespace: Optional[str] = None,
-        supports_file_transfer: bool = False,
     ) -> None:
         self._stream_partition_generator = partition_generator
         self._name = name
@@ -40,7 +39,6 @@ class DefaultStream(AbstractStream):
         self._logger = logger
         self._cursor = cursor
         self._namespace = namespace
-        self._supports_file_transfer = supports_file_transfer
 
     def generate_partitions(self) -> Iterable[Partition]:
         yield from self._stream_partition_generator.generate()
@@ -70,7 +68,6 @@ class DefaultStream(AbstractStream):
             json_schema=dict(self._json_schema),
             supported_sync_modes=[SyncMode.full_refresh],
             is_resumable=False,
-            is_file_based=self._supports_file_transfer,
         )
 
         if self._namespace:
airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py
@@ -71,6 +71,10 @@ class AbstractStreamStateConverter(ABC):
         for stream_slice in state.get("slices", []):
             stream_slice[self.START_KEY] = self._from_state_message(stream_slice[self.START_KEY])
             stream_slice[self.END_KEY] = self._from_state_message(stream_slice[self.END_KEY])
+            if self.MOST_RECENT_RECORD_KEY in stream_slice:
+                stream_slice[self.MOST_RECENT_RECORD_KEY] = self._from_state_message(
+                    stream_slice[self.MOST_RECENT_RECORD_KEY]
+                )
         return state
 
     def serialize(
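
To illustrate what the new branch handles, a hedged sketch of an incoming per-slice state blob: alongside the start/end bounds, a slice may carry a most-recent-record value, and deserialize() now runs it through _from_state_message() as well. The key names ("start", "end", "most_recent_cursor_value") are assumed to match the converter's START_KEY, END_KEY, and MOST_RECENT_RECORD_KEY constants:

    incoming_state = {
        "slices": [
            {
                "start": "2024-01-01T00:00:00Z",
                "end": "2024-02-01T00:00:00Z",
                "most_recent_cursor_value": "2024-01-31T23:59:59Z",
            }
        ]
    }
    # converter.deserialize(incoming_state) would now parse all three values into the
    # converter's native cursor type (e.g. datetime for a datetime-based converter),
    # instead of leaving most_recent_cursor_value as a raw string.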
airbyte_cdk/sources/types.py
@@ -6,7 +6,6 @@ from __future__ import annotations
 
 from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
 
-from airbyte_cdk.models import AirbyteRecordMessageFileReference
 from airbyte_cdk.utils.slice_hasher import SliceHasher
 
 # A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
@@ -24,12 +23,12 @@ class Record(Mapping[str, Any]):
         data: Mapping[str, Any],
         stream_name: str,
         associated_slice: Optional[StreamSlice] = None,
-        file_reference: Optional[AirbyteRecordMessageFileReference] = None,
+        is_file_transfer_message: bool = False,
     ):
         self._data = data
         self._associated_slice = associated_slice
         self.stream_name = stream_name
-        self._file_reference = file_reference
+        self.is_file_transfer_message = is_file_transfer_message
 
     @property
     def data(self) -> Mapping[str, Any]:
@@ -39,14 +38,6 @@ class Record(Mapping[str, Any]):
     def associated_slice(self) -> Optional[StreamSlice]:
         return self._associated_slice
 
-    @property
-    def file_reference(self) -> AirbyteRecordMessageFileReference:
-        return self._file_reference
-
-    @file_reference.setter
-    def file_reference(self, value: AirbyteRecordMessageFileReference) -> None:
-        self._file_reference = value
-
     def __repr__(self) -> str:
         return repr(self._data)
 
airbyte_cdk/sources/utils/record_helper.py
@@ -9,10 +9,10 @@ from airbyte_cdk.models import (
     AirbyteLogMessage,
     AirbyteMessage,
     AirbyteRecordMessage,
-    AirbyteRecordMessageFileReference,
     AirbyteTraceMessage,
 )
 from airbyte_cdk.models import Type as MessageType
+from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
 from airbyte_cdk.sources.streams.core import StreamData
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
 
@@ -22,7 +22,7 @@ def stream_data_to_airbyte_message(
     data_or_message: StreamData,
     transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform),
     schema: Optional[Mapping[str, Any]] = None,
-    file_reference: Optional[AirbyteRecordMessageFileReference] = None,
+    is_file_transfer_message: bool = False,
 ) -> AirbyteMessage:
     if schema is None:
         schema = {}
@@ -36,12 +36,12 @@ def stream_data_to_airbyte_message(
             # taken unless configured. See
             # docs/connector-development/cdk-python/schemas.md for details.
             transformer.transform(data, schema)
-            message = AirbyteRecordMessage(
-                stream=stream_name,
-                data=data,
-                emitted_at=now_millis,
-                file_reference=file_reference,
-            )
+            if is_file_transfer_message:
+                message = AirbyteFileTransferRecordMessage(
+                    stream=stream_name, file=data, emitted_at=now_millis, data={}
+                )
+            else:
+                message = AirbyteRecordMessage(stream=stream_name, data=data, emitted_at=now_millis)
             return AirbyteMessage(type=MessageType.RECORD, record=message)
         case AirbyteTraceMessage():
             return AirbyteMessage(type=MessageType.TRACE, trace=data_or_message)
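
A hedged usage sketch of the branch above: passing is_file_transfer_message=True routes the payload into the file field of an AirbyteFileTransferRecordMessage instead of the regular record data. The payload keys mirror the dict returned by AbstractFileBasedStreamReader.get_file() earlier in this diff; the concrete values are illustrative only:

    from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message

    file_info = {
        "file_url": "/staging/files/invoices/2024/01.csv",
        "bytes": 10_240,
        "file_relative_path": "invoices/2024/01.csv",
    }
    message = stream_data_to_airbyte_message("invoices", file_info, is_file_transfer_message=True)
    # message.record.file == file_info and message.record.data == {}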
airbyte_cdk/test/mock_http/response_builder.py
@@ -198,14 +198,6 @@ def find_template(resource: str, execution_folder: str) -> Dict[str, Any]:
         return json.load(template_file) # type: ignore # we assume the dev correctly set up the resource file
 
 
-def find_binary_response(resource: str, execution_folder: str) -> bytes:
-    response_filepath = str(
-        get_unit_test_folder(execution_folder) / "resource" / "http" / "response" / f"{resource}"
-    )
-    with open(response_filepath, "rb") as response_file:
-        return response_file.read() # type: ignore # we assume the dev correctly set up the resource file
-
-
 def create_record_builder(
     response_template: Dict[str, Any],
     records_path: Union[FieldPath, NestedPath],
{airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-cdk
-Version: 6.45.0.dev4107
+Version: 6.45.0.post6.dev14369631849
 Summary: A framework for writing Airbyte Connectors.
 Home-page: https://airbyte.com
 License: MIT
@@ -22,7 +22,7 @@ Provides-Extra: sql
 Provides-Extra: vector-db-based
 Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
 Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
-Requires-Dist: airbyte-protocol-models-dataclasses (>=0.15,<0.16)
+Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
 Requires-Dist: anyascii (>=0.3.2,<0.4.0)
 Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
 Requires-Dist: backoff