airbyte-cdk 6.45.0.dev4107__py3-none-any.whl → 6.45.0.post6.dev14369631849__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +45 -6
- airbyte_cdk/connector_builder/main.py +5 -2
- airbyte_cdk/models/__init__.py +0 -1
- airbyte_cdk/models/airbyte_protocol.py +3 -1
- airbyte_cdk/models/file_transfer_record_message.py +13 -0
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +1 -1
- airbyte_cdk/sources/declarative/async_job/job.py +6 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +18 -18
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +22 -6
- airbyte_cdk/sources/declarative/checks/__init__.py +5 -2
- airbyte_cdk/sources/declarative/checks/check_stream.py +113 -11
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +0 -8
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +210 -50
- airbyte_cdk/sources/declarative/extractors/record_selector.py +1 -6
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/macros.py +10 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +23 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +142 -43
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +16 -4
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +263 -50
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +4 -0
- airbyte_cdk/sources/declarative/partition_routers/grouping_partition_router.py +150 -0
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +5 -1
- airbyte_cdk/sources/declarative/requesters/query_properties/__init__.py +13 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/properties_from_endpoint.py +40 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/property_chunking.py +69 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/query_properties.py +58 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/__init__.py +10 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/group_by_key.py +33 -0
- airbyte_cdk/sources/declarative/requesters/query_properties/strategies/merge_strategy.py +19 -0
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +25 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +101 -30
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +1 -1
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -9
- airbyte_cdk/sources/declarative/transformations/add_fields.py +3 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +15 -38
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +15 -8
- airbyte_cdk/sources/file_based/schema_helpers.py +1 -9
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +12 -3
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +31 -16
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +3 -1
- airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -3
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +4 -0
- airbyte_cdk/sources/types.py +2 -11
- airbyte_cdk/sources/utils/record_helper.py +8 -8
- airbyte_cdk/test/mock_http/response_builder.py +0 -8
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/METADATA +2 -2
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/RECORD +52 -46
- airbyte_cdk/sources/declarative/retrievers/file_uploader.py +0 -89
- airbyte_cdk/sources/file_based/file_record_data.py +0 -22
- airbyte_cdk/sources/utils/files_directory.py +0 -15
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/entry_points.txt +0 -0
@@ -139,7 +139,9 @@ class AddFields(RecordTransformation):
|
|
139
139
|
valid_types = (parsed_field.value_type,) if parsed_field.value_type else None
|
140
140
|
value = parsed_field.value.eval(config, valid_types=valid_types, **kwargs)
|
141
141
|
is_empty_condition = not self.condition
|
142
|
-
if is_empty_condition or self._filter_interpolator.eval(
|
142
|
+
if is_empty_condition or self._filter_interpolator.eval(
|
143
|
+
config, value=value, path=parsed_field.path, **kwargs
|
144
|
+
):
|
143
145
|
dpath.new(record, parsed_field.path, value)
|
144
146
|
|
145
147
|
def __eq__(self, other: Any) -> bool:
|
@@ -8,18 +8,16 @@ from datetime import datetime
|
|
8
8
|
from enum import Enum
|
9
9
|
from io import IOBase
|
10
10
|
from os import makedirs, path
|
11
|
-
from typing import Any,
|
11
|
+
from typing import Any, Dict, Iterable, List, Optional, Set
|
12
12
|
|
13
13
|
from wcmatch.glob import GLOBSTAR, globmatch
|
14
14
|
|
15
|
-
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
16
15
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
17
16
|
from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import (
|
18
17
|
include_identities_stream,
|
19
18
|
preserve_directory_structure,
|
20
19
|
use_file_transfer,
|
21
20
|
)
|
22
|
-
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
23
21
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
24
22
|
|
25
23
|
|
@@ -30,11 +28,6 @@ class FileReadMode(Enum):
|
|
30
28
|
|
31
29
|
class AbstractFileBasedStreamReader(ABC):
|
32
30
|
DATE_TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"
|
33
|
-
FILE_RELATIVE_PATH = "file_relative_path"
|
34
|
-
FILE_NAME = "file_name"
|
35
|
-
LOCAL_FILE_PATH = "local_file_path"
|
36
|
-
SOURCE_FILE_URI = "source_file_relative_path"
|
37
|
-
FILE_FOLDER = "file_folder"
|
38
31
|
|
39
32
|
def __init__(self) -> None:
|
40
33
|
self._config = None
|
@@ -155,9 +148,9 @@ class AbstractFileBasedStreamReader(ABC):
|
|
155
148
|
return False
|
156
149
|
|
157
150
|
@abstractmethod
|
158
|
-
def
|
151
|
+
def get_file(
|
159
152
|
self, file: RemoteFile, local_directory: str, logger: logging.Logger
|
160
|
-
) ->
|
153
|
+
) -> Dict[str, Any]:
|
161
154
|
"""
|
162
155
|
This is required for connectors that will support writing to
|
163
156
|
files. It will handle the logic to download, get, read, acquire or
|
@@ -169,41 +162,25 @@ class AbstractFileBasedStreamReader(ABC):
|
|
169
162
|
logger (logging.Logger): Logger for logging information and errors.
|
170
163
|
|
171
164
|
Returns:
|
172
|
-
|
173
|
-
-
|
174
|
-
-
|
175
|
-
-
|
165
|
+
dict: A dictionary containing the following:
|
166
|
+
- "file_url" (str): The absolute path of the downloaded file.
|
167
|
+
- "bytes" (int): The file size in bytes.
|
168
|
+
- "file_relative_path" (str): The relative path of the file for local storage. Is relative to local_directory as
|
169
|
+
this is a mounted volume in the pod container.
|
170
|
+
|
176
171
|
"""
|
177
172
|
...
|
178
173
|
|
179
|
-
def _get_file_transfer_paths(
|
180
|
-
self,
|
181
|
-
file: RemoteFile,
|
182
|
-
local_directory: str,
|
183
|
-
parse_file_path_from_uri: Optional[Callable[[str], str]] = None,
|
184
|
-
) -> MutableMapping[str, Any]:
|
174
|
+
def _get_file_transfer_paths(self, file: RemoteFile, local_directory: str) -> List[str]:
|
185
175
|
preserve_directory_structure = self.preserve_directory_structure()
|
186
|
-
if not parse_file_path_from_uri:
|
187
|
-
file_path = file.uri
|
188
|
-
else:
|
189
|
-
file_path = parse_file_path_from_uri(file.uri)
|
190
|
-
|
191
|
-
file_name = path.basename(file_path)
|
192
|
-
file_folder = path.dirname(file_path)
|
193
176
|
if preserve_directory_structure:
|
194
177
|
# Remove left slashes from source path format to make relative path for writing locally
|
195
|
-
file_relative_path =
|
178
|
+
file_relative_path = file.uri.lstrip("/")
|
196
179
|
else:
|
197
|
-
file_relative_path =
|
180
|
+
file_relative_path = path.basename(file.uri)
|
198
181
|
local_file_path = path.join(local_directory, file_relative_path)
|
182
|
+
|
199
183
|
# Ensure the local directory exists
|
200
184
|
makedirs(path.dirname(local_file_path), exist_ok=True)
|
201
|
-
|
202
|
-
|
203
|
-
self.FILE_RELATIVE_PATH: file_relative_path,
|
204
|
-
self.LOCAL_FILE_PATH: local_file_path,
|
205
|
-
self.FILE_NAME: file_name,
|
206
|
-
self.FILE_FOLDER: file_folder,
|
207
|
-
self.SOURCE_FILE_URI: file.uri,
|
208
|
-
}
|
209
|
-
return file_paths
|
185
|
+
absolute_file_path = path.abspath(local_file_path)
|
186
|
+
return [file_relative_path, local_file_path, absolute_file_path]
|
@@ -2,27 +2,34 @@
|
|
2
2
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
import logging
|
5
|
-
|
5
|
+
import os
|
6
|
+
from typing import Any, Dict, Iterable
|
6
7
|
|
7
|
-
from airbyte_cdk.
|
8
|
+
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
8
9
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
|
9
|
-
from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
|
10
10
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
11
|
-
|
11
|
+
|
12
|
+
AIRBYTE_STAGING_DIRECTORY = os.getenv("AIRBYTE_STAGING_DIRECTORY", "/staging/files")
|
13
|
+
DEFAULT_LOCAL_DIRECTORY = "/tmp/airbyte-file-transfer"
|
12
14
|
|
13
15
|
|
14
16
|
class FileTransfer:
|
15
17
|
def __init__(self) -> None:
|
16
|
-
self._local_directory =
|
18
|
+
self._local_directory = (
|
19
|
+
AIRBYTE_STAGING_DIRECTORY
|
20
|
+
if os.path.exists(AIRBYTE_STAGING_DIRECTORY)
|
21
|
+
else DEFAULT_LOCAL_DIRECTORY
|
22
|
+
)
|
17
23
|
|
18
|
-
def
|
24
|
+
def get_file(
|
19
25
|
self,
|
26
|
+
config: FileBasedStreamConfig,
|
20
27
|
file: RemoteFile,
|
21
28
|
stream_reader: AbstractFileBasedStreamReader,
|
22
29
|
logger: logging.Logger,
|
23
|
-
) -> Iterable[
|
30
|
+
) -> Iterable[Dict[str, Any]]:
|
24
31
|
try:
|
25
|
-
yield stream_reader.
|
32
|
+
yield stream_reader.get_file(
|
26
33
|
file=file, local_directory=self._local_directory, logger=logger
|
27
34
|
)
|
28
35
|
except Exception as ex:
|
@@ -18,17 +18,9 @@ JsonSchemaSupportedType = Union[List[str], Literal["string"], str]
|
|
18
18
|
SchemaType = Mapping[str, Mapping[str, JsonSchemaSupportedType]]
|
19
19
|
|
20
20
|
schemaless_schema = {"type": "object", "properties": {"data": {"type": "object"}}}
|
21
|
-
|
22
21
|
file_transfer_schema = {
|
23
22
|
"type": "object",
|
24
|
-
"properties": {
|
25
|
-
"folder": {"type": "string"},
|
26
|
-
"file_name": {"type": "string"},
|
27
|
-
"bytes": {"type": "integer"},
|
28
|
-
"id": {"type": ["null", "string"]},
|
29
|
-
"updated_at": {"type": ["null", "string"]},
|
30
|
-
"mime_type": {"type": ["null", "string"]},
|
31
|
-
},
|
23
|
+
"properties": {"data": {"type": "object"}, "file": {"type": "object"}},
|
32
24
|
}
|
33
25
|
|
34
26
|
|
@@ -4,7 +4,7 @@
|
|
4
4
|
|
5
5
|
import copy
|
6
6
|
import logging
|
7
|
-
from functools import lru_cache
|
7
|
+
from functools import cache, lru_cache
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
10
|
from typing_extensions import deprecated
|
@@ -258,14 +258,19 @@ class FileBasedStreamPartition(Partition):
|
|
258
258
|
and record_data.record is not None
|
259
259
|
):
|
260
260
|
# `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
|
261
|
-
|
261
|
+
# If the stream is flagged for file_transfer, the record should carry its data in the file key
|
262
|
+
record_message_data = (
|
263
|
+
record_data.record.file
|
264
|
+
if self._use_file_transfer()
|
265
|
+
else record_data.record.data
|
266
|
+
)
|
262
267
|
if not record_message_data:
|
263
268
|
raise ExceptionWithDisplayMessage("A record without data was found")
|
264
269
|
else:
|
265
270
|
yield Record(
|
266
271
|
data=record_message_data,
|
267
272
|
stream_name=self.stream_name(),
|
268
|
-
|
273
|
+
is_file_transfer_message=self._use_file_transfer(),
|
269
274
|
)
|
270
275
|
else:
|
271
276
|
self._message_repository.emit_message(record_data)
|
@@ -301,6 +306,10 @@ class FileBasedStreamPartition(Partition):
|
|
301
306
|
def stream_name(self) -> str:
|
302
307
|
return self._stream.name
|
303
308
|
|
309
|
+
@cache
|
310
|
+
def _use_file_transfer(self) -> bool:
|
311
|
+
return hasattr(self._stream, "use_file_transfer") and self._stream.use_file_transfer
|
312
|
+
|
304
313
|
def __repr__(self) -> str:
|
305
314
|
return f"FileBasedStreamPartition({self._stream.name}, {self._slice})"
|
306
315
|
|
@@ -11,7 +11,7 @@ from functools import cache
|
|
11
11
|
from os import path
|
12
12
|
from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Set, Tuple, Union
|
13
13
|
|
14
|
-
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage,
|
14
|
+
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, FailureType, Level
|
15
15
|
from airbyte_cdk.models import Type as MessageType
|
16
16
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import PrimaryKeyType
|
17
17
|
from airbyte_cdk.sources.file_based.exceptions import (
|
@@ -97,7 +97,14 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
97
97
|
self, configured_catalog_json_schema: Dict[str, Any]
|
98
98
|
) -> Dict[str, Any]:
|
99
99
|
if self.use_file_transfer:
|
100
|
-
return
|
100
|
+
return {
|
101
|
+
"type": "object",
|
102
|
+
"properties": {
|
103
|
+
"file_path": {"type": "string"},
|
104
|
+
"file_size": {"type": "string"},
|
105
|
+
self.ab_file_name_col: {"type": "string"},
|
106
|
+
},
|
107
|
+
}
|
101
108
|
else:
|
102
109
|
return super()._filter_schema_invalid_properties(configured_catalog_json_schema)
|
103
110
|
|
@@ -138,6 +145,14 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
138
145
|
record[self.ab_file_name_col] = file.uri
|
139
146
|
return record
|
140
147
|
|
148
|
+
def transform_record_for_file_transfer(
|
149
|
+
self, record: dict[str, Any], file: RemoteFile
|
150
|
+
) -> dict[str, Any]:
|
151
|
+
# timestamp() returns a float representing the number of seconds since the Unix epoch
|
152
|
+
record[self.modified] = int(file.last_modified.timestamp()) * 1000
|
153
|
+
record[self.source_file_url] = file.uri
|
154
|
+
return record
|
155
|
+
|
141
156
|
def read_records_from_slice(self, stream_slice: StreamSlice) -> Iterable[AirbyteMessage]:
|
142
157
|
"""
|
143
158
|
Yield all records from all remote files in `list_files_for_this_sync`.
|
@@ -151,7 +166,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
151
166
|
raise MissingSchemaError(FileBasedSourceError.MISSING_SCHEMA, stream=self.name)
|
152
167
|
# The stream only supports a single file type, so we can use the same parser for all files
|
153
168
|
parser = self.get_parser()
|
154
|
-
file_transfer = FileTransfer()
|
155
169
|
for file in stream_slice["files"]:
|
156
170
|
# only serialize the datetime once
|
157
171
|
file_datetime_string = file.last_modified.strftime(self.DATE_TIME_FORMAT)
|
@@ -159,13 +173,19 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
159
173
|
|
160
174
|
try:
|
161
175
|
if self.use_file_transfer:
|
162
|
-
|
163
|
-
|
176
|
+
self.logger.info(f"{self.name}: {file} file-based syncing")
|
177
|
+
# todo: complete here the code to not rely on local parser
|
178
|
+
file_transfer = FileTransfer()
|
179
|
+
for record in file_transfer.get_file(
|
180
|
+
self.config, file, self.stream_reader, self.logger
|
164
181
|
):
|
182
|
+
line_no += 1
|
183
|
+
if not self.record_passes_validation_policy(record):
|
184
|
+
n_skipped += 1
|
185
|
+
continue
|
186
|
+
record = self.transform_record_for_file_transfer(record, file)
|
165
187
|
yield stream_data_to_airbyte_message(
|
166
|
-
self.name,
|
167
|
-
file_record_data.dict(exclude_none=True),
|
168
|
-
file_reference=file_reference,
|
188
|
+
self.name, record, is_file_transfer_message=True
|
169
189
|
)
|
170
190
|
else:
|
171
191
|
for record in parser.parse_records(
|
@@ -239,8 +259,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
239
259
|
|
240
260
|
@cache
|
241
261
|
def get_json_schema(self) -> JsonSchema:
|
242
|
-
if self.use_file_transfer:
|
243
|
-
return file_transfer_schema
|
244
262
|
extra_fields = {
|
245
263
|
self.ab_last_mod_col: {"type": "string"},
|
246
264
|
self.ab_file_name_col: {"type": "string"},
|
@@ -264,7 +282,9 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
264
282
|
return {"type": "object", "properties": {**extra_fields, **schema["properties"]}}
|
265
283
|
|
266
284
|
def _get_raw_json_schema(self) -> JsonSchema:
|
267
|
-
if self.
|
285
|
+
if self.use_file_transfer:
|
286
|
+
return file_transfer_schema
|
287
|
+
elif self.config.input_schema:
|
268
288
|
return self.config.get_input_schema() # type: ignore
|
269
289
|
elif self.config.schemaless:
|
270
290
|
return schemaless_schema
|
@@ -321,11 +341,6 @@ class DefaultFileBasedStream(AbstractFileBasedStream, IncrementalMixin):
|
|
321
341
|
self.config.globs or [], self.config.legacy_prefix, self.logger
|
322
342
|
)
|
323
343
|
|
324
|
-
def as_airbyte_stream(self) -> AirbyteStream:
|
325
|
-
file_stream = super().as_airbyte_stream()
|
326
|
-
file_stream.is_file_based = self.use_file_transfer
|
327
|
-
return file_stream
|
328
|
-
|
329
344
|
def infer_schema(self, files: List[RemoteFile]) -> Mapping[str, Any]:
|
330
345
|
loop = asyncio.get_event_loop()
|
331
346
|
schema = loop.run_until_complete(self._infer_schema(files))
|
@@ -61,7 +61,9 @@ class PermissionsFileBasedStream(DefaultFileBasedStream):
|
|
61
61
|
permissions_record = self.transform_record(
|
62
62
|
permissions_record, file, file_datetime_string
|
63
63
|
)
|
64
|
-
yield stream_data_to_airbyte_message(
|
64
|
+
yield stream_data_to_airbyte_message(
|
65
|
+
self.name, permissions_record, is_file_transfer_message=False
|
66
|
+
)
|
65
67
|
except Exception as e:
|
66
68
|
self.logger.error(f"Failed to retrieve permissions for file {file.uri}: {str(e)}")
|
67
69
|
yield AirbyteMessage(
|
@@ -29,7 +29,6 @@ class DefaultStream(AbstractStream):
|
|
29
29
|
logger: Logger,
|
30
30
|
cursor: Cursor,
|
31
31
|
namespace: Optional[str] = None,
|
32
|
-
supports_file_transfer: bool = False,
|
33
32
|
) -> None:
|
34
33
|
self._stream_partition_generator = partition_generator
|
35
34
|
self._name = name
|
@@ -40,7 +39,6 @@ class DefaultStream(AbstractStream):
|
|
40
39
|
self._logger = logger
|
41
40
|
self._cursor = cursor
|
42
41
|
self._namespace = namespace
|
43
|
-
self._supports_file_transfer = supports_file_transfer
|
44
42
|
|
45
43
|
def generate_partitions(self) -> Iterable[Partition]:
|
46
44
|
yield from self._stream_partition_generator.generate()
|
@@ -70,7 +68,6 @@ class DefaultStream(AbstractStream):
|
|
70
68
|
json_schema=dict(self._json_schema),
|
71
69
|
supported_sync_modes=[SyncMode.full_refresh],
|
72
70
|
is_resumable=False,
|
73
|
-
is_file_based=self._supports_file_transfer,
|
74
71
|
)
|
75
72
|
|
76
73
|
if self._namespace:
|
@@ -71,6 +71,10 @@ class AbstractStreamStateConverter(ABC):
|
|
71
71
|
for stream_slice in state.get("slices", []):
|
72
72
|
stream_slice[self.START_KEY] = self._from_state_message(stream_slice[self.START_KEY])
|
73
73
|
stream_slice[self.END_KEY] = self._from_state_message(stream_slice[self.END_KEY])
|
74
|
+
if self.MOST_RECENT_RECORD_KEY in stream_slice:
|
75
|
+
stream_slice[self.MOST_RECENT_RECORD_KEY] = self._from_state_message(
|
76
|
+
stream_slice[self.MOST_RECENT_RECORD_KEY]
|
77
|
+
)
|
74
78
|
return state
|
75
79
|
|
76
80
|
def serialize(
|
airbyte_cdk/sources/types.py
CHANGED
@@ -6,7 +6,6 @@ from __future__ import annotations
|
|
6
6
|
|
7
7
|
from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
|
8
8
|
|
9
|
-
from airbyte_cdk.models import AirbyteRecordMessageFileReference
|
10
9
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
11
10
|
|
12
11
|
# A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
|
@@ -24,12 +23,12 @@ class Record(Mapping[str, Any]):
|
|
24
23
|
data: Mapping[str, Any],
|
25
24
|
stream_name: str,
|
26
25
|
associated_slice: Optional[StreamSlice] = None,
|
27
|
-
|
26
|
+
is_file_transfer_message: bool = False,
|
28
27
|
):
|
29
28
|
self._data = data
|
30
29
|
self._associated_slice = associated_slice
|
31
30
|
self.stream_name = stream_name
|
32
|
-
self.
|
31
|
+
self.is_file_transfer_message = is_file_transfer_message
|
33
32
|
|
34
33
|
@property
|
35
34
|
def data(self) -> Mapping[str, Any]:
|
@@ -39,14 +38,6 @@ class Record(Mapping[str, Any]):
|
|
39
38
|
def associated_slice(self) -> Optional[StreamSlice]:
|
40
39
|
return self._associated_slice
|
41
40
|
|
42
|
-
@property
|
43
|
-
def file_reference(self) -> AirbyteRecordMessageFileReference:
|
44
|
-
return self._file_reference
|
45
|
-
|
46
|
-
@file_reference.setter
|
47
|
-
def file_reference(self, value: AirbyteRecordMessageFileReference) -> None:
|
48
|
-
self._file_reference = value
|
49
|
-
|
50
41
|
def __repr__(self) -> str:
|
51
42
|
return repr(self._data)
|
52
43
|
|
@@ -9,10 +9,10 @@ from airbyte_cdk.models import (
|
|
9
9
|
AirbyteLogMessage,
|
10
10
|
AirbyteMessage,
|
11
11
|
AirbyteRecordMessage,
|
12
|
-
AirbyteRecordMessageFileReference,
|
13
12
|
AirbyteTraceMessage,
|
14
13
|
)
|
15
14
|
from airbyte_cdk.models import Type as MessageType
|
15
|
+
from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
|
16
16
|
from airbyte_cdk.sources.streams.core import StreamData
|
17
17
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
18
18
|
|
@@ -22,7 +22,7 @@ def stream_data_to_airbyte_message(
|
|
22
22
|
data_or_message: StreamData,
|
23
23
|
transformer: TypeTransformer = TypeTransformer(TransformConfig.NoTransform),
|
24
24
|
schema: Optional[Mapping[str, Any]] = None,
|
25
|
-
|
25
|
+
is_file_transfer_message: bool = False,
|
26
26
|
) -> AirbyteMessage:
|
27
27
|
if schema is None:
|
28
28
|
schema = {}
|
@@ -36,12 +36,12 @@ def stream_data_to_airbyte_message(
|
|
36
36
|
# taken unless configured. See
|
37
37
|
# docs/connector-development/cdk-python/schemas.md for details.
|
38
38
|
transformer.transform(data, schema)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
39
|
+
if is_file_transfer_message:
|
40
|
+
message = AirbyteFileTransferRecordMessage(
|
41
|
+
stream=stream_name, file=data, emitted_at=now_millis, data={}
|
42
|
+
)
|
43
|
+
else:
|
44
|
+
message = AirbyteRecordMessage(stream=stream_name, data=data, emitted_at=now_millis)
|
45
45
|
return AirbyteMessage(type=MessageType.RECORD, record=message)
|
46
46
|
case AirbyteTraceMessage():
|
47
47
|
return AirbyteMessage(type=MessageType.TRACE, trace=data_or_message)
|
@@ -198,14 +198,6 @@ def find_template(resource: str, execution_folder: str) -> Dict[str, Any]:
|
|
198
198
|
return json.load(template_file) # type: ignore # we assume the dev correctly set up the resource file
|
199
199
|
|
200
200
|
|
201
|
-
def find_binary_response(resource: str, execution_folder: str) -> bytes:
|
202
|
-
response_filepath = str(
|
203
|
-
get_unit_test_folder(execution_folder) / "resource" / "http" / "response" / f"{resource}"
|
204
|
-
)
|
205
|
-
with open(response_filepath, "rb") as response_file:
|
206
|
-
return response_file.read() # type: ignore # we assume the dev correctly set up the resource file
|
207
|
-
|
208
|
-
|
209
201
|
def create_record_builder(
|
210
202
|
response_template: Dict[str, Any],
|
211
203
|
records_path: Union[FieldPath, NestedPath],
|
{airbyte_cdk-6.45.0.dev4107.dist-info → airbyte_cdk-6.45.0.post6.dev14369631849.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.45.0.
|
3
|
+
Version: 6.45.0.post6.dev14369631849
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -22,7 +22,7 @@ Provides-Extra: sql
|
|
22
22
|
Provides-Extra: vector-db-based
|
23
23
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
24
24
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
25
|
-
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.
|
25
|
+
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.14,<0.15)
|
26
26
|
Requires-Dist: anyascii (>=0.3.2,<0.4.0)
|
27
27
|
Requires-Dist: avro (>=1.11.2,<1.13.0) ; extra == "file-based"
|
28
28
|
Requires-Dist: backoff
|