airbyte-cdk 6.8.1__py3-none-any.whl → 6.8.1rc1__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +5 -11
- airbyte_cdk/config_observation.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -1
- airbyte_cdk/connector_builder/message_grouper.py +10 -10
- airbyte_cdk/destinations/destination.py +1 -1
- airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +4 -12
- airbyte_cdk/entrypoint.py +6 -7
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/sources/abstract_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -1
- airbyte_cdk/sources/connector_state_manager.py +4 -9
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -6
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +1 -1
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +4 -10
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +17 -16
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -4
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +6 -8
- airbyte_cdk/sources/declarative/interpolation/jinja.py +3 -3
- airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +6 -5
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +7 -13
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +6 -8
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -5
- airbyte_cdk/sources/declarative/spec/spec.py +1 -1
- airbyte_cdk/sources/embedded/base_integration.py +2 -3
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +4 -12
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +7 -18
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +11 -14
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -11
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -6
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +3 -3
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -5
- airbyte_cdk/sources/streams/concurrent/adapters.py +3 -6
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +3 -9
- airbyte_cdk/sources/streams/concurrent/cursor.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- airbyte_cdk/sources/streams/core.py +14 -17
- airbyte_cdk/sources/streams/http/http.py +19 -19
- airbyte_cdk/sources/streams/http/http_client.py +34 -3
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +33 -62
- airbyte_cdk/sources/utils/record_helper.py +1 -1
- airbyte_cdk/sources/utils/schema_helpers.py +1 -1
- airbyte_cdk/sources/utils/transform.py +15 -34
- airbyte_cdk/test/entrypoint_wrapper.py +6 -11
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/message_utils.py +3 -4
- airbyte_cdk/utils/spec_schema_transformations.py +2 -3
- airbyte_cdk/utils/traced_exception.py +12 -14
- airbyte_cdk-6.8.1rc1.dist-info/METADATA +307 -0
- {airbyte_cdk-6.8.1.dist-info → airbyte_cdk-6.8.1rc1.dist-info}/RECORD +66 -66
- airbyte_cdk-6.8.1.dist-info/METADATA +0 -111
- {airbyte_cdk-6.8.1.dist-info → airbyte_cdk-6.8.1rc1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.8.1.dist-info → airbyte_cdk-6.8.1rc1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.8.1.dist-info → airbyte_cdk-6.8.1rc1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
CHANGED
@@ -130,11 +130,11 @@ class SubstreamPartitionRouter(PartitionRouter):
         if value:
             params.update(
                 {
-                    parent_config.request_option.field_name.eval(
+                    parent_config.request_option.field_name.eval(
                         config=self.config
                     ): value
                 }
-            )
+            )  # type: ignore # field_name is always casted to an interpolated string
         return params
 
     def stream_slices(self) -> Iterable[StreamSlice]:
@@ -162,9 +162,9 @@ class SubstreamPartitionRouter(PartitionRouter):
         extra_fields = None
         if parent_stream_config.extra_fields:
             extra_fields = [
-                [field_path_part.eval(self.config) for field_path_part in field_path]
+                [field_path_part.eval(self.config) for field_path_part in field_path]
                 for field_path in parent_stream_config.extra_fields
-            ]
+            ]  # type: ignore # extra_fields is always casted to an interpolated string
 
         # read_stateless() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does
         # not support either substreams or RFR, but something that needs to be considered once we do
@@ -192,10 +192,7 @@ class SubstreamPartitionRouter(PartitionRouter):
                     message=f"Parent stream returned records as invalid type {type(parent_record)}"
                 )
             try:
-                partition_value = dpath.get(
-                    parent_record,  # type: ignore [arg-type]
-                    parent_field,
-                )
+                partition_value = dpath.get(parent_record, parent_field)
             except KeyError:
                 continue
 
@@ -231,10 +228,7 @@ class SubstreamPartitionRouter(PartitionRouter):
             if extra_fields:
                 for extra_field_path in extra_fields:
                     try:
-                        extra_field_value = dpath.get(
-                            parent_record,  # type: ignore [arg-type]
-                            extra_field_path,
-                        )
+                        extra_field_value = dpath.get(parent_record, extra_field_path)
                         self.logger.debug(
                             f"Extracted extra_field_path: {extra_field_path} with value: {extra_field_value}"
                         )
@@ -297,7 +291,7 @@ class SubstreamPartitionRouter(PartitionRouter):
             if not parent_state and incremental_dependency:
                 # Attempt to retrieve child state
                 substream_state = list(stream_state.values())
-                substream_state = substream_state[0] if substream_state else {}
+                substream_state = substream_state[0] if substream_state else {}
                 parent_state = {}
 
                 # Copy child state to parent streams with incremental dependencies
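A note on the `dpath.get` collapse above: `dpath` resolves a nested value by a path of keys, which is how the router pulls a partition value out of each parent record, skipping records that lack the field. A minimal sketch, with an illustrative record shape and path that are not taken from the package:

    import dpath

    # Hypothetical parent record; "profile/id" is an illustrative nested path.
    parent_record = {"profile": {"id": 42, "name": "acme"}}

    try:
        partition_value = dpath.get(parent_record, "profile/id")
    except KeyError:
        partition_value = None  # the router instead skips the record
    print(partition_value)  # 42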
airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py
CHANGED
@@ -141,7 +141,7 @@ class DefaultErrorHandler(ErrorHandler):
         for backoff_strategy in self.backoff_strategies:
             backoff = backoff_strategy.backoff_time(
                 response_or_exception=response_or_exception, attempt_count=attempt_count
-            )
+            )  # type: ignore # attempt_count maintained for compatibility with low code CDK
             if backoff:
                 return backoff
         return backoff
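For context, `DefaultErrorHandler` asks each configured backoff strategy in turn and returns the first non-empty backoff. A minimal sketch of that dispatch, with an illustrative strategy class that is not part of the CDK:

    from typing import Any, List, Optional


    class ConstantBackoff:
        # Illustrative strategy: always wait a fixed number of seconds.
        def __init__(self, seconds: float) -> None:
            self.seconds = seconds

        def backoff_time(self, response_or_exception: Any, attempt_count: int) -> Optional[float]:
            return self.seconds


    def first_backoff(strategies: List[Any], response_or_exception: Any, attempt_count: int) -> Optional[float]:
        backoff = None
        for backoff_strategy in strategies:
            backoff = backoff_strategy.backoff_time(
                response_or_exception=response_or_exception, attempt_count=attempt_count
            )
            if backoff:
                return backoff
        return backoff


    print(first_backoff([ConstantBackoff(5.0)], None, attempt_count=1))  # 5.0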
airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py
CHANGED
@@ -151,23 +151,21 @@ class HttpResponseFilter:
         :param response: The HTTP response which can be used during interpolation
         :return: The evaluated error message string to be emitted
         """
-        return self.error_message.eval(
+        return self.error_message.eval(
             self.config, response=self._safe_response_json(response), headers=response.headers
-        )
+        )  # type: ignore # error_message is always cast to an interpolated string
 
     def _response_matches_predicate(self, response: requests.Response) -> bool:
         return (
             bool(
-                self.predicate.condition
-                and self.predicate.eval(
-                    None,
-                    response=self._safe_response_json(response),
-                    headers=response.headers,
+                self.predicate.condition
+                and self.predicate.eval(
+                    None, response=self._safe_response_json(response), headers=response.headers
                 )
             )
             if self.predicate
             else False
-        )
+        )  # type: ignore # predicate is always cast to an interpolated string
 
     def _response_contains_error_message(self, response: requests.Response) -> bool:
         if not self.error_message_contains:
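The `predicate` evaluated above is an interpolated boolean expression rendered against the decoded response body and headers. A rough stand-in using plain Jinja2 shows the idea; the CDK's interpolation machinery wraps this kind of evaluation, and the template below is illustrative:

    from jinja2 import Template

    # Illustrative predicate: match responses whose JSON body flags rate limiting.
    predicate = "{{ response.get('error_code') == 'rate_limited' }}"

    response_json = {"error_code": "rate_limited"}
    rendered = Template(predicate).render(response=response_json, headers={})
    print(rendered == "True")  # True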
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py
CHANGED
@@ -194,7 +194,7 @@ class DefaultPaginator(Paginator):
             and self.pagination_strategy.get_page_size()
             and self.page_size_option.inject_into == option_type
         ):
-            options[self.page_size_option.field_name.eval(config=self.config)] = (
+            options[self.page_size_option.field_name.eval(config=self.config)] = (
                 self.pagination_strategy.get_page_size()
             )  # type: ignore # field_name is always cast to an interpolated string
         return options
airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py
CHANGED
@@ -85,7 +85,7 @@ class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider):
                 self._partition_field_start.eval(self.config)
             )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
                 self._partition_field_end.eval(self.config)
-            )
+            )  # type: ignore # field_name is always casted to an interpolated string
         return options
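Both this provider and `DefaultPaginator` share the same move: evaluate an interpolated field name, then inject a value under that name into the outgoing request options. A minimal sketch under assumed config and slice shapes (the `start_param` key is hypothetical):

    def eval_field_name(config: dict) -> str:
        # Stands in for InterpolatedString.eval, e.g. "{{ config['start_param'] }}".
        return config["start_param"]


    config = {"start_param": "since"}                      # hypothetical connector config
    stream_slice = {"start_time": "2024-01-01T00:00:00Z"}  # hypothetical slice

    options = {eval_field_name(config): stream_slice.get("start_time")}
    print(options)  # {'since': '2024-01-01T00:00:00Z'}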
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py
CHANGED
@@ -5,7 +5,7 @@
 from dataclasses import InitVar, dataclass, field
 from typing import Any, Mapping, MutableMapping, Optional, Union
 
-from
+from deprecated import deprecated
 
 from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
 from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
airbyte_cdk/sources/declarative/retrievers/async_retriever.py
CHANGED
@@ -4,7 +4,7 @@
 from dataclasses import InitVar, dataclass, field
 from typing import Any, Callable, Iterable, Mapping, Optional
 
-from
+from deprecated.classic import deprecated
 
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
@@ -21,10 +21,7 @@ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
 
 
-@deprecated(
-    "This class is experimental. Use at your own risk.",
-    category=ExperimentalClassWarning,
-)
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
 @dataclass
 class AsyncRetriever(Retriever):
     config: Config
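The `@deprecated(...)` consolidation above recurs throughout this release. The decorator comes from the `Deprecated` package and, applied to a class, emits a warning of the given category on instantiation. A self-contained sketch with an illustrative class (`ExperimentalClassWarning` is re-declared here only so the snippet runs standalone):

    import warnings

    from deprecated.classic import deprecated


    class ExperimentalClassWarning(DeprecationWarning):
        """Stand-in for airbyte_cdk.sources.source.ExperimentalClassWarning."""


    @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
    class ExampleRetriever:
        pass


    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        ExampleRetriever()
        print(caught[0].category.__name__)  # ExperimentalClassWarning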
airbyte_cdk/sources/embedded/base_integration.py
CHANGED
@@ -52,9 +52,8 @@ class BaseEmbeddedIntegration(ABC, Generic[TConfig, TOutput]):
         for message in self.source.read(self.config, configured_catalog, state):
             if message.type == Type.RECORD:
                 output = self._handle_record(
-                    message.record,
-                    get_defined_id(stream, message.record.data),
-                )
+                    message.record, get_defined_id(stream, message.record.data)
+                )  # type: ignore[union-attr] # record has `data`
                 if output:
                     yield output
             elif message.type is Type.STATE and message.state:
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py
CHANGED
@@ -2,8 +2,6 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 
-from __future__ import annotations
-
 import logging
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Optional, Tuple
@@ -24,11 +22,8 @@ if TYPE_CHECKING:
 
 class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
     @abstractmethod
-    def check_availability(
-        self,
-        stream: Stream,
-        logger: logging.Logger,
-        _: Optional[Source],
+    def check_availability(
+        self, stream: Stream, logger: logging.Logger, _: Optional[Source]
     ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream.
@@ -39,10 +34,7 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
 
     @abstractmethod
     def check_availability_and_parsability(
-        self,
-        stream: AbstractFileBasedStream,
-        logger: logging.Logger,
-        _: Optional[Source],
+        self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
     ) -> Tuple[bool, Optional[str]]:
         """
         Performs a connection check for the stream, as well as additional checks that
@@ -54,7 +46,7 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
 
 
 class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy):
-    def __init__(self, stream: AbstractFileBasedStream)
+    def __init__(self, stream: "AbstractFileBasedStream"):
        self.stream = stream
 
     def check_availability(self, logger: logging.Logger) -> StreamAvailability:
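Both availability-strategy files drop `from __future__ import annotations` and quote `AbstractFileBasedStream` instead. A quoted annotation stays a string at runtime, so the name only has to exist for type checkers, which avoids runtime import cycles. A minimal sketch of the same pattern (the imported module path is hypothetical):

    import logging
    from typing import TYPE_CHECKING, Optional, Tuple

    if TYPE_CHECKING:
        # Resolved only by type checkers, never at runtime.
        from my_connector.streams import AbstractFileBasedStream  # hypothetical path


    class ExampleAvailabilityStrategy:
        def check_availability(
            self, stream: "AbstractFileBasedStream", logger: logging.Logger
        ) -> Tuple[bool, Optional[str]]:
            # The quoted annotation is never evaluated at runtime.
            return True, None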
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py
CHANGED
@@ -2,8 +2,6 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 
-from __future__ import annotations
-
 import logging
 import traceback
 from typing import TYPE_CHECKING, Optional, Tuple
@@ -27,15 +25,12 @@ if TYPE_CHECKING:
 
 
 class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy):
-    def __init__(self, stream_reader: AbstractFileBasedStreamReader)
+    def __init__(self, stream_reader: AbstractFileBasedStreamReader):
         self.stream_reader = stream_reader
 
-    def check_availability(
-        self,
-        stream: AbstractFileBasedStream,
-        logger: logging.Logger,
-        _: Optional[Source],
-    ) -> Tuple[bool, Optional[str]]:
+    def check_availability(
+        self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
+    ) -> Tuple[bool, Optional[str]]:  # type: ignore[override]
         """
         Perform a connection check for the stream (verify that we can list files from the stream).
 
@@ -49,10 +44,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         return True, None
 
     def check_availability_and_parsability(
-        self,
-        stream: AbstractFileBasedStream,
-        logger: logging.Logger,
-        _: Optional[Source],
+        self, stream: "AbstractFileBasedStream", logger: logging.Logger, _: Optional[Source]
     ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream.
@@ -90,7 +82,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
 
         return True, None
 
-    def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile:
+    def _check_list_files(self, stream: "AbstractFileBasedStream") -> RemoteFile:
         """
         Check that we can list files from the stream.
 
@@ -110,10 +102,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         return file
 
     def _check_parse_record(
-        self,
-        stream: AbstractFileBasedStream,
-        file: RemoteFile,
-        logger: logging.Logger,
+        self, stream: "AbstractFileBasedStream", file: RemoteFile, logger: logging.Logger
     ) -> None:
         parser = stream.get_parser()
 
airbyte_cdk/sources/file_based/file_types/avro_parser.py
CHANGED
@@ -3,7 +3,7 @@
 #
 
 import logging
-from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
+from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
 
 import fastavro
 
@@ -64,20 +64,18 @@ class AvroParser(FileTypeParser):
             raise ValueError(f"Expected ParquetFormat, got {avro_format}")
 
         with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
-            avro_reader = fastavro.reader(fp)
+            avro_reader = fastavro.reader(fp)
             avro_schema = avro_reader.writer_schema
-        if not avro_schema["type"] == "record":
-            unsupported_type = avro_schema["type"]
+        if not avro_schema["type"] == "record":
+            unsupported_type = avro_schema["type"]
             raise ValueError(
                 f"Only record based avro files are supported. Found {unsupported_type}"
             )
         json_schema = {
-            field["name"]: AvroParser._convert_avro_type_to_json(
-                avro_format,
-                field["name"],  # type: ignore [index]
-                field["type"],  # type: ignore [index]
+            field["name"]: AvroParser._convert_avro_type_to_json(
+                avro_format, field["name"], field["type"]
             )
-            for field in avro_schema["fields"]
+            for field in avro_schema["fields"]
         }
         return json_schema
 
@@ -182,19 +180,18 @@ class AvroParser(FileTypeParser):
         line_no = 0
         try:
             with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
-                avro_reader = fastavro.reader(fp)
+                avro_reader = fastavro.reader(fp)
                 schema = avro_reader.writer_schema
                 schema_field_name_to_type = {
-                    field["name"]: field["type"]
-                    for field in schema["fields"]  # type: ignore [index, call-overload]  # If schema is not dict, it is not subscriptable by strings
+                    field["name"]: field["type"] for field in schema["fields"]
                 }
                 for record in avro_reader:
                     line_no += 1
                     yield {
                         record_field: self._to_output_value(
                             avro_format,
-                            schema_field_name_to_type[record_field],
-                            record[record_field],
+                            schema_field_name_to_type[record_field],
+                            record[record_field],
                         )
                         for record_field, record_value in schema_field_name_to_type.items()
                     }
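The second `AvroParser` hunk builds a field-name-to-type map from the writer schema before decoding records. A self-contained sketch with fastavro; the schema and record are illustrative:

    import io

    import fastavro

    schema = {
        "type": "record",
        "name": "Example",
        "fields": [{"name": "id", "type": "long"}, {"name": "name", "type": "string"}],
    }

    # Write one record to an in-memory Avro file, then read it back.
    buf = io.BytesIO()
    fastavro.writer(buf, schema, [{"id": 1, "name": "a"}])
    buf.seek(0)

    avro_reader = fastavro.reader(buf)
    field_types = {field["name"]: field["type"] for field in avro_reader.writer_schema["fields"]}
    print(field_types)  # {'id': 'long', 'name': 'string'}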
airbyte_cdk/sources/file_based/file_types/csv_parser.py
CHANGED
@@ -12,7 +12,7 @@ from io import IOBase
 from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
 from uuid import uuid4
 
-import orjson
+from orjson import orjson
 
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.file_based.config.csv_format import (
@@ -117,7 +117,7 @@ class _CsvReader:
         """
         # Note that this method assumes the dialect has already been registered if we're parsing the headers
         if isinstance(config_format.header_definition, CsvHeaderUserProvided):
-            return config_format.header_definition.column_names
+            return config_format.header_definition.column_names  # type: ignore # should be CsvHeaderUserProvided given the type
 
         if isinstance(config_format.header_definition, CsvHeaderAutogenerated):
             self._skip_rows(
@@ -229,7 +229,7 @@ class CsvParser(FileTypeParser):
         if discovered_schema:
             property_types = {
                 col: prop["type"] for col, prop in discovered_schema["properties"].items()
-            }
+            }  # type: ignore # discovered_schema["properties"] is known to be a mapping
             deduped_property_types = CsvParser._pre_propcess_property_types(property_types)
         else:
             deduped_property_types = {}
airbyte_cdk/sources/file_based/file_types/excel_parser.py
CHANGED
@@ -7,10 +7,10 @@ from io import IOBase
 from pathlib import Path
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
 
-import orjson
 import pandas as pd
 from numpy import datetime64, issubdtype
 from numpy import dtype as dtype_
+from orjson import orjson
 from pydantic.v1 import BaseModel
 
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
@@ -69,11 +69,8 @@ class ExcelParser(FileTypeParser):
             df = self.open_and_parse_file(fp)
             for column, df_type in df.dtypes.items():
                 # Choose the broadest data type if the column's data type differs in dataframes
-                prev_frame_column_type = fields.get(column)
-                fields[column] = self.dtype_to_json_type(
-                    prev_frame_column_type,
-                    df_type,
-                )
+                prev_frame_column_type = fields.get(column)
+                fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type)
 
         schema = {
             field: (
@@ -139,10 +136,7 @@ class ExcelParser(FileTypeParser):
         return FileReadMode.READ_BINARY
 
     @staticmethod
-    def dtype_to_json_type(
-        current_type: Optional[str],
-        dtype: dtype_,  # type: ignore [type-arg]
-    ) -> str:
+    def dtype_to_json_type(current_type: Optional[str], dtype: dtype_) -> str:
         """
         Convert Pandas DataFrame types to Airbyte Types.
 
@@ -193,4 +187,4 @@ class ExcelParser(FileTypeParser):
         Returns:
             pd.DataFrame: Parsed data from the Excel file.
         """
-        return pd.ExcelFile(fp, engine="calamine").parse()
+        return pd.ExcelFile(fp, engine="calamine").parse()
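The `ExcelParser` schema hunk merges column dtypes across sheets, widening the JSON type whenever two sheets disagree. A compact sketch of that widening logic; the two-type lattice here is a simplification of the CDK's full dtype mapping:

    import pandas as pd


    def dtype_to_json_type(current_type, dtype) -> str:
        # Simplified stand-in for ExcelParser.dtype_to_json_type: widen to
        # "string" when a column's type differs between dataframes.
        inferred = "number" if dtype.kind in ("i", "u", "f") else "string"
        if current_type is not None and current_type != inferred:
            return "string"
        return inferred


    fields: dict = {}
    for df in (pd.DataFrame({"a": [1]}), pd.DataFrame({"a": ["x"]})):  # two illustrative "sheets"
        for column, df_type in df.dtypes.items():
            fields[column] = dtype_to_json_type(fields.get(column), df_type)

    print(fields)  # {'a': 'string'}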
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py
CHANGED
@@ -6,7 +6,7 @@ import json
 import logging
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
 
-import orjson
+from orjson import orjson
 
 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
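Three parsers switch from `import orjson` to `from orjson import orjson`. The latter is an unusual spelling that resolves only because the package exposes a submodule of the same name; both bind a module with the same `loads`/`dumps` API. A sketch of the fast-path/fallback JSONL parse such a module supports (the fallback split is a common pattern, shown here as an assumption about usage, not the file's exact code):

    import json

    import orjson  # the conventional import spelling


    def parse_jsonl(data: bytes):
        # Fast path with orjson; fall back to the stdlib for anything it rejects.
        for line in data.splitlines():
            if not line.strip():
                continue
            try:
                yield orjson.loads(line)
            except orjson.JSONDecodeError:
                yield json.loads(line)


    print(list(parse_jsonl(b'{"a": 1}\n{"b": 2}')))  # [{'a': 1}, {'b': 2}]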
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py
CHANGED
@@ -6,7 +6,7 @@ from abc import abstractmethod
 from functools import cache, cached_property, lru_cache
 from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
 
-from
+from deprecated import deprecated
 
 from airbyte_cdk import AirbyteMessage
 from airbyte_cdk.models import SyncMode
@@ -179,7 +179,7 @@ class AbstractFileBasedStream(Stream):
         )
 
     @cached_property
-    @deprecated("
+    @deprecated(version="3.7.0")
     def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
         return self._availability_strategy
 
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py
CHANGED
@@ -7,7 +7,7 @@ import logging
 from functools import cache, lru_cache
 from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
 
-from
+from deprecated.classic import deprecated
 
 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -56,10 +56,7 @@ This module contains adapters to help enabling concurrency on File-based Stream
 """
 
 
-@deprecated(
-    "This class is experimental. Use at your own risk.",
-    category=ExperimentalClassWarning,
-)
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
 class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBasedStream):
     @classmethod
     def create_from_stream(
@@ -146,7 +143,7 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
         return self._legacy_stream.supports_incremental
 
     @property
-    @deprecated("
+    @deprecated(version="3.7.0")
     def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
         return self._legacy_stream.availability_strategy
 
airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py
CHANGED
@@ -21,7 +21,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor):
     CURSOR_FIELD = "_ab_source_file_last_modified"
 
     def __init__(self, stream_config: FileBasedStreamConfig, **_: Any):
-        super().__init__(stream_config)
+        super().__init__(stream_config)
         self._file_to_datetime_history: MutableMapping[str, str] = {}
         self._time_window_if_history_is_full = timedelta(
             days=stream_config.days_to_sync_if_history_is_full
airbyte_cdk/sources/http_logger.py
CHANGED
@@ -14,7 +14,7 @@ def format_http_message(
     title: str,
     description: str,
     stream_name: Optional[str],
-    is_auxiliary: bool
+    is_auxiliary: bool = None,
 ) -> LogMessage:
     request = response.request
     log_message = {
@@ -42,10 +42,10 @@ def format_http_message(
         "url": {"full": request.url},
     }
     if is_auxiliary is not None:
-        log_message["http"]["is_auxiliary"] = is_auxiliary
+        log_message["http"]["is_auxiliary"] = is_auxiliary
     if stream_name:
         log_message["airbyte_cdk"] = {"stream": {"name": stream_name}}
-    return log_message
+    return log_message
 
 
 def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]:
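The `format_http_message` change gives `is_auxiliary` a default so existing call sites keep working, while the body only records the flag when a caller set it. A sketch of that optional-field pattern with an abbreviated message shape:

    from typing import Optional


    def format_http_message(url: str, is_auxiliary: Optional[bool] = None) -> dict:
        log_message = {"http": {"url": {"full": url}}}
        # Attach the flag only when a caller set it explicitly.
        if is_auxiliary is not None:
            log_message["http"]["is_auxiliary"] = is_auxiliary
        return log_message


    print(format_http_message("https://example.com"))        # no is_auxiliary key
    print(format_http_message("https://example.com", True))  # includes is_auxiliary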
airbyte_cdk/sources/streams/concurrent/abstract_stream.py
CHANGED
@@ -5,7 +5,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Iterable, Mapping, Optional
 
-from
+from deprecated.classic import deprecated
 
 from airbyte_cdk.models import AirbyteStream
 from airbyte_cdk.sources.source import ExperimentalClassWarning
@@ -14,10 +14,7 @@ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
 
 
-@deprecated(
-    "This class is experimental. Use at your own risk.",
-    category=ExperimentalClassWarning,
-)
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
 class AbstractStream(ABC):
     """
     AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
airbyte_cdk/sources/streams/concurrent/adapters.py
CHANGED
@@ -8,7 +8,7 @@ import logging
 from functools import lru_cache
 from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
 
-from
+from deprecated.classic import deprecated
 
 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -50,10 +50,7 @@ This module contains adapters to help enabling concurrency on Stream objects wit
 """
 
 
-@deprecated(
-    "This class is experimental. Use at your own risk.",
-    category=ExperimentalClassWarning,
-)
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
 class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
     """
     The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream.
@@ -300,7 +297,7 @@ class StreamPartition(Partition):
             yield Record(
                 data=data_to_return,
                 stream_name=self.stream_name(),
-                associated_slice=self._slice,
+                associated_slice=self._slice,
             )
         else:
             self._message_repository.emit_message(record_data)
airbyte_cdk/sources/streams/concurrent/availability_strategy.py
CHANGED
@@ -6,7 +6,7 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Optional
 
-from
+from deprecated.classic import deprecated
 
 from airbyte_cdk.sources.source import ExperimentalClassWarning
 
@@ -48,10 +48,7 @@ class StreamUnavailable(StreamAvailability):
 STREAM_AVAILABLE = StreamAvailable()
 
 
-@deprecated(
-    "This class is experimental. Use at your own risk.",
-    category=ExperimentalClassWarning,
-)
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
 class AbstractAvailabilityStrategy(ABC):
     """
     AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK.
@@ -71,10 +68,7 @@ class AbstractAvailabilityStrategy(ABC):
     """
 
 
-@deprecated(
-    "This class is experimental. Use at your own risk.",
-    category=ExperimentalClassWarning,
-)
+@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
 class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy):
     """
     An availability strategy that always indicates a stream is available.
airbyte_cdk/sources/streams/concurrent/cursor.py
CHANGED
@@ -473,7 +473,7 @@ class ConcurrentCursor(Cursor):
         :return: True if the record's cursor value falls within the sync boundaries
         """
         try:
-            record_cursor_value: CursorValueType = self._extract_cursor_value(record)
+            record_cursor_value: CursorValueType = self._extract_cursor_value(record)  # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
         except ValueError:
             self._log_for_record_without_cursor_value()
             return True
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py
CHANGED
@@ -141,7 +141,7 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
             raise ValueError(
                 f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
-        return dt_object
+        return dt_object  # type: ignore # we are manually type checking because pendulum.parse may return different types
 
 
 class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
@@ -178,7 +178,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
             raise ValueError(
                 f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
-        return dt_object
+        return dt_object  # type: ignore # we are manually type checking because pendulum.parse may return different types
 
 
 class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
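Both converter hunks attach the same `# type: ignore` to a manual isinstance narrowing, needed because `pendulum.parse` can return dates, times, or durations as well as datetimes. A minimal sketch of that narrowing:

    import pendulum
    from pendulum import DateTime


    def parse_datetime(timestamp: str) -> DateTime:
        dt_object = pendulum.parse(timestamp)
        # pendulum.parse may return Date, Time, or Duration, so narrow manually.
        if not isinstance(dt_object, DateTime):
            raise ValueError(
                f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
            )
        return dt_object


    print(parse_datetime("2024-01-01T00:00:00Z"))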