airbyte-cdk 6.8.1rc9__py3-none-any.whl → 6.8.2.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +11 -5
- airbyte_cdk/config_observation.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -1
- airbyte_cdk/connector_builder/message_grouper.py +10 -10
- airbyte_cdk/destinations/destination.py +1 -1
- airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +12 -4
- airbyte_cdk/entrypoint.py +7 -6
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/sources/abstract_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -1
- airbyte_cdk/sources/connector_state_manager.py +9 -4
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +6 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +76 -28
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +10 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +16 -17
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +4 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +3 -5
- airbyte_cdk/sources/declarative/incremental/__init__.py +3 -0
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +270 -0
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +8 -6
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +9 -0
- airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
- airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +71 -17
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +13 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +8 -6
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +5 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
- airbyte_cdk/sources/declarative/spec/spec.py +1 -1
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +0 -1
- airbyte_cdk/sources/embedded/base_integration.py +3 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +18 -7
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +14 -11
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +11 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +3 -3
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +5 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +10 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- airbyte_cdk/sources/streams/core.py +17 -14
- airbyte_cdk/sources/streams/http/http.py +19 -19
- airbyte_cdk/sources/streams/http/http_client.py +4 -48
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +62 -33
- airbyte_cdk/sources/utils/record_helper.py +1 -1
- airbyte_cdk/sources/utils/schema_helpers.py +1 -1
- airbyte_cdk/sources/utils/transform.py +34 -15
- airbyte_cdk/test/entrypoint_wrapper.py +11 -6
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/message_utils.py +4 -3
- airbyte_cdk/utils/spec_schema_transformations.py +3 -2
- airbyte_cdk/utils/traced_exception.py +14 -12
- airbyte_cdk-6.8.2.dev1.dist-info/METADATA +111 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/RECORD +72 -71
- airbyte_cdk-6.8.1rc9.dist-info/METADATA +0 -307
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.8.1rc9.dist-info → airbyte_cdk-6.8.2.dev1.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py
CHANGED
@@ -130,11 +130,11 @@ class SubstreamPartitionRouter(PartitionRouter):
                     if value:
                         params.update(
                             {
-                                parent_config.request_option.field_name.eval(
+                                parent_config.request_option.field_name.eval(  # type: ignore [union-attr]
                                     config=self.config
                                 ): value
                             }
-                        )
+                        )
         return params

     def stream_slices(self) -> Iterable[StreamSlice]:
@@ -162,9 +162,9 @@ class SubstreamPartitionRouter(PartitionRouter):
             extra_fields = None
             if parent_stream_config.extra_fields:
                 extra_fields = [
-                    [field_path_part.eval(self.config) for field_path_part in field_path]
+                    [field_path_part.eval(self.config) for field_path_part in field_path]  # type: ignore [union-attr]
                     for field_path in parent_stream_config.extra_fields
-                ]
+                ]

             # read_stateless() assumes the parent is not concurrent. This is currently okay since the concurrent CDK does
             # not support either substreams or RFR, but something that needs to be considered once we do
@@ -192,7 +192,10 @@ class SubstreamPartitionRouter(PartitionRouter):
                         message=f"Parent stream returned records as invalid type {type(parent_record)}"
                     )
                 try:
-                    partition_value = dpath.get(parent_record, parent_field)
+                    partition_value = dpath.get(
+                        parent_record,  # type: ignore [arg-type]
+                        parent_field,
+                    )
                 except KeyError:
                     continue

@@ -228,7 +231,10 @@ class SubstreamPartitionRouter(PartitionRouter):
                 if extra_fields:
                     for extra_field_path in extra_fields:
                         try:
-                            extra_field_value = dpath.get(parent_record, extra_field_path)
+                            extra_field_value = dpath.get(
+                                parent_record,  # type: ignore [arg-type]
+                                extra_field_path,
+                            )
                             self.logger.debug(
                                 f"Extracted extra_field_path: {extra_field_path} with value: {extra_field_value}"
                             )
@@ -291,7 +297,7 @@ class SubstreamPartitionRouter(PartitionRouter):
         if not parent_state and incremental_dependency:
             # Attempt to retrieve child state
             substream_state = list(stream_state.values())
-            substream_state = substream_state[0] if substream_state else {}
+            substream_state = substream_state[0] if substream_state else {}  # type: ignore [assignment]  # Incorrect type for assignment
             parent_state = {}

             # Copy child state to parent streams with incremental dependencies
airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py
CHANGED
@@ -141,7 +141,7 @@ class DefaultErrorHandler(ErrorHandler):
             for backoff_strategy in self.backoff_strategies:
                 backoff = backoff_strategy.backoff_time(
                     response_or_exception=response_or_exception, attempt_count=attempt_count
-                )
+                )
                 if backoff:
                     return backoff
         return backoff
airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py
CHANGED
@@ -151,21 +151,23 @@ class HttpResponseFilter:
         :param response: The HTTP response which can be used during interpolation
         :return: The evaluated error message string to be emitted
         """
-        return self.error_message.eval(
+        return self.error_message.eval(  # type: ignore [no-any-return, union-attr]
             self.config, response=self._safe_response_json(response), headers=response.headers
-        )
+        )

     def _response_matches_predicate(self, response: requests.Response) -> bool:
         return (
             bool(
-                self.predicate.condition
-                and self.predicate.eval(
-                    None, response=self._safe_response_json(response), headers=response.headers
+                self.predicate.condition  # type: ignore [union-attr]
+                and self.predicate.eval(  # type: ignore [union-attr]
+                    None,  # type: ignore [arg-type]
+                    response=self._safe_response_json(response),
+                    headers=response.headers,
                 )
             )
             if self.predicate
             else False
-        )
+        )

     def _response_contains_error_message(self, response: requests.Response) -> bool:
         if not self.error_message_contains:
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py
CHANGED
@@ -194,7 +194,7 @@ class DefaultPaginator(Paginator):
             and self.pagination_strategy.get_page_size()
             and self.page_size_option.inject_into == option_type
         ):
-            options[self.page_size_option.field_name.eval(config=self.config)] = (
+            options[self.page_size_option.field_name.eval(config=self.config)] = (  # type: ignore [union-attr]
                 self.pagination_strategy.get_page_size()
             )  # type: ignore # field_name is always cast to an interpolated string
         return options
airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py
CHANGED
@@ -85,7 +85,7 @@ class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider):
                 self._partition_field_start.eval(self.config)
             )
         if self.end_time_option and self.end_time_option.inject_into == option_type:
-            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
+            options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(  # type: ignore [union-attr]
                 self._partition_field_end.eval(self.config)
-            )
+            )
         return options
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py
CHANGED
@@ -5,7 +5,7 @@
 from dataclasses import InitVar, dataclass, field
 from typing import Any, Mapping, MutableMapping, Optional, Union

-from
+from typing_extensions import deprecated

 from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
 from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
airbyte_cdk/sources/declarative/retrievers/async_retriever.py
CHANGED
@@ -4,7 +4,7 @@
 from dataclasses import InitVar, dataclass, field
 from typing import Any, Callable, Iterable, Mapping, Optional

-from
+from typing_extensions import deprecated

 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
@@ -21,7 +21,10 @@ from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException


-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 @dataclass
 class AsyncRetriever(Retriever):
     config: Config
airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
CHANGED
@@ -178,7 +178,7 @@ class SimpleRetriever(Retriever):
             stream_slice,
             next_page_token,
             self._paginator.get_request_headers,
-            self.
+            self.request_option_provider.get_request_headers,
         )
         if isinstance(headers, str):
             raise ValueError("Request headers cannot be a string")
airbyte_cdk/sources/embedded/base_integration.py
CHANGED
@@ -52,8 +52,9 @@ class BaseEmbeddedIntegration(ABC, Generic[TConfig, TOutput]):
         for message in self.source.read(self.config, configured_catalog, state):
             if message.type == Type.RECORD:
                 output = self._handle_record(
-                    message.record, get_defined_id(stream, message.record.data)
-                )
+                    message.record,
+                    get_defined_id(stream, message.record.data),  # type: ignore[union-attr, arg-type]
+                )
                 if output:
                     yield output
             elif message.type is Type.STATE and message.state:
airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py
CHANGED
@@ -2,6 +2,8 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #

+from __future__ import annotations
+
 import logging
 from abc import abstractmethod
 from typing import TYPE_CHECKING, Optional, Tuple
@@ -22,8 +24,11 @@ if TYPE_CHECKING:

 class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):
     @abstractmethod
-    def check_availability(
-        self, stream: Stream, logger: logging.Logger, _: Optional[Source]
+    def check_availability(  # type: ignore[override]  # Signature doesn't match base class
+        self,
+        stream: Stream,
+        logger: logging.Logger,
+        _: Optional[Source],
     ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream.
@@ -34,7 +39,10 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):

     @abstractmethod
     def check_availability_and_parsability(
-        self,
+        self,
+        stream: AbstractFileBasedStream,
+        logger: logging.Logger,
+        _: Optional[Source],
     ) -> Tuple[bool, Optional[str]]:
         """
         Performs a connection check for the stream, as well as additional checks that
@@ -46,7 +54,7 @@ class AbstractFileBasedAvailabilityStrategy(AvailabilityStrategy):


 class AbstractFileBasedAvailabilityStrategyWrapper(AbstractAvailabilityStrategy):
-    def __init__(self, stream:
+    def __init__(self, stream: AbstractFileBasedStream) -> None:
         self.stream = stream

     def check_availability(self, logger: logging.Logger) -> StreamAvailability:
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py
CHANGED
@@ -2,6 +2,8 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #

+from __future__ import annotations
+
 import logging
 import traceback
 from typing import TYPE_CHECKING, Optional, Tuple
@@ -25,12 +27,15 @@ if TYPE_CHECKING:


 class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy):
-    def __init__(self, stream_reader: AbstractFileBasedStreamReader):
+    def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None:
         self.stream_reader = stream_reader

-    def check_availability(
-        self, stream: AbstractFileBasedStream, logger: logging.Logger, _: Optional[Source]
-    ) -> Tuple[bool, Optional[str]]:
+    def check_availability(  # type: ignore[override]  # Signature doesn't match base class
+        self,
+        stream: AbstractFileBasedStream,
+        logger: logging.Logger,
+        _: Optional[Source],
+    ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream (verify that we can list files from the stream).

@@ -44,7 +49,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         return True, None

     def check_availability_and_parsability(
-        self,
+        self,
+        stream: AbstractFileBasedStream,
+        logger: logging.Logger,
+        _: Optional[Source],
     ) -> Tuple[bool, Optional[str]]:
         """
         Perform a connection check for the stream.
@@ -82,7 +90,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy

         return True, None

-    def _check_list_files(self, stream:
+    def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile:
         """
         Check that we can list files from the stream.

@@ -102,7 +110,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
         return file

     def _check_parse_record(
-        self,
+        self,
+        stream: AbstractFileBasedStream,
+        file: RemoteFile,
+        logger: logging.Logger,
     ) -> None:
         parser = stream.get_parser()

airbyte_cdk/sources/file_based/file_types/avro_parser.py
CHANGED
@@ -3,7 +3,7 @@
 #

 import logging
-from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
+from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, cast

 import fastavro

@@ -64,18 +64,20 @@ class AvroParser(FileTypeParser):
             raise ValueError(f"Expected ParquetFormat, got {avro_format}")

         with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
-            avro_reader = fastavro.reader(fp)
+            avro_reader = fastavro.reader(fp)  # type: ignore [arg-type]
             avro_schema = avro_reader.writer_schema
-            if not avro_schema["type"] == "record":
-                unsupported_type = avro_schema["type"]
+            if not avro_schema["type"] == "record":  # type: ignore [index, call-overload]
+                unsupported_type = avro_schema["type"]  # type: ignore [index, call-overload]
                 raise ValueError(
                     f"Only record based avro files are supported. Found {unsupported_type}"
                 )
             json_schema = {
-                field["name"]: AvroParser._convert_avro_type_to_json(
-                    avro_format, field["name"], field["type"]
+                field["name"]: AvroParser._convert_avro_type_to_json(  # type: ignore [index]
+                    avro_format,
+                    field["name"],  # type: ignore [index]
+                    field["type"],  # type: ignore [index]
                 )
-                for field in avro_schema["fields"]
+                for field in avro_schema["fields"]  # type: ignore [index, call-overload]
             }
             return json_schema

@@ -180,18 +182,19 @@ class AvroParser(FileTypeParser):
         line_no = 0
         try:
             with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
-                avro_reader = fastavro.reader(fp)
+                avro_reader = fastavro.reader(fp)  # type: ignore [arg-type]
                 schema = avro_reader.writer_schema
                 schema_field_name_to_type = {
-                    field["name"]: field["type"] for field in schema["fields"]
+                    field["name"]: cast(dict[str, Any], field["type"])  # type: ignore [index]
+                    for field in schema["fields"]  # type: ignore [index, call-overload]  # If schema is not dict, it is not subscriptable by strings
                 }
                 for record in avro_reader:
                     line_no += 1
                     yield {
                         record_field: self._to_output_value(
                             avro_format,
-                            schema_field_name_to_type[record_field],
-                            record[record_field],
+                            schema_field_name_to_type[record_field],  # type: ignore [index]  # Any not subscriptable
+                            record[record_field],  # type: ignore [index]  # Any not subscriptable
                         )
                         for record_field, record_value in schema_field_name_to_type.items()
                     }
airbyte_cdk/sources/file_based/file_types/csv_parser.py
CHANGED
@@ -12,7 +12,7 @@ from io import IOBase
 from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
 from uuid import uuid4

-from orjson import orjson
+import orjson

 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.file_based.config.csv_format import (
@@ -117,7 +117,7 @@ class _CsvReader:
         """
         # Note that this method assumes the dialect has already been registered if we're parsing the headers
         if isinstance(config_format.header_definition, CsvHeaderUserProvided):
-            return config_format.header_definition.column_names
+            return config_format.header_definition.column_names

         if isinstance(config_format.header_definition, CsvHeaderAutogenerated):
             self._skip_rows(
@@ -229,7 +229,7 @@ class CsvParser(FileTypeParser):
         if discovered_schema:
             property_types = {
                 col: prop["type"] for col, prop in discovered_schema["properties"].items()
-            }
+            }
             deduped_property_types = CsvParser._pre_propcess_property_types(property_types)
         else:
             deduped_property_types = {}
airbyte_cdk/sources/file_based/file_types/excel_parser.py
CHANGED
@@ -7,10 +7,10 @@ from io import IOBase
 from pathlib import Path
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union

+import orjson
 import pandas as pd
 from numpy import datetime64, issubdtype
 from numpy import dtype as dtype_
-from orjson import orjson
 from pydantic.v1 import BaseModel

 from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
@@ -69,8 +69,11 @@ class ExcelParser(FileTypeParser):
                 df = self.open_and_parse_file(fp)
                 for column, df_type in df.dtypes.items():
                     # Choose the broadest data type if the column's data type differs in dataframes
-                    prev_frame_column_type = fields.get(column)
-                    fields[column] = self.dtype_to_json_type(prev_frame_column_type, df_type)
+                    prev_frame_column_type = fields.get(column)  # type: ignore [call-overload]
+                    fields[column] = self.dtype_to_json_type(  # type: ignore [index]
+                        prev_frame_column_type,
+                        df_type,
+                    )

         schema = {
             field: (
@@ -136,7 +139,10 @@ class ExcelParser(FileTypeParser):
         return FileReadMode.READ_BINARY

     @staticmethod
-    def dtype_to_json_type(current_type: Optional[str], dtype: dtype_) -> str:
+    def dtype_to_json_type(
+        current_type: Optional[str],
+        dtype: dtype_,  # type: ignore [type-arg]
+    ) -> str:
         """
         Convert Pandas DataFrame types to Airbyte Types.

@@ -187,4 +193,4 @@ class ExcelParser(FileTypeParser):
         Returns:
             pd.DataFrame: Parsed data from the Excel file.
         """
-        return pd.ExcelFile(fp, engine="calamine").parse()
+        return pd.ExcelFile(fp, engine="calamine").parse()  # type: ignore [arg-type, call-overload, no-any-return]
airbyte_cdk/sources/file_based/file_types/jsonl_parser.py
CHANGED
@@ -6,7 +6,7 @@ import json
 import logging
 from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union

-from orjson import orjson
+import orjson

 from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
 from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py
CHANGED
@@ -6,7 +6,7 @@ from abc import abstractmethod
 from functools import cache, cached_property, lru_cache
 from typing import Any, Dict, Iterable, List, Mapping, Optional, Type

-from
+from typing_extensions import deprecated

 from airbyte_cdk import AirbyteMessage
 from airbyte_cdk.models import SyncMode
@@ -179,7 +179,7 @@ class AbstractFileBasedStream(Stream):
         )

     @cached_property
-    @deprecated(version
+    @deprecated("Deprecated as of CDK version 3.7.0.")
     def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
         return self._availability_strategy

airbyte_cdk/sources/file_based/stream/concurrent/adapters.py
CHANGED
@@ -7,7 +7,7 @@ import logging
 from functools import cache, lru_cache
 from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union

-from
+from typing_extensions import deprecated

 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -56,7 +56,10 @@ This module contains adapters to help enabling concurrency on File-based Stream
 """


-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBasedStream):
     @classmethod
     def create_from_stream(
@@ -143,7 +146,7 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
         return self._legacy_stream.supports_incremental

     @property
-    @deprecated(version
+    @deprecated("Deprecated as of CDK version 3.7.0.")
     def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
         return self._legacy_stream.availability_strategy

airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py
CHANGED
@@ -21,7 +21,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor):
     CURSOR_FIELD = "_ab_source_file_last_modified"

     def __init__(self, stream_config: FileBasedStreamConfig, **_: Any):
-        super().__init__(stream_config)
+        super().__init__(stream_config)  # type: ignore [safe-super]
         self._file_to_datetime_history: MutableMapping[str, str] = {}
         self._time_window_if_history_is_full = timedelta(
             days=stream_config.days_to_sync_if_history_is_full
airbyte_cdk/sources/http_logger.py
CHANGED
@@ -14,7 +14,7 @@ def format_http_message(
     title: str,
     description: str,
     stream_name: Optional[str],
-    is_auxiliary: bool = None,
+    is_auxiliary: bool | None = None,
 ) -> LogMessage:
     request = response.request
     log_message = {
@@ -42,10 +42,10 @@ def format_http_message(
         "url": {"full": request.url},
     }
     if is_auxiliary is not None:
-        log_message["http"]["is_auxiliary"] = is_auxiliary
+        log_message["http"]["is_auxiliary"] = is_auxiliary  # type: ignore [index]
     if stream_name:
         log_message["airbyte_cdk"] = {"stream": {"name": stream_name}}
-    return log_message
+    return log_message  # type: ignore [return-value]  # got "dict[str, object]", expected "dict[str, JsonType]"


 def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]:
airbyte_cdk/sources/streams/concurrent/abstract_stream.py
CHANGED
@@ -5,7 +5,7 @@
 from abc import ABC, abstractmethod
 from typing import Any, Iterable, Mapping, Optional

-from
+from typing_extensions import deprecated

 from airbyte_cdk.models import AirbyteStream
 from airbyte_cdk.sources.source import ExperimentalClassWarning
@@ -14,7 +14,10 @@ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
 from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition


-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class AbstractStream(ABC):
     """
     AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
airbyte_cdk/sources/streams/concurrent/adapters.py
CHANGED
@@ -8,7 +8,7 @@ import logging
 from functools import lru_cache
 from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union

-from
+from typing_extensions import deprecated

 from airbyte_cdk.models import (
     AirbyteLogMessage,
@@ -50,7 +50,10 @@ This module contains adapters to help enabling concurrency on Stream objects wit
 """


-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
     """
     The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream.
@@ -297,7 +300,7 @@ class StreamPartition(Partition):
             yield Record(
                 data=data_to_return,
                 stream_name=self.stream_name(),
-                associated_slice=self._slice,
+                associated_slice=self._slice,  # type: ignore [arg-type]
             )
         else:
             self._message_repository.emit_message(record_data)
airbyte_cdk/sources/streams/concurrent/availability_strategy.py
CHANGED
@@ -6,7 +6,7 @@ import logging
 from abc import ABC, abstractmethod
 from typing import Optional

-from
+from typing_extensions import deprecated

 from airbyte_cdk.sources.source import ExperimentalClassWarning

@@ -48,7 +48,10 @@ class StreamUnavailable(StreamAvailability):
 STREAM_AVAILABLE = StreamAvailable()


-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class AbstractAvailabilityStrategy(ABC):
     """
     AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK.
@@ -68,7 +71,10 @@ class AbstractAvailabilityStrategy(ABC):
     """


-@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
+@deprecated(
+    "This class is experimental. Use at your own risk.",
+    category=ExperimentalClassWarning,
+)
 class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy):
     """
     An availability strategy that always indicates a stream is available.
airbyte_cdk/sources/streams/concurrent/cursor.py
CHANGED
@@ -240,6 +240,15 @@ class ConcurrentCursor(Cursor):
     def _extract_cursor_value(self, record: Record) -> Any:
         return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))

+    def close_partition_without_emit(self, partition: Partition) -> None:
+        slice_count_before = len(self.state.get("slices", []))
+        self._add_slice_to_state(partition)
+        if slice_count_before < len(
+            self.state["slices"]
+        ):  # only emit if at least one slice has been processed
+            self._merge_partitions()
+            self._has_closed_at_least_one_slice = True
+
     def close_partition(self, partition: Partition) -> None:
         slice_count_before = len(self.state.get("slices", []))
         self._add_slice_to_state(partition)
@@ -473,7 +482,7 @@ class ConcurrentCursor(Cursor):
         :return: True if the record's cursor value falls within the sync boundaries
         """
         try:
-            record_cursor_value: CursorValueType = self._extract_cursor_value(record)
+            record_cursor_value: CursorValueType = self._extract_cursor_value(record)
         except ValueError:
             self._log_for_record_without_cursor_value()
             return True
airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py
CHANGED
@@ -141,7 +141,7 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
             raise ValueError(
                 f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
-        return dt_object
+        return dt_object


 class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
@@ -178,7 +178,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
             raise ValueError(
                 f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
             )
-        return dt_object
+        return dt_object


 class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
|