airbyte-cdk 6.8.1rc8__py3-none-any.whl → 6.8.1rc10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +11 -5
- airbyte_cdk/config_observation.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -1
- airbyte_cdk/connector_builder/message_grouper.py +10 -10
- airbyte_cdk/destinations/destination.py +1 -1
- airbyte_cdk/destinations/vector_db_based/embedder.py +2 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +12 -4
- airbyte_cdk/entrypoint.py +7 -6
- airbyte_cdk/logger.py +2 -2
- airbyte_cdk/sources/abstract_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -1
- airbyte_cdk/sources/connector_state_manager.py +9 -4
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +6 -1
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -42
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +10 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +116 -19
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +4 -1
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +8 -6
- airbyte_cdk/sources/declarative/interpolation/jinja.py +35 -36
- airbyte_cdk/sources/declarative/interpolation/macros.py +1 -1
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +53 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +95 -2
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +6 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +100 -27
- airbyte_cdk/sources/declarative/partition_routers/__init__.py +2 -1
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +13 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +8 -6
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -1
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +2 -2
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -1
- airbyte_cdk/sources/declarative/resolvers/__init__.py +13 -0
- airbyte_cdk/sources/declarative/resolvers/components_resolver.py +55 -0
- airbyte_cdk/sources/declarative/resolvers/http_components_resolver.py +106 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +5 -2
- airbyte_cdk/sources/declarative/spec/spec.py +1 -1
- airbyte_cdk/sources/embedded/base_integration.py +3 -2
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +18 -7
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +14 -11
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +3 -3
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +11 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -1
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +3 -3
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +5 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +6 -3
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/concurrent/cursor.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +2 -2
- airbyte_cdk/sources/streams/core.py +17 -14
- airbyte_cdk/sources/streams/http/http.py +19 -19
- airbyte_cdk/sources/streams/http/http_client.py +4 -48
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_token.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +62 -33
- airbyte_cdk/sources/utils/record_helper.py +1 -1
- airbyte_cdk/sources/utils/schema_helpers.py +1 -1
- airbyte_cdk/sources/utils/transform.py +34 -15
- airbyte_cdk/test/entrypoint_wrapper.py +11 -6
- airbyte_cdk/test/mock_http/response_builder.py +1 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +1 -1
- airbyte_cdk/utils/event_timing.py +10 -10
- airbyte_cdk/utils/message_utils.py +4 -3
- airbyte_cdk/utils/spec_schema_transformations.py +3 -2
- airbyte_cdk/utils/traced_exception.py +14 -12
- airbyte_cdk-6.8.1rc10.dist-info/METADATA +111 -0
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/RECORD +73 -70
- airbyte_cdk-6.8.1rc8.dist-info/METADATA +0 -307
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.8.1rc8.dist-info → airbyte_cdk-6.8.1rc10.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py
CHANGED
@@ -2,6 +2,8 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
from __future__ import annotations
|
6
|
+
|
5
7
|
import logging
|
6
8
|
import traceback
|
7
9
|
from typing import TYPE_CHECKING, Optional, Tuple
|
@@ -25,12 +27,15 @@ if TYPE_CHECKING:
|
|
25
27
|
|
26
28
|
|
27
29
|
class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy):
|
28
|
-
def __init__(self, stream_reader: AbstractFileBasedStreamReader):
|
30
|
+
def __init__(self, stream_reader: AbstractFileBasedStreamReader) -> None:
|
29
31
|
self.stream_reader = stream_reader
|
30
32
|
|
31
|
-
def check_availability(
|
32
|
-
self,
|
33
|
-
|
33
|
+
def check_availability( # type: ignore[override] # Signature doesn't match base class
|
34
|
+
self,
|
35
|
+
stream: AbstractFileBasedStream,
|
36
|
+
logger: logging.Logger,
|
37
|
+
_: Optional[Source],
|
38
|
+
) -> Tuple[bool, Optional[str]]:
|
34
39
|
"""
|
35
40
|
Perform a connection check for the stream (verify that we can list files from the stream).
|
36
41
|
|
@@ -44,7 +49,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
44
49
|
return True, None
|
45
50
|
|
46
51
|
def check_availability_and_parsability(
|
47
|
-
self,
|
52
|
+
self,
|
53
|
+
stream: AbstractFileBasedStream,
|
54
|
+
logger: logging.Logger,
|
55
|
+
_: Optional[Source],
|
48
56
|
) -> Tuple[bool, Optional[str]]:
|
49
57
|
"""
|
50
58
|
Perform a connection check for the stream.
|
@@ -82,7 +90,7 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
82
90
|
|
83
91
|
return True, None
|
84
92
|
|
85
|
-
def _check_list_files(self, stream:
|
93
|
+
def _check_list_files(self, stream: AbstractFileBasedStream) -> RemoteFile:
|
86
94
|
"""
|
87
95
|
Check that we can list files from the stream.
|
88
96
|
|
@@ -102,7 +110,10 @@ class DefaultFileBasedAvailabilityStrategy(AbstractFileBasedAvailabilityStrategy
|
|
102
110
|
return file
|
103
111
|
|
104
112
|
def _check_parse_record(
|
105
|
-
self,
|
113
|
+
self,
|
114
|
+
stream: AbstractFileBasedStream,
|
115
|
+
file: RemoteFile,
|
116
|
+
logger: logging.Logger,
|
106
117
|
) -> None:
|
107
118
|
parser = stream.get_parser()
|
108
119
|
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
|
6
|
+
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, cast
|
7
7
|
|
8
8
|
import fastavro
|
9
9
|
|
@@ -64,18 +64,20 @@ class AvroParser(FileTypeParser):
|
|
64
64
|
raise ValueError(f"Expected ParquetFormat, got {avro_format}")
|
65
65
|
|
66
66
|
with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
|
67
|
-
avro_reader = fastavro.reader(fp)
|
67
|
+
avro_reader = fastavro.reader(fp) # type: ignore [arg-type]
|
68
68
|
avro_schema = avro_reader.writer_schema
|
69
|
-
if not avro_schema["type"] == "record":
|
70
|
-
unsupported_type = avro_schema["type"]
|
69
|
+
if not avro_schema["type"] == "record": # type: ignore [index, call-overload]
|
70
|
+
unsupported_type = avro_schema["type"] # type: ignore [index, call-overload]
|
71
71
|
raise ValueError(
|
72
72
|
f"Only record based avro files are supported. Found {unsupported_type}"
|
73
73
|
)
|
74
74
|
json_schema = {
|
75
|
-
field["name"]: AvroParser._convert_avro_type_to_json(
|
76
|
-
avro_format,
|
75
|
+
field["name"]: AvroParser._convert_avro_type_to_json( # type: ignore [index]
|
76
|
+
avro_format,
|
77
|
+
field["name"], # type: ignore [index]
|
78
|
+
field["type"], # type: ignore [index]
|
77
79
|
)
|
78
|
-
for field in avro_schema["fields"]
|
80
|
+
for field in avro_schema["fields"] # type: ignore [index, call-overload]
|
79
81
|
}
|
80
82
|
return json_schema
|
81
83
|
|
@@ -180,18 +182,19 @@ class AvroParser(FileTypeParser):
|
|
180
182
|
line_no = 0
|
181
183
|
try:
|
182
184
|
with stream_reader.open_file(file, self.file_read_mode, self.ENCODING, logger) as fp:
|
183
|
-
avro_reader = fastavro.reader(fp)
|
185
|
+
avro_reader = fastavro.reader(fp) # type: ignore [arg-type]
|
184
186
|
schema = avro_reader.writer_schema
|
185
187
|
schema_field_name_to_type = {
|
186
|
-
field["name"]: field["type"]
|
188
|
+
field["name"]: cast(dict[str, Any], field["type"]) # type: ignore [index]
|
189
|
+
for field in schema["fields"] # type: ignore [index, call-overload] # If schema is not dict, it is not subscriptable by strings
|
187
190
|
}
|
188
191
|
for record in avro_reader:
|
189
192
|
line_no += 1
|
190
193
|
yield {
|
191
194
|
record_field: self._to_output_value(
|
192
195
|
avro_format,
|
193
|
-
schema_field_name_to_type[record_field],
|
194
|
-
record[record_field],
|
196
|
+
schema_field_name_to_type[record_field], # type: ignore [index] # Any not subscriptable
|
197
|
+
record[record_field], # type: ignore [index] # Any not subscriptable
|
195
198
|
)
|
196
199
|
for record_field, record_value in schema_field_name_to_type.items()
|
197
200
|
}
|
@@ -12,7 +12,7 @@ from io import IOBase
|
|
12
12
|
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
|
13
13
|
from uuid import uuid4
|
14
14
|
|
15
|
-
|
15
|
+
import orjson
|
16
16
|
|
17
17
|
from airbyte_cdk.models import FailureType
|
18
18
|
from airbyte_cdk.sources.file_based.config.csv_format import (
|
@@ -117,7 +117,7 @@ class _CsvReader:
|
|
117
117
|
"""
|
118
118
|
# Note that this method assumes the dialect has already been registered if we're parsing the headers
|
119
119
|
if isinstance(config_format.header_definition, CsvHeaderUserProvided):
|
120
|
-
return config_format.header_definition.column_names
|
120
|
+
return config_format.header_definition.column_names
|
121
121
|
|
122
122
|
if isinstance(config_format.header_definition, CsvHeaderAutogenerated):
|
123
123
|
self._skip_rows(
|
@@ -229,7 +229,7 @@ class CsvParser(FileTypeParser):
|
|
229
229
|
if discovered_schema:
|
230
230
|
property_types = {
|
231
231
|
col: prop["type"] for col, prop in discovered_schema["properties"].items()
|
232
|
-
}
|
232
|
+
}
|
233
233
|
deduped_property_types = CsvParser._pre_propcess_property_types(property_types)
|
234
234
|
else:
|
235
235
|
deduped_property_types = {}
|
@@ -7,10 +7,10 @@ from io import IOBase
|
|
7
7
|
from pathlib import Path
|
8
8
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
9
9
|
|
10
|
+
import orjson
|
10
11
|
import pandas as pd
|
11
12
|
from numpy import datetime64, issubdtype
|
12
13
|
from numpy import dtype as dtype_
|
13
|
-
from orjson import orjson
|
14
14
|
from pydantic.v1 import BaseModel
|
15
15
|
|
16
16
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
@@ -69,8 +69,11 @@ class ExcelParser(FileTypeParser):
|
|
69
69
|
df = self.open_and_parse_file(fp)
|
70
70
|
for column, df_type in df.dtypes.items():
|
71
71
|
# Choose the broadest data type if the column's data type differs in dataframes
|
72
|
-
prev_frame_column_type = fields.get(column)
|
73
|
-
fields[column] = self.dtype_to_json_type(
|
72
|
+
prev_frame_column_type = fields.get(column) # type: ignore [call-overload]
|
73
|
+
fields[column] = self.dtype_to_json_type( # type: ignore [index]
|
74
|
+
prev_frame_column_type,
|
75
|
+
df_type,
|
76
|
+
)
|
74
77
|
|
75
78
|
schema = {
|
76
79
|
field: (
|
@@ -136,7 +139,10 @@ class ExcelParser(FileTypeParser):
|
|
136
139
|
return FileReadMode.READ_BINARY
|
137
140
|
|
138
141
|
@staticmethod
|
139
|
-
def dtype_to_json_type(
|
142
|
+
def dtype_to_json_type(
|
143
|
+
current_type: Optional[str],
|
144
|
+
dtype: dtype_, # type: ignore [type-arg]
|
145
|
+
) -> str:
|
140
146
|
"""
|
141
147
|
Convert Pandas DataFrame types to Airbyte Types.
|
142
148
|
|
@@ -187,4 +193,4 @@ class ExcelParser(FileTypeParser):
|
|
187
193
|
Returns:
|
188
194
|
pd.DataFrame: Parsed data from the Excel file.
|
189
195
|
"""
|
190
|
-
return pd.ExcelFile(fp, engine="calamine").parse()
|
196
|
+
return pd.ExcelFile(fp, engine="calamine").parse() # type: ignore [arg-type, call-overload, no-any-return]
|
@@ -6,7 +6,7 @@ import json
|
|
6
6
|
import logging
|
7
7
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
8
8
|
|
9
|
-
|
9
|
+
import orjson
|
10
10
|
|
11
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
12
12
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
@@ -6,7 +6,7 @@ from abc import abstractmethod
|
|
6
6
|
from functools import cache, cached_property, lru_cache
|
7
7
|
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
|
8
8
|
|
9
|
-
from
|
9
|
+
from typing_extensions import deprecated
|
10
10
|
|
11
11
|
from airbyte_cdk import AirbyteMessage
|
12
12
|
from airbyte_cdk.models import SyncMode
|
@@ -179,7 +179,7 @@ class AbstractFileBasedStream(Stream):
|
|
179
179
|
)
|
180
180
|
|
181
181
|
@cached_property
|
182
|
-
@deprecated(version
|
182
|
+
@deprecated("Deprecated as of CDK version 3.7.0.")
|
183
183
|
def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
|
184
184
|
return self._availability_strategy
|
185
185
|
|
@@ -7,7 +7,7 @@ import logging
|
|
7
7
|
from functools import cache, lru_cache
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
|
-
from
|
10
|
+
from typing_extensions import deprecated
|
11
11
|
|
12
12
|
from airbyte_cdk.models import (
|
13
13
|
AirbyteLogMessage,
|
@@ -56,7 +56,10 @@ This module contains adapters to help enabling concurrency on File-based Stream
|
|
56
56
|
"""
|
57
57
|
|
58
58
|
|
59
|
-
@deprecated(
|
59
|
+
@deprecated(
|
60
|
+
"This class is experimental. Use at your own risk.",
|
61
|
+
category=ExperimentalClassWarning,
|
62
|
+
)
|
60
63
|
class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBasedStream):
|
61
64
|
@classmethod
|
62
65
|
def create_from_stream(
|
@@ -143,7 +146,7 @@ class FileBasedStreamFacade(AbstractStreamFacade[DefaultStream], AbstractFileBas
|
|
143
146
|
return self._legacy_stream.supports_incremental
|
144
147
|
|
145
148
|
@property
|
146
|
-
@deprecated(version
|
149
|
+
@deprecated("Deprecated as of CDK version 3.7.0.")
|
147
150
|
def availability_strategy(self) -> AbstractFileBasedAvailabilityStrategy:
|
148
151
|
return self._legacy_stream.availability_strategy
|
149
152
|
|
@@ -21,7 +21,7 @@ class DefaultFileBasedCursor(AbstractFileBasedCursor):
|
|
21
21
|
CURSOR_FIELD = "_ab_source_file_last_modified"
|
22
22
|
|
23
23
|
def __init__(self, stream_config: FileBasedStreamConfig, **_: Any):
|
24
|
-
super().__init__(stream_config)
|
24
|
+
super().__init__(stream_config) # type: ignore [safe-super]
|
25
25
|
self._file_to_datetime_history: MutableMapping[str, str] = {}
|
26
26
|
self._time_window_if_history_is_full = timedelta(
|
27
27
|
days=stream_config.days_to_sync_if_history_is_full
|
@@ -14,7 +14,7 @@ def format_http_message(
|
|
14
14
|
title: str,
|
15
15
|
description: str,
|
16
16
|
stream_name: Optional[str],
|
17
|
-
is_auxiliary: bool = None,
|
17
|
+
is_auxiliary: bool | None = None,
|
18
18
|
) -> LogMessage:
|
19
19
|
request = response.request
|
20
20
|
log_message = {
|
@@ -42,10 +42,10 @@ def format_http_message(
|
|
42
42
|
"url": {"full": request.url},
|
43
43
|
}
|
44
44
|
if is_auxiliary is not None:
|
45
|
-
log_message["http"]["is_auxiliary"] = is_auxiliary
|
45
|
+
log_message["http"]["is_auxiliary"] = is_auxiliary # type: ignore [index]
|
46
46
|
if stream_name:
|
47
47
|
log_message["airbyte_cdk"] = {"stream": {"name": stream_name}}
|
48
|
-
return log_message
|
48
|
+
return log_message # type: ignore [return-value] # got "dict[str, object]", expected "dict[str, JsonType]"
|
49
49
|
|
50
50
|
|
51
51
|
def _normalize_body_string(body_str: Optional[Union[str, bytes]]) -> Optional[str]:
|
@@ -5,7 +5,7 @@
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from typing import Any, Iterable, Mapping, Optional
|
7
7
|
|
8
|
-
from
|
8
|
+
from typing_extensions import deprecated
|
9
9
|
|
10
10
|
from airbyte_cdk.models import AirbyteStream
|
11
11
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
@@ -14,7 +14,10 @@ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
15
15
|
|
16
16
|
|
17
|
-
@deprecated(
|
17
|
+
@deprecated(
|
18
|
+
"This class is experimental. Use at your own risk.",
|
19
|
+
category=ExperimentalClassWarning,
|
20
|
+
)
|
18
21
|
class AbstractStream(ABC):
|
19
22
|
"""
|
20
23
|
AbstractStream is an experimental interface for streams developed as part of the Concurrent CDK.
|
@@ -8,7 +8,7 @@ import logging
|
|
8
8
|
from functools import lru_cache
|
9
9
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
|
10
10
|
|
11
|
-
from
|
11
|
+
from typing_extensions import deprecated
|
12
12
|
|
13
13
|
from airbyte_cdk.models import (
|
14
14
|
AirbyteLogMessage,
|
@@ -50,7 +50,10 @@ This module contains adapters to help enabling concurrency on Stream objects wit
|
|
50
50
|
"""
|
51
51
|
|
52
52
|
|
53
|
-
@deprecated(
|
53
|
+
@deprecated(
|
54
|
+
"This class is experimental. Use at your own risk.",
|
55
|
+
category=ExperimentalClassWarning,
|
56
|
+
)
|
54
57
|
class StreamFacade(AbstractStreamFacade[DefaultStream], Stream):
|
55
58
|
"""
|
56
59
|
The StreamFacade is a Stream that wraps an AbstractStream and exposes it as a Stream.
|
@@ -297,7 +300,7 @@ class StreamPartition(Partition):
|
|
297
300
|
yield Record(
|
298
301
|
data=data_to_return,
|
299
302
|
stream_name=self.stream_name(),
|
300
|
-
associated_slice=self._slice,
|
303
|
+
associated_slice=self._slice, # type: ignore [arg-type]
|
301
304
|
)
|
302
305
|
else:
|
303
306
|
self._message_repository.emit_message(record_data)
|
@@ -6,7 +6,7 @@ import logging
|
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from typing import Optional
|
8
8
|
|
9
|
-
from
|
9
|
+
from typing_extensions import deprecated
|
10
10
|
|
11
11
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
12
12
|
|
@@ -48,7 +48,10 @@ class StreamUnavailable(StreamAvailability):
|
|
48
48
|
STREAM_AVAILABLE = StreamAvailable()
|
49
49
|
|
50
50
|
|
51
|
-
@deprecated(
|
51
|
+
@deprecated(
|
52
|
+
"This class is experimental. Use at your own risk.",
|
53
|
+
category=ExperimentalClassWarning,
|
54
|
+
)
|
52
55
|
class AbstractAvailabilityStrategy(ABC):
|
53
56
|
"""
|
54
57
|
AbstractAvailabilityStrategy is an experimental interface developed as part of the Concurrent CDK.
|
@@ -68,7 +71,10 @@ class AbstractAvailabilityStrategy(ABC):
|
|
68
71
|
"""
|
69
72
|
|
70
73
|
|
71
|
-
@deprecated(
|
74
|
+
@deprecated(
|
75
|
+
"This class is experimental. Use at your own risk.",
|
76
|
+
category=ExperimentalClassWarning,
|
77
|
+
)
|
72
78
|
class AlwaysAvailableAvailabilityStrategy(AbstractAvailabilityStrategy):
|
73
79
|
"""
|
74
80
|
An availability strategy that always indicates a stream is available.
|
@@ -473,7 +473,7 @@ class ConcurrentCursor(Cursor):
|
|
473
473
|
:return: True if the record's cursor value falls within the sync boundaries
|
474
474
|
"""
|
475
475
|
try:
|
476
|
-
record_cursor_value: CursorValueType = self._extract_cursor_value(record)
|
476
|
+
record_cursor_value: CursorValueType = self._extract_cursor_value(record)
|
477
477
|
except ValueError:
|
478
478
|
self._log_for_record_without_cursor_value()
|
479
479
|
return True
|
@@ -141,7 +141,7 @@ class EpochValueConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
141
141
|
raise ValueError(
|
142
142
|
f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
|
143
143
|
)
|
144
|
-
return dt_object
|
144
|
+
return dt_object
|
145
145
|
|
146
146
|
|
147
147
|
class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
@@ -178,7 +178,7 @@ class IsoMillisConcurrentStreamStateConverter(DateTimeStreamStateConverter):
|
|
178
178
|
raise ValueError(
|
179
179
|
f"DateTime object was expected but got {type(dt_object)} from pendulum.parse({timestamp})"
|
180
180
|
)
|
181
|
-
return dt_object
|
181
|
+
return dt_object
|
182
182
|
|
183
183
|
|
184
184
|
class CustomFormatConcurrentStreamStateConverter(IsoMillisConcurrentStreamStateConverter):
|
@@ -10,7 +10,7 @@ from dataclasses import dataclass
|
|
10
10
|
from functools import cached_property, lru_cache
|
11
11
|
from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
|
12
12
|
|
13
|
-
from
|
13
|
+
from typing_extensions import deprecated
|
14
14
|
|
15
15
|
import airbyte_cdk.sources.utils.casing as casing
|
16
16
|
from airbyte_cdk.models import (
|
@@ -92,8 +92,8 @@ class CheckpointMixin(ABC):
|
|
92
92
|
|
93
93
|
|
94
94
|
@deprecated(
|
95
|
-
version
|
96
|
-
|
95
|
+
"Deprecated as of CDK version 0.87.0. "
|
96
|
+
"Deprecated in favor of the `CheckpointMixin` which offers similar functionality."
|
97
97
|
)
|
98
98
|
class IncrementalMixin(CheckpointMixin, ABC):
|
99
99
|
"""Mixin to make stream incremental.
|
@@ -115,12 +115,6 @@ class StreamClassification:
|
|
115
115
|
has_multiple_slices: bool
|
116
116
|
|
117
117
|
|
118
|
-
# Moved to class declaration since get_updated_state is called on every record for incremental syncs, and thus the @deprecated decorator as well.
|
119
|
-
@deprecated(
|
120
|
-
version="0.1.49",
|
121
|
-
reason="Deprecated method get_updated_state, You should use explicit state property instead, see IncrementalMixin docs.",
|
122
|
-
action="ignore",
|
123
|
-
)
|
124
118
|
class Stream(ABC):
|
125
119
|
"""
|
126
120
|
Base abstract class for an Airbyte Stream. Makes no assumption of the Stream's underlying transport protocol.
|
@@ -222,7 +216,8 @@ class Stream(ABC):
|
|
222
216
|
# Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This
|
223
217
|
# should be fixed on the stream implementation, but we should also protect against this in the CDK as well
|
224
218
|
stream_state_tracker = self.get_updated_state(
|
225
|
-
stream_state_tracker,
|
219
|
+
stream_state_tracker,
|
220
|
+
record_data, # type: ignore [arg-type]
|
226
221
|
)
|
227
222
|
self._observe_state(checkpoint_reader, stream_state_tracker)
|
228
223
|
record_counter += 1
|
@@ -282,7 +277,7 @@ class Stream(ABC):
|
|
282
277
|
if state
|
283
278
|
else {}, # read() expects MutableMapping instead of Mapping which is used more often
|
284
279
|
state_manager=None,
|
285
|
-
internal_config=InternalConfig(),
|
280
|
+
internal_config=InternalConfig(), # type: ignore [call-arg]
|
286
281
|
)
|
287
282
|
|
288
283
|
@abstractmethod
|
@@ -322,7 +317,7 @@ class Stream(ABC):
|
|
322
317
|
# If we can offer incremental we always should. RFR is always less reliable than incremental which uses a real cursor value
|
323
318
|
if self.supports_incremental:
|
324
319
|
stream.source_defined_cursor = self.source_defined_cursor
|
325
|
-
stream.supported_sync_modes.append(SyncMode.incremental)
|
320
|
+
stream.supported_sync_modes.append(SyncMode.incremental)
|
326
321
|
stream.default_cursor_field = self._wrapped_cursor_field()
|
327
322
|
|
328
323
|
keys = Stream._wrapped_primary_key(self.primary_key)
|
@@ -436,10 +431,18 @@ class Stream(ABC):
|
|
436
431
|
"""
|
437
432
|
return None
|
438
433
|
|
434
|
+
# Commented-out to avoid any runtime penalty, since this is used in a hot per-record codepath.
|
435
|
+
# To be evaluated for re-introduction here: https://github.com/airbytehq/airbyte-python-cdk/issues/116
|
436
|
+
# @deprecated(
|
437
|
+
# "Deprecated method `get_updated_state` as of CDK version 0.1.49. "
|
438
|
+
# "Please use explicit state property instead, see `IncrementalMixin` docs."
|
439
|
+
# )
|
439
440
|
def get_updated_state(
|
440
441
|
self, current_stream_state: MutableMapping[str, Any], latest_record: Mapping[str, Any]
|
441
442
|
) -> MutableMapping[str, Any]:
|
442
|
-
"""
|
443
|
+
"""DEPRECATED. Please use explicit state property instead, see `IncrementalMixin` docs.
|
444
|
+
|
445
|
+
Override to extract state from the latest record. Needed to implement incremental sync.
|
443
446
|
|
444
447
|
Inspects the latest record extracted from the data source and the current state object and return an updated state object.
|
445
448
|
|
@@ -654,7 +657,7 @@ class Stream(ABC):
|
|
654
657
|
# todo: This can be consolidated into one ConnectorStateManager.update_and_create_state_message() method, but I want
|
655
658
|
# to reduce changes right now and this would span concurrent as well
|
656
659
|
state_manager.update_state_for_stream(self.name, self.namespace, stream_state)
|
657
|
-
return state_manager.create_state_message(self.name, self.namespace)
|
660
|
+
return state_manager.create_state_message(self.name, self.namespace) # type: ignore [no-any-return]
|
658
661
|
|
659
662
|
@property
|
660
663
|
def configured_json_schema(self) -> Optional[Dict[str, Any]]:
|
@@ -9,8 +9,8 @@ from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optio
|
|
9
9
|
from urllib.parse import urljoin
|
10
10
|
|
11
11
|
import requests
|
12
|
-
from deprecated import deprecated
|
13
12
|
from requests.auth import AuthBase
|
13
|
+
from typing_extensions import deprecated
|
14
14
|
|
15
15
|
from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode
|
16
16
|
from airbyte_cdk.models import Type as MessageType
|
@@ -121,8 +121,8 @@ class HttpStream(Stream, CheckpointMixin, ABC):
|
|
121
121
|
|
122
122
|
@property
|
123
123
|
@deprecated(
|
124
|
-
version
|
125
|
-
|
124
|
+
"Deprecated as of CDK version 3.0.0. "
|
125
|
+
"You should set error_handler explicitly in HttpStream.get_error_handler() instead."
|
126
126
|
)
|
127
127
|
def raise_on_http_errors(self) -> bool:
|
128
128
|
"""
|
@@ -132,8 +132,8 @@ class HttpStream(Stream, CheckpointMixin, ABC):
|
|
132
132
|
|
133
133
|
@property
|
134
134
|
@deprecated(
|
135
|
-
version
|
136
|
-
|
135
|
+
"Deprecated as of CDK version 3.0.0. "
|
136
|
+
"You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
|
137
137
|
)
|
138
138
|
def max_retries(self) -> Union[int, None]:
|
139
139
|
"""
|
@@ -143,8 +143,8 @@ class HttpStream(Stream, CheckpointMixin, ABC):
|
|
143
143
|
|
144
144
|
@property
|
145
145
|
@deprecated(
|
146
|
-
version
|
147
|
-
|
146
|
+
"Deprecated as of CDK version 3.0.0. "
|
147
|
+
"You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
|
148
148
|
)
|
149
149
|
def max_time(self) -> Union[int, None]:
|
150
150
|
"""
|
@@ -154,8 +154,8 @@ class HttpStream(Stream, CheckpointMixin, ABC):
|
|
154
154
|
|
155
155
|
@property
|
156
156
|
@deprecated(
|
157
|
-
version
|
158
|
-
|
157
|
+
"Deprecated as of CDK version 3.0.0. "
|
158
|
+
"You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
|
159
159
|
)
|
160
160
|
def retry_factor(self) -> float:
|
161
161
|
"""
|
@@ -594,7 +594,7 @@ class HttpSubStream(HttpStream, ABC):
|
|
594
594
|
# Skip non-records (eg AirbyteLogMessage)
|
595
595
|
if isinstance(parent_record, AirbyteMessage):
|
596
596
|
if parent_record.type == MessageType.RECORD:
|
597
|
-
parent_record = parent_record.record.data
|
597
|
+
parent_record = parent_record.record.data # type: ignore [assignment, union-attr] # Incorrect type for assignment
|
598
598
|
else:
|
599
599
|
continue
|
600
600
|
elif isinstance(parent_record, Record):
|
@@ -603,8 +603,8 @@ class HttpSubStream(HttpStream, ABC):
|
|
603
603
|
|
604
604
|
|
605
605
|
@deprecated(
|
606
|
-
version
|
607
|
-
|
606
|
+
"Deprecated as of CDK version 3.0.0."
|
607
|
+
"You should set backoff_strategies explicitly in HttpStream.get_backoff_strategy() instead."
|
608
608
|
)
|
609
609
|
class HttpStreamAdapterBackoffStrategy(BackoffStrategy):
|
610
610
|
def __init__(self, stream: HttpStream):
|
@@ -619,8 +619,8 @@ class HttpStreamAdapterBackoffStrategy(BackoffStrategy):
|
|
619
619
|
|
620
620
|
|
621
621
|
@deprecated(
|
622
|
-
version
|
623
|
-
|
622
|
+
"Deprecated as of CDK version 3.0.0. "
|
623
|
+
"You should set error_handler explicitly in HttpStream.get_error_handler() instead."
|
624
624
|
)
|
625
625
|
class HttpStreamAdapterHttpStatusErrorHandler(HttpStatusErrorHandler):
|
626
626
|
def __init__(self, stream: HttpStream, **kwargs): # type: ignore # noqa
|
@@ -639,15 +639,15 @@ class HttpStreamAdapterHttpStatusErrorHandler(HttpStatusErrorHandler):
|
|
639
639
|
return ErrorResolution(
|
640
640
|
response_action=ResponseAction.RATE_LIMITED,
|
641
641
|
failure_type=FailureType.transient_error,
|
642
|
-
error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
|
642
|
+
error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
|
643
643
|
)
|
644
644
|
return ErrorResolution(
|
645
645
|
response_action=ResponseAction.RETRY,
|
646
646
|
failure_type=FailureType.transient_error,
|
647
|
-
error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
|
647
|
+
error_message=f"Response status code: {response_or_exception.status_code}. Retrying...",
|
648
648
|
)
|
649
649
|
else:
|
650
|
-
if response_or_exception.ok:
|
650
|
+
if response_or_exception.ok:
|
651
651
|
return ErrorResolution(
|
652
652
|
response_action=ResponseAction.SUCCESS,
|
653
653
|
failure_type=None,
|
@@ -657,13 +657,13 @@ class HttpStreamAdapterHttpStatusErrorHandler(HttpStatusErrorHandler):
|
|
657
657
|
return ErrorResolution(
|
658
658
|
response_action=ResponseAction.FAIL,
|
659
659
|
failure_type=FailureType.transient_error,
|
660
|
-
error_message=f"Response status code: {response_or_exception.status_code}. Unexpected error. Failed.",
|
660
|
+
error_message=f"Response status code: {response_or_exception.status_code}. Unexpected error. Failed.",
|
661
661
|
)
|
662
662
|
else:
|
663
663
|
return ErrorResolution(
|
664
664
|
response_action=ResponseAction.IGNORE,
|
665
665
|
failure_type=FailureType.transient_error,
|
666
|
-
error_message=f"Response status code: {response_or_exception.status_code}. Ignoring...",
|
666
|
+
error_message=f"Response status code: {response_or_exception.status_code}. Ignoring...",
|
667
667
|
)
|
668
668
|
else:
|
669
669
|
self._logger.error(f"Received unexpected response type: {type(response_or_exception)}")
|