airbyte-cdk 6.7.1.dev0__py3-none-any.whl → 6.7.1rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +1 -2
- airbyte_cdk/config_observation.py +1 -2
- airbyte_cdk/connector.py +0 -1
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -2
- airbyte_cdk/destinations/destination.py +1 -2
- airbyte_cdk/destinations/vector_db_based/config.py +1 -2
- airbyte_cdk/destinations/vector_db_based/document_processor.py +3 -4
- airbyte_cdk/destinations/vector_db_based/embedder.py +4 -5
- airbyte_cdk/entrypoint.py +2 -3
- airbyte_cdk/logger.py +1 -2
- airbyte_cdk/models/__init__.py +0 -2
- airbyte_cdk/models/airbyte_protocol.py +1 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -2
- airbyte_cdk/sources/declarative/auth/jwt.py +0 -1
- airbyte_cdk/sources/declarative/auth/oauth.py +0 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +0 -1
- airbyte_cdk/sources/declarative/auth/token.py +1 -2
- airbyte_cdk/sources/declarative/auth/token_provider.py +2 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +8 -66
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -167
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +2 -3
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +0 -1
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +0 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +0 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -1
- airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +48 -6
- airbyte_cdk/sources/declarative/extractors/record_selector.py +4 -32
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +4 -5
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +3 -4
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +0 -122
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +2 -11
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +1 -2
- airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +3 -9
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +0 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +7 -12
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -7
- airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -1
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/embedded/tools.py +0 -1
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/config/avro_format.py +1 -2
- airbyte_cdk/sources/file_based/config/csv_format.py +1 -2
- airbyte_cdk/sources/file_based/config/excel_format.py +1 -2
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +1 -2
- airbyte_cdk/sources/file_based/config/jsonl_format.py +1 -2
- airbyte_cdk/sources/file_based/config/parquet_format.py +1 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +1 -2
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +1 -2
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +8 -9
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +1 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +4 -5
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +0 -1
- airbyte_cdk/sources/streams/call_rate.py +2 -1
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +1 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +4 -8
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +1 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +6 -30
- airbyte_cdk/sources/streams/concurrent/default_stream.py +0 -1
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +35 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
- airbyte_cdk/sources/streams/core.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +1 -2
- airbyte_cdk/sources/streams/http/http.py +2 -3
- airbyte_cdk/sources/streams/http/http_client.py +9 -48
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -1
- airbyte_cdk/sources/types.py +1 -14
- airbyte_cdk/sources/utils/schema_helpers.py +2 -3
- airbyte_cdk/sql/secrets.py +1 -2
- airbyte_cdk/sql/shared/sql_processor.py +6 -8
- airbyte_cdk/test/entrypoint_wrapper.py +3 -4
- airbyte_cdk/test/mock_http/mocker.py +0 -1
- airbyte_cdk/utils/schema_inferrer.py +1 -2
- airbyte_cdk/utils/slice_hasher.py +1 -1
- airbyte_cdk/utils/traced_exception.py +1 -2
- {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc2.dist-info}/METADATA +2 -9
- {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc2.dist-info}/RECORD +124 -123
- {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.7.1.dev0.dist-info → airbyte_cdk-6.7.1rc2.dist-info}/entry_points.txt +0 -0
@@ -8,8 +8,6 @@ import logging
|
|
8
8
|
from functools import lru_cache
|
9
9
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
|
10
10
|
|
11
|
-
from deprecated.classic import deprecated
|
12
|
-
|
13
11
|
from airbyte_cdk.models import (
|
14
12
|
AirbyteLogMessage,
|
15
13
|
AirbyteMessage,
|
@@ -39,10 +37,12 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
|
|
39
37
|
)
|
40
38
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
41
39
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
40
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
42
41
|
from airbyte_cdk.sources.streams.core import StreamData
|
43
|
-
from airbyte_cdk.sources.types import Record
|
44
42
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
45
43
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
44
|
+
from deprecated.classic import deprecated
|
45
|
+
|
46
46
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
47
47
|
|
48
48
|
"""
|
@@ -294,11 +294,7 @@ class StreamPartition(Partition):
|
|
294
294
|
self._stream.transformer.transform(
|
295
295
|
data_to_return, self._stream.get_json_schema()
|
296
296
|
)
|
297
|
-
yield Record(
|
298
|
-
data=data_to_return,
|
299
|
-
stream_name=self.stream_name(),
|
300
|
-
associated_slice=self._slice,
|
301
|
-
)
|
297
|
+
yield Record(data_to_return, self)
|
302
298
|
else:
|
303
299
|
self._message_repository.emit_message(record_data)
|
304
300
|
except Exception as e:
|
@@ -6,9 +6,8 @@ import logging
|
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from typing import Optional
|
8
8
|
|
9
|
-
from deprecated.classic import deprecated
|
10
|
-
|
11
9
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
10
|
+
from deprecated.classic import deprecated
|
12
11
|
|
13
12
|
|
14
13
|
class StreamAvailability(ABC):
|
@@ -3,7 +3,6 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import functools
|
6
|
-
import logging
|
7
6
|
from abc import ABC, abstractmethod
|
8
7
|
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Protocol, Tuple
|
9
8
|
|
@@ -11,13 +10,12 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
|
11
10
|
from airbyte_cdk.sources.message import MessageRepository
|
12
11
|
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
|
13
12
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
13
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
15
15
|
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
|
16
16
|
AbstractStreamStateConverter,
|
17
17
|
)
|
18
|
-
from airbyte_cdk.sources.types import Record, StreamSlice
|
19
|
-
|
20
|
-
LOGGER = logging.getLogger("airbyte")
|
18
|
+
from airbyte_cdk.sources.types import StreamSlice
|
21
19
|
|
22
20
|
|
23
21
|
def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
|
@@ -175,11 +173,9 @@ class ConcurrentCursor(Cursor):
|
|
175
173
|
self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
|
176
174
|
self._lookback_window = lookback_window
|
177
175
|
self._slice_range = slice_range
|
178
|
-
self._most_recent_cursor_value_per_partition: MutableMapping[
|
176
|
+
self._most_recent_cursor_value_per_partition: MutableMapping[Partition, Any] = {}
|
179
177
|
self._has_closed_at_least_one_slice = False
|
180
178
|
self._cursor_granularity = cursor_granularity
|
181
|
-
# Flag to track if the logger has been triggered (per stream)
|
182
|
-
self._should_be_synced_logger_triggered = False
|
183
179
|
|
184
180
|
@property
|
185
181
|
def state(self) -> MutableMapping[str, Any]:
|
@@ -214,12 +210,12 @@ class ConcurrentCursor(Cursor):
|
|
214
210
|
|
215
211
|
def observe(self, record: Record) -> None:
|
216
212
|
most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
|
217
|
-
record.associated_slice
|
213
|
+
record.partition
|
218
214
|
)
|
219
215
|
cursor_value = self._extract_cursor_value(record)
|
220
216
|
|
221
217
|
if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
|
222
|
-
self._most_recent_cursor_value_per_partition[record.associated_slice] = cursor_value
|
218
|
+
self._most_recent_cursor_value_per_partition[record.partition] = cursor_value
|
223
219
|
|
224
220
|
def _extract_cursor_value(self, record: Record) -> Any:
|
225
221
|
return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
|
@@ -235,9 +231,7 @@ class ConcurrentCursor(Cursor):
|
|
235
231
|
self._has_closed_at_least_one_slice = True
|
236
232
|
|
237
233
|
def _add_slice_to_state(self, partition: Partition) -> None:
|
238
|
-
most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
|
239
|
-
partition.to_slice()
|
240
|
-
)
|
234
|
+
most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(partition)
|
241
235
|
|
242
236
|
if self._slice_boundary_fields:
|
243
237
|
if "slices" not in self.state:
|
@@ -448,21 +442,3 @@ class ConcurrentCursor(Cursor):
|
|
448
442
|
return lower + step
|
449
443
|
except OverflowError:
|
450
444
|
return self._end_provider()
|
451
|
-
|
452
|
-
def should_be_synced(self, record: Record) -> bool:
|
453
|
-
"""
|
454
|
-
Determines if a record should be synced based on its cursor value.
|
455
|
-
:param record: The record to evaluate
|
456
|
-
|
457
|
-
:return: True if the record's cursor value falls within the sync boundaries
|
458
|
-
"""
|
459
|
-
try:
|
460
|
-
record_cursor_value: CursorValueType = self._extract_cursor_value(record) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
|
461
|
-
except ValueError:
|
462
|
-
if not self._should_be_synced_logger_triggered:
|
463
|
-
LOGGER.warning(
|
464
|
-
f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record. The incremental sync will assume it needs to be synced"
|
465
|
-
)
|
466
|
-
self._should_be_synced_logger_triggered = True
|
467
|
-
return True
|
468
|
-
return self.start <= record_cursor_value <= self._end_provider()
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING, Any, Mapping
|
6
|
+
|
7
|
+
if TYPE_CHECKING:
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
9
|
+
|
10
|
+
|
11
|
+
class Record:
|
12
|
+
"""
|
13
|
+
Represents a record read from a stream.
|
14
|
+
"""
|
15
|
+
|
16
|
+
def __init__(
|
17
|
+
self,
|
18
|
+
data: Mapping[str, Any],
|
19
|
+
partition: "Partition",
|
20
|
+
is_file_transfer_message: bool = False,
|
21
|
+
):
|
22
|
+
self.data = data
|
23
|
+
self.partition = partition
|
24
|
+
self.is_file_transfer_message = is_file_transfer_message
|
25
|
+
|
26
|
+
def __eq__(self, other: Any) -> bool:
|
27
|
+
if not isinstance(other, Record):
|
28
|
+
return False
|
29
|
+
return (
|
30
|
+
self.data == other.data
|
31
|
+
and self.partition.stream_name() == other.partition.stream_name()
|
32
|
+
)
|
33
|
+
|
34
|
+
def __repr__(self) -> str:
|
35
|
+
return f"Record(data={self.data}, stream_name={self.partition.stream_name()})"
|
@@ -8,7 +8,7 @@ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentin
|
|
8
8
|
PartitionGenerationCompletedSentinel,
|
9
9
|
)
|
10
10
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
11
|
-
from airbyte_cdk.sources.types import Record
|
11
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
12
12
|
|
13
13
|
|
14
14
|
class PartitionCompleteSentinel:
|
@@ -7,7 +7,6 @@ from datetime import datetime, timedelta, timezone
|
|
7
7
|
from typing import Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
9
|
import pendulum
|
10
|
-
from pendulum.datetime import DateTime
|
11
10
|
|
12
11
|
# FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and
|
13
12
|
# the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest.
|
@@ -17,6 +16,7 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_sta
|
|
17
16
|
AbstractStreamStateConverter,
|
18
17
|
ConcurrencyCompatibleStateType,
|
19
18
|
)
|
19
|
+
from pendulum.datetime import DateTime
|
20
20
|
|
21
21
|
|
22
22
|
class DateTimeStreamStateConverter(AbstractStreamStateConverter):
|
@@ -10,8 +10,6 @@ from dataclasses import dataclass
|
|
10
10
|
from functools import cached_property, lru_cache
|
11
11
|
from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
|
12
12
|
|
13
|
-
from deprecated import deprecated
|
14
|
-
|
15
13
|
import airbyte_cdk.sources.utils.casing as casing
|
16
14
|
from airbyte_cdk.models import (
|
17
15
|
AirbyteMessage,
|
@@ -37,6 +35,7 @@ from airbyte_cdk.sources.types import StreamSlice
|
|
37
35
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
|
38
36
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
39
37
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
38
|
+
from deprecated import deprecated
|
40
39
|
|
41
40
|
# A stream's read method can return one of the following types:
|
42
41
|
# Mapping[str, Any]: The content of an AirbyteRecordMessage
|
@@ -4,13 +4,12 @@
|
|
4
4
|
|
5
5
|
from typing import Mapping, Type, Union
|
6
6
|
|
7
|
-
from requests.exceptions import InvalidSchema, InvalidURL, RequestException
|
8
|
-
|
9
7
|
from airbyte_cdk.models import FailureType
|
10
8
|
from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
11
9
|
ErrorResolution,
|
12
10
|
ResponseAction,
|
13
11
|
)
|
12
|
+
from requests.exceptions import InvalidSchema, InvalidURL, RequestException
|
14
13
|
|
15
14
|
DEFAULT_ERROR_MAPPING: Mapping[Union[int, str, Type[Exception]], ErrorResolution] = {
|
16
15
|
InvalidSchema: ErrorResolution(
|
@@ -5,10 +5,9 @@ from enum import Enum
|
|
5
5
|
from typing import Optional, Union
|
6
6
|
|
7
7
|
import requests
|
8
|
-
from requests import HTTPError
|
9
|
-
|
10
8
|
from airbyte_cdk.models import FailureType
|
11
9
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
10
|
+
from requests import HTTPError
|
12
11
|
|
13
12
|
|
14
13
|
class ResponseAction(Enum):
|
@@ -9,9 +9,6 @@ from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optio
|
|
9
9
|
from urllib.parse import urljoin
|
10
10
|
|
11
11
|
import requests
|
12
|
-
from deprecated import deprecated
|
13
|
-
from requests.auth import AuthBase
|
14
|
-
|
15
12
|
from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode
|
16
13
|
from airbyte_cdk.models import Type as MessageType
|
17
14
|
from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
|
@@ -36,6 +33,8 @@ from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
|
36
33
|
from airbyte_cdk.sources.streams.http.http_client import HttpClient
|
37
34
|
from airbyte_cdk.sources.types import Record, StreamSlice
|
38
35
|
from airbyte_cdk.sources.utils.types import JsonType
|
36
|
+
from deprecated import deprecated
|
37
|
+
from requests.auth import AuthBase
|
39
38
|
|
40
39
|
# list of all possible HTTP methods which can be used for sending of request bodies
|
41
40
|
BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
|
@@ -11,8 +11,6 @@ from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
|
|
11
11
|
import orjson
|
12
12
|
import requests
|
13
13
|
import requests_cache
|
14
|
-
from requests.auth import AuthBase
|
15
|
-
|
16
14
|
from airbyte_cdk.models import (
|
17
15
|
AirbyteMessageSerializer,
|
18
16
|
AirbyteStreamStatus,
|
@@ -45,13 +43,13 @@ from airbyte_cdk.sources.streams.http.rate_limiting import (
|
|
45
43
|
rate_limit_default_backoff_handler,
|
46
44
|
user_defined_backoff_handler,
|
47
45
|
)
|
48
|
-
from airbyte_cdk.sources.utils.types import JsonType
|
49
46
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
50
47
|
from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
|
51
48
|
from airbyte_cdk.utils.stream_status_utils import (
|
52
49
|
as_airbyte_message as stream_status_as_airbyte_message,
|
53
50
|
)
|
54
51
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
52
|
+
from requests.auth import AuthBase
|
55
53
|
|
56
54
|
BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
|
57
55
|
|
@@ -76,7 +74,6 @@ class MessageRepresentationAirbyteTracedErrors(AirbyteTracedException):
|
|
76
74
|
class HttpClient:
|
77
75
|
_DEFAULT_MAX_RETRY: int = 5
|
78
76
|
_DEFAULT_MAX_TIME: int = 60 * 10
|
79
|
-
_ACTIONS_TO_RETRY_ON = {ResponseAction.RETRY, ResponseAction.RATE_LIMITED}
|
80
77
|
|
81
78
|
def __init__(
|
82
79
|
self,
|
@@ -94,9 +91,11 @@ class HttpClient:
|
|
94
91
|
):
|
95
92
|
self._name = name
|
96
93
|
self._api_budget: APIBudget = api_budget or APIBudget(policies=[])
|
94
|
+
self._is_session_owner = False
|
97
95
|
if session:
|
98
96
|
self._session = session
|
99
97
|
else:
|
98
|
+
self._is_session_owner = True
|
100
99
|
self._use_cache = use_cache
|
101
100
|
self._session = self._request_session()
|
102
101
|
self._session.mount(
|
@@ -121,6 +120,10 @@ class HttpClient:
|
|
121
120
|
self._disable_retries = disable_retries
|
122
121
|
self._message_repository = message_repository
|
123
122
|
|
123
|
+
def __del__(self):
|
124
|
+
if self._is_session_owner:
|
125
|
+
self._session.close()
|
126
|
+
|
124
127
|
@property
|
125
128
|
def cache_filename(self) -> str:
|
126
129
|
"""
|
@@ -337,40 +340,6 @@ class HttpClient:
|
|
337
340
|
|
338
341
|
return response # type: ignore # will either return a valid response of type requests.Response or raise an exception
|
339
342
|
|
340
|
-
def _get_response_body(self, response: requests.Response) -> Optional[JsonType]:
|
341
|
-
"""
|
342
|
-
Extracts and returns the body of an HTTP response.
|
343
|
-
|
344
|
-
This method attempts to parse the response body as JSON. If the response
|
345
|
-
body is not valid JSON, it falls back to decoding the response content
|
346
|
-
as a UTF-8 string. If both attempts fail, it returns None.
|
347
|
-
|
348
|
-
Args:
|
349
|
-
response (requests.Response): The HTTP response object.
|
350
|
-
|
351
|
-
Returns:
|
352
|
-
Optional[JsonType]: The parsed JSON object as a string, the decoded
|
353
|
-
response content as a string, or None if both parsing attempts fail.
|
354
|
-
"""
|
355
|
-
try:
|
356
|
-
return str(response.json())
|
357
|
-
except requests.exceptions.JSONDecodeError:
|
358
|
-
try:
|
359
|
-
return response.content.decode("utf-8")
|
360
|
-
except Exception:
|
361
|
-
return "The Content of the Response couldn't be decoded."
|
362
|
-
|
363
|
-
def _evict_key(self, prepared_request: requests.PreparedRequest) -> None:
|
364
|
-
"""
|
365
|
-
Addresses high memory consumption when enabling concurrency in https://github.com/airbytehq/oncall/issues/6821.
|
366
|
-
|
367
|
-
The `_request_attempt_count` attribute keeps growing as multiple requests are made using the same `http_client`.
|
368
|
-
To mitigate this issue, we evict keys for completed requests once we confirm that no further retries are needed.
|
369
|
-
This helps manage memory usage more efficiently while maintaining the necessary logic for retry attempts.
|
370
|
-
"""
|
371
|
-
if prepared_request in self._request_attempt_count:
|
372
|
-
del self._request_attempt_count[prepared_request]
|
373
|
-
|
374
343
|
def _handle_error_resolution(
|
375
344
|
self,
|
376
345
|
response: Optional[requests.Response],
|
@@ -379,9 +348,6 @@ class HttpClient:
|
|
379
348
|
error_resolution: ErrorResolution,
|
380
349
|
exit_on_rate_limit: Optional[bool] = False,
|
381
350
|
) -> None:
|
382
|
-
if error_resolution.response_action not in self._ACTIONS_TO_RETRY_ON:
|
383
|
-
self._evict_key(request)
|
384
|
-
|
385
351
|
# Emit stream status RUNNING with the reason RATE_LIMITED to log that the rate limit has been reached
|
386
352
|
if error_resolution.response_action == ResponseAction.RATE_LIMITED:
|
387
353
|
# TODO: Update to handle with message repository when concurrent message repository is ready
|
@@ -402,18 +368,13 @@ class HttpClient:
|
|
402
368
|
|
403
369
|
if error_resolution.response_action == ResponseAction.FAIL:
|
404
370
|
if response is not None:
|
405
|
-
|
406
|
-
f"Request (body): '{str(request.body)}'. Response (body): '{self._get_response_body(response)}'. Response (headers): '{response.headers}'."
|
407
|
-
)
|
408
|
-
error_message = f"'{request.method}' request to '{request.url}' failed with status code '{response.status_code}' and error message: '{self._error_message_parser.parse_response_error_message(response)}'. {filtered_response_message}"
|
371
|
+
error_message = f"'{request.method}' request to '{request.url}' failed with status code '{response.status_code}' and error message '{response.content}'"
|
409
372
|
else:
|
410
373
|
error_message = (
|
411
374
|
f"'{request.method}' request to '{request.url}' failed with exception: '{exc}'"
|
412
375
|
)
|
413
376
|
|
414
|
-
|
415
|
-
self._logger.error(error_message)
|
416
|
-
|
377
|
+
self._logger.warning(filter_secrets(error_message))
|
417
378
|
raise MessageRepresentationAirbyteTracedErrors(
|
418
379
|
internal_message=error_message,
|
419
380
|
message=error_resolution.error_message or error_message,
|
@@ -10,13 +10,12 @@ from typing import Any, List, Mapping, MutableMapping, Optional, Tuple, Union
|
|
10
10
|
import backoff
|
11
11
|
import pendulum
|
12
12
|
import requests
|
13
|
-
from requests.auth import AuthBase
|
14
|
-
|
15
13
|
from airbyte_cdk.models import FailureType, Level
|
16
14
|
from airbyte_cdk.sources.http_logger import format_http_message
|
17
15
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
18
16
|
from airbyte_cdk.utils import AirbyteTracedException
|
19
17
|
from airbyte_cdk.utils.airbyte_secrets_utils import add_to_secrets
|
18
|
+
from requests.auth import AuthBase
|
20
19
|
|
21
20
|
from ..exceptions import DefaultBackoffException
|
22
21
|
|
airbyte_cdk/sources/types.py
CHANGED
@@ -6,8 +6,6 @@ from __future__ import annotations
|
|
6
6
|
|
7
7
|
from typing import Any, ItemsView, Iterator, KeysView, List, Mapping, Optional, ValuesView
|
8
8
|
|
9
|
-
import orjson
|
10
|
-
|
11
9
|
# A FieldPointer designates a path to a field inside a mapping. For example, retrieving ["k1", "k1.2"] in the object {"k1" :{"k1.2":
|
12
10
|
# "hello"}] returns "hello"
|
13
11
|
FieldPointer = List[str]
|
@@ -17,17 +15,9 @@ StreamState = Mapping[str, Any]
|
|
17
15
|
|
18
16
|
|
19
17
|
class Record(Mapping[str, Any]):
|
20
|
-
def __init__(
|
21
|
-
self,
|
22
|
-
data: Mapping[str, Any],
|
23
|
-
stream_name: str,
|
24
|
-
associated_slice: Optional[StreamSlice] = None,
|
25
|
-
is_file_transfer_message: bool = False,
|
26
|
-
):
|
18
|
+
def __init__(self, data: Mapping[str, Any], associated_slice: Optional[StreamSlice]):
|
27
19
|
self._data = data
|
28
20
|
self._associated_slice = associated_slice
|
29
|
-
self.stream_name = stream_name
|
30
|
-
self.is_file_transfer_message = is_file_transfer_message
|
31
21
|
|
32
22
|
@property
|
33
23
|
def data(self) -> Mapping[str, Any]:
|
@@ -149,6 +139,3 @@ class StreamSlice(Mapping[str, Any]):
|
|
149
139
|
|
150
140
|
def __json_serializable__(self) -> Any:
|
151
141
|
return self._stream_slice
|
152
|
-
|
153
|
-
def __hash__(self) -> int:
|
154
|
-
return hash(orjson.dumps(self._stream_slice, option=orjson.OPT_SORT_KEYS))
|
@@ -10,13 +10,12 @@ import pkgutil
|
|
10
10
|
from typing import Any, ClassVar, Dict, List, Mapping, MutableMapping, Optional, Tuple
|
11
11
|
|
12
12
|
import jsonref
|
13
|
+
from airbyte_cdk.models import ConnectorSpecification, FailureType
|
14
|
+
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
13
15
|
from jsonschema import RefResolver, validate
|
14
16
|
from jsonschema.exceptions import ValidationError
|
15
17
|
from pydantic.v1 import BaseModel, Field
|
16
18
|
|
17
|
-
from airbyte_cdk.models import ConnectorSpecification, FailureType
|
18
|
-
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
19
|
-
|
20
19
|
|
21
20
|
class JsonFileLoader:
|
22
21
|
"""
|
airbyte_cdk/sql/secrets.py
CHANGED
@@ -6,9 +6,8 @@ from __future__ import annotations
|
|
6
6
|
import json
|
7
7
|
from typing import TYPE_CHECKING, Any
|
8
8
|
|
9
|
-
from pydantic_core import CoreSchema, core_schema
|
10
|
-
|
11
9
|
from airbyte_cdk.sql import exceptions as exc
|
10
|
+
from pydantic_core import CoreSchema, core_schema
|
12
11
|
|
13
12
|
if TYPE_CHECKING:
|
14
13
|
from pydantic import GetCoreSchemaHandler, GetJsonSchemaHandler, ValidationInfo
|
@@ -13,12 +13,6 @@ from typing import TYPE_CHECKING, Any, final
|
|
13
13
|
import pandas as pd
|
14
14
|
import sqlalchemy
|
15
15
|
import ulid
|
16
|
-
from airbyte_protocol_dataclasses.models import AirbyteStateMessage
|
17
|
-
from pandas import Index
|
18
|
-
from pydantic import BaseModel, Field
|
19
|
-
from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
|
20
|
-
from sqlalchemy.exc import ProgrammingError, SQLAlchemyError
|
21
|
-
|
22
16
|
from airbyte_cdk.sql import exceptions as exc
|
23
17
|
from airbyte_cdk.sql._util.hashing import one_way_hash
|
24
18
|
from airbyte_cdk.sql._util.name_normalizers import LowerCaseNormalizer
|
@@ -30,10 +24,16 @@ from airbyte_cdk.sql.constants import (
|
|
30
24
|
)
|
31
25
|
from airbyte_cdk.sql.secrets import SecretString
|
32
26
|
from airbyte_cdk.sql.types import SQLTypeConverter
|
27
|
+
from airbyte_protocol_dataclasses.models import AirbyteStateMessage
|
28
|
+
from pandas import Index
|
29
|
+
from pydantic import BaseModel, Field
|
30
|
+
from sqlalchemy import Column, Table, and_, create_engine, insert, null, select, text, update
|
31
|
+
from sqlalchemy.exc import ProgrammingError, SQLAlchemyError
|
33
32
|
|
34
33
|
if TYPE_CHECKING:
|
35
34
|
from collections.abc import Generator
|
36
35
|
|
36
|
+
from airbyte_cdk.sql.shared.catalog_providers import CatalogProvider
|
37
37
|
from sqlalchemy.engine import Connection, Engine
|
38
38
|
from sqlalchemy.engine.cursor import CursorResult
|
39
39
|
from sqlalchemy.engine.reflection import Inspector
|
@@ -41,8 +41,6 @@ if TYPE_CHECKING:
|
|
41
41
|
from sqlalchemy.sql.elements import TextClause
|
42
42
|
from sqlalchemy.sql.type_api import TypeEngine
|
43
43
|
|
44
|
-
from airbyte_cdk.sql.shared.catalog_providers import CatalogProvider
|
45
|
-
|
46
44
|
|
47
45
|
class SQLRuntimeError(Exception):
|
48
46
|
"""Raised when an SQL operation fails."""
|
@@ -23,10 +23,6 @@ from io import StringIO
|
|
23
23
|
from pathlib import Path
|
24
24
|
from typing import Any, List, Mapping, Optional, Union
|
25
25
|
|
26
|
-
from orjson import orjson
|
27
|
-
from pydantic import ValidationError as V2ValidationError
|
28
|
-
from serpyco_rs import SchemaValidationError
|
29
|
-
|
30
26
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
31
27
|
from airbyte_cdk.exception_handler import assemble_uncaught_exception
|
32
28
|
from airbyte_cdk.logger import AirbyteLogFormatter
|
@@ -44,6 +40,9 @@ from airbyte_cdk.models import (
|
|
44
40
|
Type,
|
45
41
|
)
|
46
42
|
from airbyte_cdk.sources import Source
|
43
|
+
from orjson import orjson
|
44
|
+
from pydantic import ValidationError as V2ValidationError
|
45
|
+
from serpyco_rs import SchemaValidationError
|
47
46
|
|
48
47
|
|
49
48
|
class EntrypointOutput:
|
@@ -5,12 +5,11 @@
|
|
5
5
|
from collections import defaultdict
|
6
6
|
from typing import Any, Dict, List, Mapping, Optional
|
7
7
|
|
8
|
+
from airbyte_cdk.models import AirbyteRecordMessage
|
8
9
|
from genson import SchemaBuilder, SchemaNode
|
9
10
|
from genson.schema.strategies.object import Object
|
10
11
|
from genson.schema.strategies.scalar import Number
|
11
12
|
|
12
|
-
from airbyte_cdk.models import AirbyteRecordMessage
|
13
|
-
|
14
13
|
# schema keywords
|
15
14
|
_TYPE = "type"
|
16
15
|
_NULL_TYPE = "null"
|
@@ -5,8 +5,6 @@ import time
|
|
5
5
|
import traceback
|
6
6
|
from typing import Optional
|
7
7
|
|
8
|
-
from orjson import orjson
|
9
|
-
|
10
8
|
from airbyte_cdk.models import (
|
11
9
|
AirbyteConnectionStatus,
|
12
10
|
AirbyteErrorTraceMessage,
|
@@ -20,6 +18,7 @@ from airbyte_cdk.models import (
|
|
20
18
|
)
|
21
19
|
from airbyte_cdk.models import Type as MessageType
|
22
20
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
21
|
+
from orjson import orjson
|
23
22
|
|
24
23
|
|
25
24
|
class AirbyteTracedException(Exception):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: airbyte-cdk
|
3
|
-
Version: 6.7.1.dev0
|
3
|
+
Version: 6.7.1rc2
|
4
4
|
Summary: A framework for writing Airbyte Connectors.
|
5
5
|
Home-page: https://airbyte.com
|
6
6
|
License: MIT
|
@@ -25,7 +25,7 @@ Requires-Dist: Deprecated (>=1.2,<1.3)
|
|
25
25
|
Requires-Dist: Jinja2 (>=3.1.2,<3.2.0)
|
26
26
|
Requires-Dist: PyYAML (>=6.0.1,<7.0.0)
|
27
27
|
Requires-Dist: Sphinx (>=4.2,<4.3) ; extra == "sphinx-docs"
|
28
|
-
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.
|
28
|
+
Requires-Dist: airbyte-protocol-models-dataclasses (>=0.13,<0.14)
|
29
29
|
Requires-Dist: avro (>=1.11.2,<1.12.0) ; extra == "file-based"
|
30
30
|
Requires-Dist: backoff
|
31
31
|
Requires-Dist: cachetools
|
@@ -170,13 +170,6 @@ Installing all extras is required to run the full suite of unit tests.
|
|
170
170
|
|
171
171
|
To see all available scripts, run `poetry run poe`.
|
172
172
|
|
173
|
-
#### Formatting the code
|
174
|
-
|
175
|
-
- Iterate on the CDK code locally
|
176
|
-
- Run `poetry run ruff format` to format your changes.
|
177
|
-
|
178
|
-
To see all available `ruff` options, run `poetry run ruff`.
|
179
|
-
|
180
173
|
##### Autogenerated files
|
181
174
|
|
182
175
|
Low-code CDK models are generated from `sources/declarative/declarative_component_schema.yaml`. If
|