airbyte-cdk 6.7.1rc3__py3-none-any.whl → 6.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +2 -1
- airbyte_cdk/config_observation.py +2 -1
- airbyte_cdk/connector.py +1 -0
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
- airbyte_cdk/connector_builder/main.py +2 -1
- airbyte_cdk/destinations/destination.py +2 -1
- airbyte_cdk/destinations/vector_db_based/config.py +2 -1
- airbyte_cdk/destinations/vector_db_based/document_processor.py +4 -3
- airbyte_cdk/destinations/vector_db_based/embedder.py +5 -4
- airbyte_cdk/entrypoint.py +3 -2
- airbyte_cdk/logger.py +2 -1
- airbyte_cdk/models/__init__.py +2 -0
- airbyte_cdk/models/airbyte_protocol.py +2 -1
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
- airbyte_cdk/sources/config.py +2 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +1 -0
- airbyte_cdk/sources/declarative/auth/oauth.py +1 -0
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -0
- airbyte_cdk/sources/declarative/auth/token.py +2 -1
- airbyte_cdk/sources/declarative/auth/token_provider.py +3 -2
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +6 -4
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +196 -0
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -2
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -0
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +1 -0
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +1 -0
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +1 -0
- airbyte_cdk/sources/declarative/extractors/http_selector.py +1 -0
- airbyte_cdk/sources/declarative/extractors/record_filter.py +6 -48
- airbyte_cdk/sources/declarative/extractors/record_selector.py +32 -4
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +7 -2
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +2 -1
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +5 -2
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +5 -2
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +1 -3
- airbyte_cdk/sources/declarative/interpolation/jinja.py +5 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -3
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +144 -0
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +41 -4
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +1 -0
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +1 -0
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -2
- airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +1 -0
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +9 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -1
- airbyte_cdk/sources/declarative/requesters/requester.py +1 -0
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -1
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +12 -4
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -4
- airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -0
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +1 -0
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -0
- airbyte_cdk/sources/embedded/tools.py +1 -0
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
- airbyte_cdk/sources/file_based/config/csv_format.py +2 -1
- airbyte_cdk/sources/file_based/config/excel_format.py +2 -1
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -1
- airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
- airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
- airbyte_cdk/sources/file_based/config/unstructured_format.py +2 -1
- airbyte_cdk/sources/file_based/file_based_source.py +2 -1
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +2 -1
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -0
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -1
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +9 -8
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -1
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +5 -4
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +1 -0
- airbyte_cdk/sources/streams/call_rate.py +1 -2
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -1
- airbyte_cdk/sources/streams/concurrent/adapters.py +8 -4
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +2 -1
- airbyte_cdk/sources/streams/concurrent/cursor.py +30 -6
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
- airbyte_cdk/sources/streams/core.py +2 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +2 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +1 -0
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +1 -0
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +2 -1
- airbyte_cdk/sources/streams/http/http.py +3 -2
- airbyte_cdk/sources/streams/http/http_client.py +49 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +2 -1
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +1 -0
- airbyte_cdk/sources/types.py +14 -1
- airbyte_cdk/sources/utils/schema_helpers.py +3 -2
- airbyte_cdk/sql/secrets.py +2 -1
- airbyte_cdk/sql/shared/sql_processor.py +8 -6
- airbyte_cdk/test/entrypoint_wrapper.py +4 -3
- airbyte_cdk/test/mock_http/mocker.py +1 -0
- airbyte_cdk/utils/schema_inferrer.py +2 -1
- airbyte_cdk/utils/slice_hasher.py +1 -1
- airbyte_cdk/utils/traced_exception.py +2 -1
- {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/METADATA +9 -2
- {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/RECORD +122 -123
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -35
- {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.7.1rc3.dist-info → airbyte_cdk-6.7.2.dist-info}/entry_points.txt +0 -0
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field
|
|
6
6
|
from typing import Any, List, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
8
|
import requests
|
9
|
+
|
9
10
|
from airbyte_cdk.sources.declarative.requesters.error_handlers.default_http_response_filter import (
|
10
11
|
DefaultHttpResponseFilter,
|
11
12
|
)
|
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
|
|
6
6
|
from typing import Any, Mapping, Optional, Set, Union
|
7
7
|
|
8
8
|
import requests
|
9
|
+
|
9
10
|
from airbyte_cdk.models import FailureType
|
10
11
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
11
12
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
@@ -6,6 +6,8 @@ from datetime import timedelta
|
|
6
6
|
from typing import Any, Dict, Iterable, Mapping, Optional
|
7
7
|
|
8
8
|
import requests
|
9
|
+
from requests import Response
|
10
|
+
|
9
11
|
from airbyte_cdk import AirbyteMessage
|
10
12
|
from airbyte_cdk.logger import lazy_log
|
11
13
|
from airbyte_cdk.models import FailureType, Type
|
@@ -23,7 +25,6 @@ from airbyte_cdk.sources.declarative.requesters.requester import Requester
|
|
23
25
|
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
|
24
26
|
from airbyte_cdk.sources.types import Record, StreamSlice
|
25
27
|
from airbyte_cdk.utils import AirbyteTracedException
|
26
|
-
from requests import Response
|
27
28
|
|
28
29
|
LOGGER = logging.getLogger("airbyte")
|
29
30
|
|
@@ -41,7 +42,7 @@ class AsyncHttpJobRepository(AsyncJobRepository):
|
|
41
42
|
|
42
43
|
job_timeout: Optional[timedelta] = None
|
43
44
|
record_extractor: RecordExtractor = field(
|
44
|
-
init=False, repr=False, default_factory=lambda: ResponseToFileExtractor()
|
45
|
+
init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
|
45
46
|
)
|
46
47
|
|
47
48
|
def __post_init__(self) -> None:
|
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
|
|
6
6
|
from typing import Any, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
8
|
import requests
|
9
|
+
|
9
10
|
from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
|
10
11
|
from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
|
11
12
|
|
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
|
|
6
6
|
from typing import Any, Mapping, Optional, Union
|
7
7
|
|
8
8
|
import requests
|
9
|
+
|
9
10
|
from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
10
11
|
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
|
11
12
|
PaginationStrategy,
|
@@ -6,10 +6,12 @@ from abc import ABC, abstractmethod
|
|
6
6
|
from typing import Any, Optional
|
7
7
|
|
8
8
|
import requests
|
9
|
+
|
9
10
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
10
11
|
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
|
11
12
|
PaginationStrategy,
|
12
13
|
)
|
14
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor
|
13
15
|
from airbyte_cdk.sources.types import Record
|
14
16
|
|
15
17
|
|
@@ -25,7 +27,11 @@ class PaginationStopCondition(ABC):
|
|
25
27
|
|
26
28
|
|
27
29
|
class CursorStopCondition(PaginationStopCondition):
|
28
|
-
def __init__(
|
30
|
+
def __init__(
|
31
|
+
self,
|
32
|
+
cursor: DeclarativeCursor
|
33
|
+
| ConcurrentCursor, # migrate to use both old and concurrent versions
|
34
|
+
):
|
29
35
|
self._cursor = cursor
|
30
36
|
|
31
37
|
def is_met(self, record: Record) -> bool:
|
@@ -46,8 +52,8 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
|
|
46
52
|
return None
|
47
53
|
return self._delegate.next_page_token(response, last_page_size, last_record)
|
48
54
|
|
49
|
-
def reset(self) -> None:
|
50
|
-
self._delegate.reset()
|
55
|
+
def reset(self, reset_value: Optional[Any] = None) -> None:
|
56
|
+
self._delegate.reset(reset_value)
|
51
57
|
|
52
58
|
def get_page_size(self) -> Optional[int]:
|
53
59
|
return self._delegate.get_page_size()
|
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py
CHANGED
@@ -5,6 +5,8 @@
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
6
|
from typing import Any, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
|
+
from deprecated import deprecated
|
9
|
+
|
8
10
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
|
9
11
|
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
|
10
12
|
InterpolatedNestedRequestInputProvider,
|
@@ -17,7 +19,6 @@ from airbyte_cdk.sources.declarative.requesters.request_options.request_options_
|
|
17
19
|
)
|
18
20
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
19
21
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
20
|
-
from deprecated import deprecated
|
21
22
|
|
22
23
|
RequestInput = Union[str, Mapping[str, str]]
|
23
24
|
ValidRequestTypes = (str, list)
|
@@ -7,6 +7,7 @@ from enum import Enum
|
|
7
7
|
from typing import Any, Callable, Mapping, MutableMapping, Optional, Union
|
8
8
|
|
9
9
|
import requests
|
10
|
+
|
10
11
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
11
12
|
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
|
12
13
|
RequestOptionsProvider,
|
@@ -4,6 +4,8 @@
|
|
4
4
|
from dataclasses import InitVar, dataclass, field
|
5
5
|
from typing import Any, Callable, Iterable, Mapping, Optional
|
6
6
|
|
7
|
+
from deprecated.classic import deprecated
|
8
|
+
|
7
9
|
from airbyte_cdk.models import FailureType
|
8
10
|
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
9
11
|
AsyncJobOrchestrator,
|
@@ -17,7 +19,6 @@ from airbyte_cdk.sources.source import ExperimentalClassWarning
|
|
17
19
|
from airbyte_cdk.sources.streams.core import StreamData
|
18
20
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
19
21
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
20
|
-
from deprecated.classic import deprecated
|
21
22
|
|
22
23
|
|
23
24
|
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
@@ -20,6 +20,7 @@ from typing import (
|
|
20
20
|
)
|
21
21
|
|
22
22
|
import requests
|
23
|
+
|
23
24
|
from airbyte_cdk.models import AirbyteMessage
|
24
25
|
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
25
26
|
from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor
|
@@ -467,8 +468,9 @@ class SimpleRetriever(Retriever):
|
|
467
468
|
else:
|
468
469
|
return None
|
469
470
|
|
470
|
-
|
471
|
-
|
471
|
+
def _extract_record(
|
472
|
+
self, stream_data: StreamData, stream_slice: StreamSlice
|
473
|
+
) -> Optional[Record]:
|
472
474
|
"""
|
473
475
|
As we allow the output of _read_pages to be StreamData, it can be multiple things. Therefore, we need to filter out and normalize
|
474
476
|
to data to streamline the rest of the process.
|
@@ -477,9 +479,15 @@ class SimpleRetriever(Retriever):
|
|
477
479
|
# Record is not part of `StreamData` but is the most common implementation of `Mapping[str, Any]` which is part of `StreamData`
|
478
480
|
return stream_data
|
479
481
|
elif isinstance(stream_data, (dict, Mapping)):
|
480
|
-
return Record(
|
482
|
+
return Record(
|
483
|
+
data=dict(stream_data), associated_slice=stream_slice, stream_name=self.name
|
484
|
+
)
|
481
485
|
elif isinstance(stream_data, AirbyteMessage) and stream_data.record:
|
482
|
-
return Record(
|
486
|
+
return Record(
|
487
|
+
data=stream_data.record.data, # type:ignore # AirbyteMessage always has record.data
|
488
|
+
associated_slice=stream_slice,
|
489
|
+
stream_name=self.name,
|
490
|
+
)
|
483
491
|
return None
|
484
492
|
|
485
493
|
# stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever
|
@@ -1,14 +1,13 @@
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
2
|
|
3
|
-
from typing import
|
3
|
+
from typing import Any, Callable, Iterable, Mapping, Optional
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
6
6
|
from airbyte_cdk.sources.message import MessageRepository
|
7
7
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
8
8
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
9
|
-
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
10
9
|
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
11
|
-
from airbyte_cdk.sources.types import StreamSlice
|
10
|
+
from airbyte_cdk.sources.types import Record, StreamSlice
|
12
11
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
13
12
|
|
14
13
|
|
@@ -59,7 +58,11 @@ class DeclarativePartition(Partition):
|
|
59
58
|
def read(self) -> Iterable[Record]:
|
60
59
|
for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
|
61
60
|
if isinstance(stream_data, Mapping):
|
62
|
-
yield Record(
|
61
|
+
yield Record(
|
62
|
+
data=stream_data,
|
63
|
+
stream_name=self.stream_name(),
|
64
|
+
associated_slice=self._stream_slice,
|
65
|
+
)
|
63
66
|
else:
|
64
67
|
self._message_repository.emit_message(stream_data)
|
65
68
|
|
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field
|
|
6
6
|
from typing import Any, Dict, List, Mapping, Optional, Type, Union
|
7
7
|
|
8
8
|
import dpath
|
9
|
+
|
9
10
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
10
11
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
11
12
|
from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
|
@@ -7,6 +7,7 @@ from typing import Any, Dict, List, Mapping, Optional
|
|
7
7
|
|
8
8
|
import dpath
|
9
9
|
import dpath.exceptions
|
10
|
+
|
10
11
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
11
12
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
12
13
|
from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
|
@@ -6,6 +6,7 @@ import pkgutil
|
|
6
6
|
from typing import Any, List, Mapping, Optional
|
7
7
|
|
8
8
|
import yaml
|
9
|
+
|
9
10
|
from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog
|
10
11
|
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
11
12
|
ConcurrentDeclarativeSource,
|
@@ -7,10 +7,11 @@ from abc import abstractmethod
|
|
7
7
|
from typing import Any, Dict, List, Literal, Optional, Union
|
8
8
|
|
9
9
|
import dpath
|
10
|
+
from pydantic.v1 import AnyUrl, BaseModel, Field
|
11
|
+
|
10
12
|
from airbyte_cdk import OneOfOptionConfig
|
11
13
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
12
14
|
from airbyte_cdk.sources.utils import schema_helpers
|
13
|
-
from pydantic.v1 import AnyUrl, BaseModel, Field
|
14
15
|
|
15
16
|
|
16
17
|
class DeliverRecords(BaseModel):
|
@@ -3,9 +3,10 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
|
6
|
-
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
7
6
|
from pydantic.v1 import BaseModel, Field
|
8
7
|
|
8
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
9
|
+
|
9
10
|
|
10
11
|
class AvroFormat(BaseModel):
|
11
12
|
class Config(OneOfOptionConfig):
|
@@ -6,10 +6,11 @@ import codecs
|
|
6
6
|
from enum import Enum
|
7
7
|
from typing import Any, Dict, List, Optional, Set, Union
|
8
8
|
|
9
|
-
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
10
9
|
from pydantic.v1 import BaseModel, Field, root_validator, validator
|
11
10
|
from pydantic.v1.error_wrappers import ValidationError
|
12
11
|
|
12
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
13
|
+
|
13
14
|
|
14
15
|
class InferenceType(Enum):
|
15
16
|
NONE = "None"
|
@@ -2,9 +2,10 @@
|
|
2
2
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
6
5
|
from pydantic.v1 import BaseModel, Field
|
7
6
|
|
7
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
|
+
|
8
9
|
|
9
10
|
class ExcelFormat(BaseModel):
|
10
11
|
class Config(OneOfOptionConfig):
|
@@ -5,6 +5,8 @@
|
|
5
5
|
from enum import Enum
|
6
6
|
from typing import Any, List, Mapping, Optional, Union
|
7
7
|
|
8
|
+
from pydantic.v1 import BaseModel, Field, validator
|
9
|
+
|
8
10
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
9
11
|
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
10
12
|
from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
|
@@ -13,7 +15,6 @@ from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
|
|
13
15
|
from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
|
14
16
|
from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
|
15
17
|
from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
|
16
|
-
from pydantic.v1 import BaseModel, Field, validator
|
17
18
|
|
18
19
|
PrimaryKeyType = Optional[Union[str, List[str]]]
|
19
20
|
|
@@ -2,9 +2,10 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
-
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
6
5
|
from pydantic.v1 import BaseModel, Field
|
7
6
|
|
7
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
|
+
|
8
9
|
|
9
10
|
class JsonlFormat(BaseModel):
|
10
11
|
class Config(OneOfOptionConfig):
|
@@ -3,9 +3,10 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
|
6
|
-
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
7
6
|
from pydantic.v1 import BaseModel, Field
|
8
7
|
|
8
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
9
|
+
|
9
10
|
|
10
11
|
class ParquetFormat(BaseModel):
|
11
12
|
class Config(OneOfOptionConfig):
|
@@ -4,9 +4,10 @@
|
|
4
4
|
|
5
5
|
from typing import List, Literal, Optional, Union
|
6
6
|
|
7
|
-
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
7
|
from pydantic.v1 import BaseModel, Field
|
9
8
|
|
9
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
10
|
+
|
10
11
|
|
11
12
|
class LocalProcessingConfigModel(BaseModel):
|
12
13
|
mode: Literal["local"] = Field("local", const=True)
|
@@ -8,6 +8,8 @@ from abc import ABC
|
|
8
8
|
from collections import Counter
|
9
9
|
from typing import Any, Iterator, List, Mapping, Optional, Tuple, Type, Union
|
10
10
|
|
11
|
+
from pydantic.v1.error_wrappers import ValidationError
|
12
|
+
|
11
13
|
from airbyte_cdk.logger import AirbyteLogFormatter, init_logger
|
12
14
|
from airbyte_cdk.models import (
|
13
15
|
AirbyteMessage,
|
@@ -60,7 +62,6 @@ from airbyte_cdk.sources.streams import Stream
|
|
60
62
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
61
63
|
from airbyte_cdk.utils.analytics_message import create_analytics_message
|
62
64
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
63
|
-
from pydantic.v1.error_wrappers import ValidationError
|
64
65
|
|
65
66
|
DEFAULT_CONCURRENCY = 100
|
66
67
|
MAX_CONCURRENCY = 100
|
@@ -10,9 +10,10 @@ from io import IOBase
|
|
10
10
|
from os import makedirs, path
|
11
11
|
from typing import Any, Dict, Iterable, List, Optional, Set
|
12
12
|
|
13
|
+
from wcmatch.glob import GLOBSTAR, globmatch
|
14
|
+
|
13
15
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
14
16
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
15
|
-
from wcmatch.glob import GLOBSTAR, globmatch
|
16
17
|
|
17
18
|
|
18
19
|
class FileReadMode(Enum):
|
@@ -6,6 +6,7 @@ import logging
|
|
6
6
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
|
7
7
|
|
8
8
|
import fastavro
|
9
|
+
|
9
10
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
10
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
11
12
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
@@ -12,6 +12,8 @@ from io import IOBase
|
|
12
12
|
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
|
13
13
|
from uuid import uuid4
|
14
14
|
|
15
|
+
from orjson import orjson
|
16
|
+
|
15
17
|
from airbyte_cdk.models import FailureType
|
16
18
|
from airbyte_cdk.sources.file_based.config.csv_format import (
|
17
19
|
CsvFormat,
|
@@ -29,7 +31,6 @@ from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeP
|
|
29
31
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
30
32
|
from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType
|
31
33
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
32
|
-
from orjson import orjson
|
33
34
|
|
34
35
|
DIALECT_NAME = "_config_dialect"
|
35
36
|
|
@@ -8,6 +8,11 @@ from pathlib import Path
|
|
8
8
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
9
9
|
|
10
10
|
import pandas as pd
|
11
|
+
from numpy import datetime64, issubdtype
|
12
|
+
from numpy import dtype as dtype_
|
13
|
+
from orjson import orjson
|
14
|
+
from pydantic.v1 import BaseModel
|
15
|
+
|
11
16
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
12
17
|
ExcelFormat,
|
13
18
|
FileBasedStreamConfig,
|
@@ -24,11 +29,6 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
|
24
29
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
25
30
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
26
31
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
27
|
-
from numpy import datetime64
|
28
|
-
from numpy import dtype as dtype_
|
29
|
-
from numpy import issubdtype
|
30
|
-
from orjson import orjson
|
31
|
-
from pydantic.v1 import BaseModel
|
32
32
|
|
33
33
|
|
34
34
|
class ExcelParser(FileTypeParser):
|
@@ -6,6 +6,8 @@ import json
|
|
6
6
|
import logging
|
7
7
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
8
8
|
|
9
|
+
from orjson import orjson
|
10
|
+
|
9
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
10
12
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
11
13
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
@@ -19,7 +21,6 @@ from airbyte_cdk.sources.file_based.schema_helpers import (
|
|
19
21
|
SchemaType,
|
20
22
|
merge_schemas,
|
21
23
|
)
|
22
|
-
from orjson import orjson
|
23
24
|
|
24
25
|
|
25
26
|
class JsonlParser(FileTypeParser):
|
@@ -10,6 +10,8 @@ from urllib.parse import unquote
|
|
10
10
|
|
11
11
|
import pyarrow as pa
|
12
12
|
import pyarrow.parquet as pq
|
13
|
+
from pyarrow import DictionaryArray, Scalar
|
14
|
+
|
13
15
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
14
16
|
FileBasedStreamConfig,
|
15
17
|
ParquetFormat,
|
@@ -26,7 +28,6 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
|
26
28
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
27
29
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
28
30
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
29
|
-
from pyarrow import DictionaryArray, Scalar
|
30
31
|
|
31
32
|
|
32
33
|
class ParquetParser(FileTypeParser):
|
@@ -9,7 +9,16 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union
|
|
9
9
|
|
10
10
|
import backoff
|
11
11
|
import dpath
|
12
|
+
import nltk
|
12
13
|
import requests
|
14
|
+
from unstructured.file_utils.filetype import (
|
15
|
+
EXT_TO_FILETYPE,
|
16
|
+
FILETYPE_TO_MIMETYPE,
|
17
|
+
STR_TO_FILETYPE,
|
18
|
+
FileType,
|
19
|
+
detect_filetype,
|
20
|
+
)
|
21
|
+
|
13
22
|
from airbyte_cdk.models import FailureType
|
14
23
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
15
24
|
from airbyte_cdk.sources.file_based.config.unstructured_format import (
|
@@ -28,14 +37,6 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
28
37
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
29
38
|
from airbyte_cdk.utils import is_cloud_environment
|
30
39
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
31
|
-
from unstructured.file_utils.filetype import (
|
32
|
-
EXT_TO_FILETYPE,
|
33
|
-
FILETYPE_TO_MIMETYPE,
|
34
|
-
STR_TO_FILETYPE,
|
35
|
-
FileType,
|
36
|
-
detect_filetype,
|
37
|
-
)
|
38
|
-
import nltk
|
39
40
|
|
40
41
|
unstructured_partition_pdf = None
|
41
42
|
unstructured_partition_docx = None
|
@@ -6,6 +6,8 @@ from abc import abstractmethod
|
|
6
6
|
from functools import cache, cached_property, lru_cache
|
7
7
|
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
|
8
8
|
|
9
|
+
from deprecated import deprecated
|
10
|
+
|
9
11
|
from airbyte_cdk import AirbyteMessage
|
10
12
|
from airbyte_cdk.models import SyncMode
|
11
13
|
from airbyte_cdk.sources.file_based.availability_strategy import (
|
@@ -30,7 +32,6 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
30
32
|
from airbyte_cdk.sources.file_based.types import StreamSlice
|
31
33
|
from airbyte_cdk.sources.streams import Stream
|
32
34
|
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
33
|
-
from deprecated import deprecated
|
34
35
|
|
35
36
|
|
36
37
|
class AbstractFileBasedStream(Stream):
|
@@ -7,6 +7,8 @@ import logging
|
|
7
7
|
from functools import cache, lru_cache
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
|
+
from deprecated.classic import deprecated
|
11
|
+
|
10
12
|
from airbyte_cdk.models import (
|
11
13
|
AirbyteLogMessage,
|
12
14
|
AirbyteMessage,
|
@@ -39,11 +41,10 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
|
|
39
41
|
)
|
40
42
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
41
43
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
42
|
-
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
43
44
|
from airbyte_cdk.sources.streams.core import StreamData
|
45
|
+
from airbyte_cdk.sources.types import Record
|
44
46
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
45
47
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
46
|
-
from deprecated.classic import deprecated
|
47
48
|
|
48
49
|
if TYPE_CHECKING:
|
49
50
|
from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
@@ -247,7 +248,7 @@ class FileBasedStreamPartition(Partition):
|
|
247
248
|
self._stream.transformer.transform(
|
248
249
|
data_to_return, self._stream.get_json_schema()
|
249
250
|
)
|
250
|
-
yield Record(data_to_return, self)
|
251
|
+
yield Record(data=data_to_return, stream_name=self.stream_name())
|
251
252
|
elif (
|
252
253
|
isinstance(record_data, AirbyteMessage)
|
253
254
|
and record_data.type == Type.RECORD
|
@@ -265,7 +266,7 @@ class FileBasedStreamPartition(Partition):
|
|
265
266
|
else:
|
266
267
|
yield Record(
|
267
268
|
data=record_message_data,
|
268
|
-
|
269
|
+
stream_name=self.stream_name(),
|
269
270
|
is_file_transfer_message=self._use_file_transfer(),
|
270
271
|
)
|
271
272
|
else:
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py
CHANGED
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
12
12
|
from airbyte_cdk.sources.file_based.types import StreamState
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
15
|
-
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
15
|
+
from airbyte_cdk.sources.types import Record
|
16
16
|
|
17
17
|
if TYPE_CHECKING:
|
18
18
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
@@ -19,7 +19,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
|
|
19
19
|
from airbyte_cdk.sources.message.repository import MessageRepository
|
20
20
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
21
21
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
22
|
-
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
22
|
+
from airbyte_cdk.sources.types import Record
|
23
23
|
|
24
24
|
if TYPE_CHECKING:
|
25
25
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|