airbyte-cdk 6.7.1__py3-none-any.whl → 6.7.1rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +1 -2
- airbyte_cdk/config_observation.py +1 -2
- airbyte_cdk/connector.py +0 -1
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -2
- airbyte_cdk/destinations/destination.py +1 -2
- airbyte_cdk/destinations/vector_db_based/config.py +1 -2
- airbyte_cdk/destinations/vector_db_based/document_processor.py +3 -4
- airbyte_cdk/destinations/vector_db_based/embedder.py +4 -5
- airbyte_cdk/entrypoint.py +2 -3
- airbyte_cdk/logger.py +1 -2
- airbyte_cdk/models/__init__.py +0 -2
- airbyte_cdk/models/airbyte_protocol.py +1 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -2
- airbyte_cdk/sources/declarative/auth/jwt.py +0 -1
- airbyte_cdk/sources/declarative/auth/oauth.py +0 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +0 -1
- airbyte_cdk/sources/declarative/auth/token.py +1 -2
- airbyte_cdk/sources/declarative/auth/token_provider.py +2 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +4 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +0 -167
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +2 -3
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +0 -1
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +0 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +0 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -1
- airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +48 -6
- airbyte_cdk/sources/declarative/extractors/record_selector.py +4 -32
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +4 -5
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +3 -4
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +0 -122
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +2 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +1 -2
- airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +3 -9
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +0 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +7 -12
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -7
- airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -1
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/embedded/tools.py +0 -1
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/config/avro_format.py +1 -2
- airbyte_cdk/sources/file_based/config/csv_format.py +1 -2
- airbyte_cdk/sources/file_based/config/excel_format.py +1 -2
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +1 -2
- airbyte_cdk/sources/file_based/config/jsonl_format.py +1 -2
- airbyte_cdk/sources/file_based/config/parquet_format.py +1 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +1 -2
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +1 -2
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +8 -9
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +1 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +4 -5
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +0 -1
- airbyte_cdk/sources/streams/call_rate.py +2 -1
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +1 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +4 -8
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +1 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +6 -30
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +35 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
- airbyte_cdk/sources/streams/core.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +1 -2
- airbyte_cdk/sources/streams/http/http.py +2 -3
- airbyte_cdk/sources/streams/http/http_client.py +8 -49
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -1
- airbyte_cdk/sources/types.py +1 -14
- airbyte_cdk/sources/utils/schema_helpers.py +2 -3
- airbyte_cdk/sql/secrets.py +1 -2
- airbyte_cdk/sql/shared/sql_processor.py +6 -8
- airbyte_cdk/test/entrypoint_wrapper.py +3 -4
- airbyte_cdk/test/mock_http/mocker.py +0 -1
- airbyte_cdk/utils/schema_inferrer.py +1 -2
- airbyte_cdk/utils/slice_hasher.py +1 -1
- airbyte_cdk/utils/traced_exception.py +1 -2
- {airbyte_cdk-6.7.1.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/METADATA +2 -9
- {airbyte_cdk-6.7.1.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/RECORD +123 -122
- {airbyte_cdk-6.7.1.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.1.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.7.1.dist-info → airbyte_cdk-6.7.1rc1.dist-info}/entry_points.txt +0 -0
@@ -6,12 +6,10 @@ from abc import ABC, abstractmethod
|
|
6
6
|
from typing import Any, Optional
|
7
7
|
|
8
8
|
import requests
|
9
|
-
|
10
9
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
11
10
|
from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
|
12
11
|
PaginationStrategy,
|
13
12
|
)
|
14
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor
|
15
13
|
from airbyte_cdk.sources.types import Record
|
16
14
|
|
17
15
|
|
@@ -27,11 +25,7 @@ class PaginationStopCondition(ABC):
|
|
27
25
|
|
28
26
|
|
29
27
|
class CursorStopCondition(PaginationStopCondition):
|
30
|
-
def __init__(
|
31
|
-
self,
|
32
|
-
cursor: DeclarativeCursor
|
33
|
-
| ConcurrentCursor, # migrate to use both old and concurrent versions
|
34
|
-
):
|
28
|
+
def __init__(self, cursor: DeclarativeCursor):
|
35
29
|
self._cursor = cursor
|
36
30
|
|
37
31
|
def is_met(self, record: Record) -> bool:
|
@@ -52,8 +46,8 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
|
|
52
46
|
return None
|
53
47
|
return self._delegate.next_page_token(response, last_page_size, last_record)
|
54
48
|
|
55
|
-
def reset(self
|
56
|
-
self._delegate.reset(
|
49
|
+
def reset(self) -> None:
|
50
|
+
self._delegate.reset()
|
57
51
|
|
58
52
|
def get_page_size(self) -> Optional[int]:
|
59
53
|
return self._delegate.get_page_size()
|
airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py
CHANGED
@@ -5,8 +5,6 @@
|
|
5
5
|
from dataclasses import InitVar, dataclass, field
|
6
6
|
from typing import Any, Mapping, MutableMapping, Optional, Union
|
7
7
|
|
8
|
-
from deprecated import deprecated
|
9
|
-
|
10
8
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
|
11
9
|
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
|
12
10
|
InterpolatedNestedRequestInputProvider,
|
@@ -19,6 +17,7 @@ from airbyte_cdk.sources.declarative.requesters.request_options.request_options_
|
|
19
17
|
)
|
20
18
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
21
19
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
20
|
+
from deprecated import deprecated
|
22
21
|
|
23
22
|
RequestInput = Union[str, Mapping[str, str]]
|
24
23
|
ValidRequestTypes = (str, list)
|
@@ -7,7 +7,6 @@ from enum import Enum
|
|
7
7
|
from typing import Any, Callable, Mapping, MutableMapping, Optional, Union
|
8
8
|
|
9
9
|
import requests
|
10
|
-
|
11
10
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
12
11
|
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
|
13
12
|
RequestOptionsProvider,
|
@@ -4,8 +4,6 @@
|
|
4
4
|
from dataclasses import InitVar, dataclass, field
|
5
5
|
from typing import Any, Callable, Iterable, Mapping, Optional
|
6
6
|
|
7
|
-
from deprecated.classic import deprecated
|
8
|
-
|
9
7
|
from airbyte_cdk.models import FailureType
|
10
8
|
from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
|
11
9
|
AsyncJobOrchestrator,
|
@@ -19,6 +17,7 @@ from airbyte_cdk.sources.source import ExperimentalClassWarning
|
|
19
17
|
from airbyte_cdk.sources.streams.core import StreamData
|
20
18
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
21
19
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
20
|
+
from deprecated.classic import deprecated
|
22
21
|
|
23
22
|
|
24
23
|
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
@@ -20,7 +20,6 @@ from typing import (
|
|
20
20
|
)
|
21
21
|
|
22
22
|
import requests
|
23
|
-
|
24
23
|
from airbyte_cdk.models import AirbyteMessage
|
25
24
|
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
26
25
|
from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor
|
@@ -361,6 +360,9 @@ class SimpleRetriever(Retriever):
|
|
361
360
|
next_page_token = self._next_page_token(response)
|
362
361
|
if not next_page_token:
|
363
362
|
pagination_complete = True
|
363
|
+
# Closing the response to avoid memory issues. Note that this assumes the caller as completely consumed the response before
|
364
|
+
# iterating on another one
|
365
|
+
response.close()
|
364
366
|
|
365
367
|
# Always return an empty generator just in case no records were ever yielded
|
366
368
|
yield from []
|
@@ -468,9 +470,8 @@ class SimpleRetriever(Retriever):
|
|
468
470
|
else:
|
469
471
|
return None
|
470
472
|
|
471
|
-
|
472
|
-
|
473
|
-
) -> Optional[Record]:
|
473
|
+
@staticmethod
|
474
|
+
def _extract_record(stream_data: StreamData, stream_slice: StreamSlice) -> Optional[Record]:
|
474
475
|
"""
|
475
476
|
As we allow the output of _read_pages to be StreamData, it can be multiple things. Therefore, we need to filter out and normalize
|
476
477
|
to data to streamline the rest of the process.
|
@@ -479,15 +480,9 @@ class SimpleRetriever(Retriever):
|
|
479
480
|
# Record is not part of `StreamData` but is the most common implementation of `Mapping[str, Any]` which is part of `StreamData`
|
480
481
|
return stream_data
|
481
482
|
elif isinstance(stream_data, (dict, Mapping)):
|
482
|
-
return Record(
|
483
|
-
data=dict(stream_data), associated_slice=stream_slice, stream_name=self.name
|
484
|
-
)
|
483
|
+
return Record(dict(stream_data), stream_slice)
|
485
484
|
elif isinstance(stream_data, AirbyteMessage) and stream_data.record:
|
486
|
-
return Record(
|
487
|
-
data=stream_data.record.data, # type:ignore # AirbyteMessage always has record.data
|
488
|
-
associated_slice=stream_slice,
|
489
|
-
stream_name=self.name,
|
490
|
-
)
|
485
|
+
return Record(stream_data.record.data, stream_slice)
|
491
486
|
return None
|
492
487
|
|
493
488
|
# stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever
|
@@ -1,13 +1,14 @@
|
|
1
1
|
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
2
|
|
3
|
-
from typing import
|
3
|
+
from typing import Iterable, Optional, Mapping, Any, Callable
|
4
4
|
|
5
5
|
from airbyte_cdk.sources.declarative.retrievers import Retriever
|
6
6
|
from airbyte_cdk.sources.message import MessageRepository
|
7
7
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
8
8
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
9
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
9
10
|
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
10
|
-
from airbyte_cdk.sources.types import
|
11
|
+
from airbyte_cdk.sources.types import StreamSlice
|
11
12
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
12
13
|
|
13
14
|
|
@@ -58,11 +59,7 @@ class DeclarativePartition(Partition):
|
|
58
59
|
def read(self) -> Iterable[Record]:
|
59
60
|
for stream_data in self._retriever.read_records(self._json_schema, self._stream_slice):
|
60
61
|
if isinstance(stream_data, Mapping):
|
61
|
-
yield Record(
|
62
|
-
data=stream_data,
|
63
|
-
stream_name=self.stream_name(),
|
64
|
-
associated_slice=self._stream_slice,
|
65
|
-
)
|
62
|
+
yield Record(stream_data, self)
|
66
63
|
else:
|
67
64
|
self._message_repository.emit_message(stream_data)
|
68
65
|
|
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass, field
|
|
6
6
|
from typing import Any, Dict, List, Mapping, Optional, Type, Union
|
7
7
|
|
8
8
|
import dpath
|
9
|
-
|
10
9
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
11
10
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
12
11
|
from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
|
@@ -7,7 +7,6 @@ from typing import Any, Dict, List, Mapping, Optional
|
|
7
7
|
|
8
8
|
import dpath
|
9
9
|
import dpath.exceptions
|
10
|
-
|
11
10
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
12
11
|
from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
13
12
|
from airbyte_cdk.sources.types import Config, FieldPointer, StreamSlice, StreamState
|
@@ -6,7 +6,6 @@ import pkgutil
|
|
6
6
|
from typing import Any, List, Mapping, Optional
|
7
7
|
|
8
8
|
import yaml
|
9
|
-
|
10
9
|
from airbyte_cdk.models import AirbyteStateMessage, ConfiguredAirbyteCatalog
|
11
10
|
from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
12
11
|
ConcurrentDeclarativeSource,
|
@@ -7,11 +7,10 @@ from abc import abstractmethod
|
|
7
7
|
from typing import Any, Dict, List, Literal, Optional, Union
|
8
8
|
|
9
9
|
import dpath
|
10
|
-
from pydantic.v1 import AnyUrl, BaseModel, Field
|
11
|
-
|
12
10
|
from airbyte_cdk import OneOfOptionConfig
|
13
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
14
12
|
from airbyte_cdk.sources.utils import schema_helpers
|
13
|
+
from pydantic.v1 import AnyUrl, BaseModel, Field
|
15
14
|
|
16
15
|
|
17
16
|
class DeliverRecords(BaseModel):
|
@@ -6,11 +6,10 @@ import codecs
|
|
6
6
|
from enum import Enum
|
7
7
|
from typing import Any, Dict, List, Optional, Set, Union
|
8
8
|
|
9
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
9
10
|
from pydantic.v1 import BaseModel, Field, root_validator, validator
|
10
11
|
from pydantic.v1.error_wrappers import ValidationError
|
11
12
|
|
12
|
-
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
13
|
-
|
14
13
|
|
15
14
|
class InferenceType(Enum):
|
16
15
|
NONE = "None"
|
@@ -5,8 +5,6 @@
|
|
5
5
|
from enum import Enum
|
6
6
|
from typing import Any, List, Mapping, Optional, Union
|
7
7
|
|
8
|
-
from pydantic.v1 import BaseModel, Field, validator
|
9
|
-
|
10
8
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
11
9
|
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
12
10
|
from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
|
@@ -15,6 +13,7 @@ from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
|
|
15
13
|
from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
|
16
14
|
from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
|
17
15
|
from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
|
16
|
+
from pydantic.v1 import BaseModel, Field, validator
|
18
17
|
|
19
18
|
PrimaryKeyType = Optional[Union[str, List[str]]]
|
20
19
|
|
@@ -4,9 +4,8 @@
|
|
4
4
|
|
5
5
|
from typing import List, Literal, Optional, Union
|
6
6
|
|
7
|
-
from pydantic.v1 import BaseModel, Field
|
8
|
-
|
9
7
|
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
|
+
from pydantic.v1 import BaseModel, Field
|
10
9
|
|
11
10
|
|
12
11
|
class LocalProcessingConfigModel(BaseModel):
|
@@ -8,8 +8,6 @@ from abc import ABC
|
|
8
8
|
from collections import Counter
|
9
9
|
from typing import Any, Iterator, List, Mapping, Optional, Tuple, Type, Union
|
10
10
|
|
11
|
-
from pydantic.v1.error_wrappers import ValidationError
|
12
|
-
|
13
11
|
from airbyte_cdk.logger import AirbyteLogFormatter, init_logger
|
14
12
|
from airbyte_cdk.models import (
|
15
13
|
AirbyteMessage,
|
@@ -62,6 +60,7 @@ from airbyte_cdk.sources.streams import Stream
|
|
62
60
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
63
61
|
from airbyte_cdk.utils.analytics_message import create_analytics_message
|
64
62
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
63
|
+
from pydantic.v1.error_wrappers import ValidationError
|
65
64
|
|
66
65
|
DEFAULT_CONCURRENCY = 100
|
67
66
|
MAX_CONCURRENCY = 100
|
@@ -10,10 +10,9 @@ from io import IOBase
|
|
10
10
|
from os import makedirs, path
|
11
11
|
from typing import Any, Dict, Iterable, List, Optional, Set
|
12
12
|
|
13
|
-
from wcmatch.glob import GLOBSTAR, globmatch
|
14
|
-
|
15
13
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
16
14
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
15
|
+
from wcmatch.glob import GLOBSTAR, globmatch
|
17
16
|
|
18
17
|
|
19
18
|
class FileReadMode(Enum):
|
@@ -6,7 +6,6 @@ import logging
|
|
6
6
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
|
7
7
|
|
8
8
|
import fastavro
|
9
|
-
|
10
9
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
11
10
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
12
11
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
@@ -12,8 +12,6 @@ from io import IOBase
|
|
12
12
|
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
|
13
13
|
from uuid import uuid4
|
14
14
|
|
15
|
-
from orjson import orjson
|
16
|
-
|
17
15
|
from airbyte_cdk.models import FailureType
|
18
16
|
from airbyte_cdk.sources.file_based.config.csv_format import (
|
19
17
|
CsvFormat,
|
@@ -31,6 +29,7 @@ from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeP
|
|
31
29
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
32
30
|
from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType
|
33
31
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
32
|
+
from orjson import orjson
|
34
33
|
|
35
34
|
DIALECT_NAME = "_config_dialect"
|
36
35
|
|
@@ -8,11 +8,6 @@ from pathlib import Path
|
|
8
8
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
9
9
|
|
10
10
|
import pandas as pd
|
11
|
-
from numpy import datetime64, issubdtype
|
12
|
-
from numpy import dtype as dtype_
|
13
|
-
from orjson import orjson
|
14
|
-
from pydantic.v1 import BaseModel
|
15
|
-
|
16
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
17
12
|
ExcelFormat,
|
18
13
|
FileBasedStreamConfig,
|
@@ -29,6 +24,11 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
|
29
24
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
30
25
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
31
26
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
27
|
+
from numpy import datetime64
|
28
|
+
from numpy import dtype as dtype_
|
29
|
+
from numpy import issubdtype
|
30
|
+
from orjson import orjson
|
31
|
+
from pydantic.v1 import BaseModel
|
32
32
|
|
33
33
|
|
34
34
|
class ExcelParser(FileTypeParser):
|
@@ -6,8 +6,6 @@ import json
|
|
6
6
|
import logging
|
7
7
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
8
8
|
|
9
|
-
from orjson import orjson
|
10
|
-
|
11
9
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
12
10
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
13
11
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
@@ -21,6 +19,7 @@ from airbyte_cdk.sources.file_based.schema_helpers import (
|
|
21
19
|
SchemaType,
|
22
20
|
merge_schemas,
|
23
21
|
)
|
22
|
+
from orjson import orjson
|
24
23
|
|
25
24
|
|
26
25
|
class JsonlParser(FileTypeParser):
|
@@ -10,8 +10,6 @@ from urllib.parse import unquote
|
|
10
10
|
|
11
11
|
import pyarrow as pa
|
12
12
|
import pyarrow.parquet as pq
|
13
|
-
from pyarrow import DictionaryArray, Scalar
|
14
|
-
|
15
13
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
16
14
|
FileBasedStreamConfig,
|
17
15
|
ParquetFormat,
|
@@ -28,6 +26,7 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
|
28
26
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
29
27
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
30
28
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
29
|
+
from pyarrow import DictionaryArray, Scalar
|
31
30
|
|
32
31
|
|
33
32
|
class ParquetParser(FileTypeParser):
|
@@ -9,16 +9,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union
|
|
9
9
|
|
10
10
|
import backoff
|
11
11
|
import dpath
|
12
|
-
import nltk
|
13
12
|
import requests
|
14
|
-
from unstructured.file_utils.filetype import (
|
15
|
-
EXT_TO_FILETYPE,
|
16
|
-
FILETYPE_TO_MIMETYPE,
|
17
|
-
STR_TO_FILETYPE,
|
18
|
-
FileType,
|
19
|
-
detect_filetype,
|
20
|
-
)
|
21
|
-
|
22
13
|
from airbyte_cdk.models import FailureType
|
23
14
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
24
15
|
from airbyte_cdk.sources.file_based.config.unstructured_format import (
|
@@ -37,6 +28,14 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
37
28
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
38
29
|
from airbyte_cdk.utils import is_cloud_environment
|
39
30
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
31
|
+
from unstructured.file_utils.filetype import (
|
32
|
+
EXT_TO_FILETYPE,
|
33
|
+
FILETYPE_TO_MIMETYPE,
|
34
|
+
STR_TO_FILETYPE,
|
35
|
+
FileType,
|
36
|
+
detect_filetype,
|
37
|
+
)
|
38
|
+
import nltk
|
40
39
|
|
41
40
|
unstructured_partition_pdf = None
|
42
41
|
unstructured_partition_docx = None
|
@@ -6,8 +6,6 @@ from abc import abstractmethod
|
|
6
6
|
from functools import cache, cached_property, lru_cache
|
7
7
|
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
|
8
8
|
|
9
|
-
from deprecated import deprecated
|
10
|
-
|
11
9
|
from airbyte_cdk import AirbyteMessage
|
12
10
|
from airbyte_cdk.models import SyncMode
|
13
11
|
from airbyte_cdk.sources.file_based.availability_strategy import (
|
@@ -32,6 +30,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
32
30
|
from airbyte_cdk.sources.file_based.types import StreamSlice
|
33
31
|
from airbyte_cdk.sources.streams import Stream
|
34
32
|
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
33
|
+
from deprecated import deprecated
|
35
34
|
|
36
35
|
|
37
36
|
class AbstractFileBasedStream(Stream):
|
@@ -7,8 +7,6 @@ import logging
|
|
7
7
|
from functools import cache, lru_cache
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
|
-
from deprecated.classic import deprecated
|
11
|
-
|
12
10
|
from airbyte_cdk.models import (
|
13
11
|
AirbyteLogMessage,
|
14
12
|
AirbyteMessage,
|
@@ -41,10 +39,11 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
|
|
41
39
|
)
|
42
40
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
43
41
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
42
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
44
43
|
from airbyte_cdk.sources.streams.core import StreamData
|
45
|
-
from airbyte_cdk.sources.types import Record
|
46
44
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
47
45
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
46
|
+
from deprecated.classic import deprecated
|
48
47
|
|
49
48
|
if TYPE_CHECKING:
|
50
49
|
from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
@@ -248,7 +247,7 @@ class FileBasedStreamPartition(Partition):
|
|
248
247
|
self._stream.transformer.transform(
|
249
248
|
data_to_return, self._stream.get_json_schema()
|
250
249
|
)
|
251
|
-
yield Record(
|
250
|
+
yield Record(data_to_return, self)
|
252
251
|
elif (
|
253
252
|
isinstance(record_data, AirbyteMessage)
|
254
253
|
and record_data.type == Type.RECORD
|
@@ -266,7 +265,7 @@ class FileBasedStreamPartition(Partition):
|
|
266
265
|
else:
|
267
266
|
yield Record(
|
268
267
|
data=record_message_data,
|
269
|
-
|
268
|
+
partition=self,
|
270
269
|
is_file_transfer_message=self._use_file_transfer(),
|
271
270
|
)
|
272
271
|
else:
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py
CHANGED
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
12
12
|
from airbyte_cdk.sources.file_based.types import StreamState
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
15
|
-
from airbyte_cdk.sources.
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
16
16
|
|
17
17
|
if TYPE_CHECKING:
|
18
18
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
@@ -19,7 +19,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
|
|
19
19
|
from airbyte_cdk.sources.message.repository import MessageRepository
|
20
20
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
21
21
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
22
|
-
from airbyte_cdk.sources.
|
22
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
23
23
|
|
24
24
|
if TYPE_CHECKING:
|
25
25
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
@@ -16,7 +16,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
|
|
16
16
|
from airbyte_cdk.sources.message import MessageRepository
|
17
17
|
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
|
18
18
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
19
|
-
from airbyte_cdk.sources.
|
19
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
20
20
|
|
21
21
|
if TYPE_CHECKING:
|
22
22
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
@@ -14,8 +14,9 @@ from urllib import parse
|
|
14
14
|
|
15
15
|
import requests
|
16
16
|
import requests_cache
|
17
|
-
from pyrate_limiter import InMemoryBucket, Limiter
|
17
|
+
from pyrate_limiter import InMemoryBucket, Limiter
|
18
18
|
from pyrate_limiter import Rate as PyRateRate
|
19
|
+
from pyrate_limiter import RateItem, TimeClock
|
19
20
|
from pyrate_limiter.exceptions import BucketFullException
|
20
21
|
|
21
22
|
# prevents mypy from complaining about missing session attributes in LimiterMixin
|
@@ -5,13 +5,12 @@
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from typing import Any, Iterable, Mapping, Optional
|
7
7
|
|
8
|
-
from deprecated.classic import deprecated
|
9
|
-
|
10
8
|
from airbyte_cdk.models import AirbyteStream
|
11
9
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
12
10
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
|
13
11
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
14
12
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
13
|
+
from deprecated.classic import deprecated
|
15
14
|
|
16
15
|
|
17
16
|
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
@@ -8,8 +8,6 @@ import logging
|
|
8
8
|
from functools import lru_cache
|
9
9
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
|
10
10
|
|
11
|
-
from deprecated.classic import deprecated
|
12
|
-
|
13
11
|
from airbyte_cdk.models import (
|
14
12
|
AirbyteLogMessage,
|
15
13
|
AirbyteMessage,
|
@@ -39,10 +37,12 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
|
|
39
37
|
)
|
40
38
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
41
39
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
40
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
42
41
|
from airbyte_cdk.sources.streams.core import StreamData
|
43
|
-
from airbyte_cdk.sources.types import Record
|
44
42
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
45
43
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
44
|
+
from deprecated.classic import deprecated
|
45
|
+
|
46
46
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
47
47
|
|
48
48
|
"""
|
@@ -294,11 +294,7 @@ class StreamPartition(Partition):
|
|
294
294
|
self._stream.transformer.transform(
|
295
295
|
data_to_return, self._stream.get_json_schema()
|
296
296
|
)
|
297
|
-
yield Record(
|
298
|
-
data=data_to_return,
|
299
|
-
stream_name=self.stream_name(),
|
300
|
-
associated_slice=self._slice,
|
301
|
-
)
|
297
|
+
yield Record(data_to_return, self)
|
302
298
|
else:
|
303
299
|
self._message_repository.emit_message(record_data)
|
304
300
|
except Exception as e:
|
@@ -6,9 +6,8 @@ import logging
|
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from typing import Optional
|
8
8
|
|
9
|
-
from deprecated.classic import deprecated
|
10
|
-
|
11
9
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
10
|
+
from deprecated.classic import deprecated
|
12
11
|
|
13
12
|
|
14
13
|
class StreamAvailability(ABC):
|