airbyte-cdk 6.7.0__py3-none-any.whl → 6.7.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +1 -2
- airbyte_cdk/config_observation.py +1 -2
- airbyte_cdk/connector.py +0 -1
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -2
- airbyte_cdk/destinations/destination.py +1 -2
- airbyte_cdk/destinations/vector_db_based/config.py +1 -2
- airbyte_cdk/destinations/vector_db_based/document_processor.py +3 -4
- airbyte_cdk/destinations/vector_db_based/embedder.py +4 -5
- airbyte_cdk/entrypoint.py +2 -3
- airbyte_cdk/logger.py +1 -2
- airbyte_cdk/models/airbyte_protocol.py +1 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -2
- airbyte_cdk/sources/declarative/auth/jwt.py +0 -1
- airbyte_cdk/sources/declarative/auth/oauth.py +0 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +0 -1
- airbyte_cdk/sources/declarative/auth/token.py +1 -2
- airbyte_cdk/sources/declarative/auth/token_provider.py +2 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +4 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +13 -13
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +2 -3
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +0 -1
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +0 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +0 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -1
- airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +48 -6
- airbyte_cdk/sources/declarative/extractors/record_selector.py +4 -32
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +4 -5
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +3 -4
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +2 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +1 -2
- airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +3 -9
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +0 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -12
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -7
- airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -1
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/embedded/tools.py +0 -1
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/config/avro_format.py +1 -2
- airbyte_cdk/sources/file_based/config/csv_format.py +1 -2
- airbyte_cdk/sources/file_based/config/excel_format.py +1 -2
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +1 -2
- airbyte_cdk/sources/file_based/config/jsonl_format.py +1 -2
- airbyte_cdk/sources/file_based/config/parquet_format.py +1 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +1 -2
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +1 -2
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +8 -9
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +1 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +4 -5
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +0 -1
- airbyte_cdk/sources/streams/call_rate.py +2 -1
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +1 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +4 -8
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +1 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +6 -30
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +35 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
- airbyte_cdk/sources/streams/core.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +1 -2
- airbyte_cdk/sources/streams/http/http.py +2 -3
- airbyte_cdk/sources/streams/http/http_client.py +2 -49
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -1
- airbyte_cdk/sources/types.py +1 -14
- airbyte_cdk/sources/utils/schema_helpers.py +2 -3
- airbyte_cdk/sql/secrets.py +1 -2
- airbyte_cdk/sql/shared/sql_processor.py +6 -8
- airbyte_cdk/test/entrypoint_wrapper.py +3 -4
- airbyte_cdk/test/mock_http/mocker.py +0 -1
- airbyte_cdk/utils/schema_inferrer.py +1 -2
- airbyte_cdk/utils/slice_hasher.py +1 -1
- airbyte_cdk/utils/traced_exception.py +1 -2
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/METADATA +2 -9
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/RECORD +121 -120
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/entry_points.txt +0 -0
@@ -5,8 +5,6 @@
|
|
5
5
|
from enum import Enum
|
6
6
|
from typing import Any, List, Mapping, Optional, Union
|
7
7
|
|
8
|
-
from pydantic.v1 import BaseModel, Field, validator
|
9
|
-
|
10
8
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
11
9
|
from airbyte_cdk.sources.file_based.config.csv_format import CsvFormat
|
12
10
|
from airbyte_cdk.sources.file_based.config.excel_format import ExcelFormat
|
@@ -15,6 +13,7 @@ from airbyte_cdk.sources.file_based.config.parquet_format import ParquetFormat
|
|
15
13
|
from airbyte_cdk.sources.file_based.config.unstructured_format import UnstructuredFormat
|
16
14
|
from airbyte_cdk.sources.file_based.exceptions import ConfigValidationError, FileBasedSourceError
|
17
15
|
from airbyte_cdk.sources.file_based.schema_helpers import type_mapping_to_jsonschema
|
16
|
+
from pydantic.v1 import BaseModel, Field, validator
|
18
17
|
|
19
18
|
PrimaryKeyType = Optional[Union[str, List[str]]]
|
20
19
|
|
@@ -4,9 +4,8 @@
|
|
4
4
|
|
5
5
|
from typing import List, Literal, Optional, Union
|
6
6
|
|
7
|
-
from pydantic.v1 import BaseModel, Field
|
8
|
-
|
9
7
|
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
8
|
+
from pydantic.v1 import BaseModel, Field
|
10
9
|
|
11
10
|
|
12
11
|
class LocalProcessingConfigModel(BaseModel):
|
@@ -8,8 +8,6 @@ from abc import ABC
|
|
8
8
|
from collections import Counter
|
9
9
|
from typing import Any, Iterator, List, Mapping, Optional, Tuple, Type, Union
|
10
10
|
|
11
|
-
from pydantic.v1.error_wrappers import ValidationError
|
12
|
-
|
13
11
|
from airbyte_cdk.logger import AirbyteLogFormatter, init_logger
|
14
12
|
from airbyte_cdk.models import (
|
15
13
|
AirbyteMessage,
|
@@ -62,6 +60,7 @@ from airbyte_cdk.sources.streams import Stream
|
|
62
60
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
63
61
|
from airbyte_cdk.utils.analytics_message import create_analytics_message
|
64
62
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
63
|
+
from pydantic.v1.error_wrappers import ValidationError
|
65
64
|
|
66
65
|
DEFAULT_CONCURRENCY = 100
|
67
66
|
MAX_CONCURRENCY = 100
|
@@ -10,10 +10,9 @@ from io import IOBase
|
|
10
10
|
from os import makedirs, path
|
11
11
|
from typing import Any, Dict, Iterable, List, Optional, Set
|
12
12
|
|
13
|
-
from wcmatch.glob import GLOBSTAR, globmatch
|
14
|
-
|
15
13
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
16
14
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
15
|
+
from wcmatch.glob import GLOBSTAR, globmatch
|
17
16
|
|
18
17
|
|
19
18
|
class FileReadMode(Enum):
|
@@ -6,7 +6,6 @@ import logging
|
|
6
6
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple
|
7
7
|
|
8
8
|
import fastavro
|
9
|
-
|
10
9
|
from airbyte_cdk.sources.file_based.config.avro_format import AvroFormat
|
11
10
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
12
11
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
@@ -12,8 +12,6 @@ from io import IOBase
|
|
12
12
|
from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, Optional, Set, Tuple
|
13
13
|
from uuid import uuid4
|
14
14
|
|
15
|
-
from orjson import orjson
|
16
|
-
|
17
15
|
from airbyte_cdk.models import FailureType
|
18
16
|
from airbyte_cdk.sources.file_based.config.csv_format import (
|
19
17
|
CsvFormat,
|
@@ -31,6 +29,7 @@ from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeP
|
|
31
29
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
32
30
|
from airbyte_cdk.sources.file_based.schema_helpers import TYPE_PYTHON_MAPPING, SchemaType
|
33
31
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
32
|
+
from orjson import orjson
|
34
33
|
|
35
34
|
DIALECT_NAME = "_config_dialect"
|
36
35
|
|
@@ -8,11 +8,6 @@ from pathlib import Path
|
|
8
8
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
9
9
|
|
10
10
|
import pandas as pd
|
11
|
-
from numpy import datetime64, issubdtype
|
12
|
-
from numpy import dtype as dtype_
|
13
|
-
from orjson import orjson
|
14
|
-
from pydantic.v1 import BaseModel
|
15
|
-
|
16
11
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
17
12
|
ExcelFormat,
|
18
13
|
FileBasedStreamConfig,
|
@@ -29,6 +24,11 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
|
29
24
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
30
25
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
31
26
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
27
|
+
from numpy import datetime64
|
28
|
+
from numpy import dtype as dtype_
|
29
|
+
from numpy import issubdtype
|
30
|
+
from orjson import orjson
|
31
|
+
from pydantic.v1 import BaseModel
|
32
32
|
|
33
33
|
|
34
34
|
class ExcelParser(FileTypeParser):
|
@@ -6,8 +6,6 @@ import json
|
|
6
6
|
import logging
|
7
7
|
from typing import Any, Dict, Iterable, Mapping, Optional, Tuple, Union
|
8
8
|
|
9
|
-
from orjson import orjson
|
10
|
-
|
11
9
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
12
10
|
from airbyte_cdk.sources.file_based.exceptions import FileBasedSourceError, RecordParseError
|
13
11
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
@@ -21,6 +19,7 @@ from airbyte_cdk.sources.file_based.schema_helpers import (
|
|
21
19
|
SchemaType,
|
22
20
|
merge_schemas,
|
23
21
|
)
|
22
|
+
from orjson import orjson
|
24
23
|
|
25
24
|
|
26
25
|
class JsonlParser(FileTypeParser):
|
@@ -10,8 +10,6 @@ from urllib.parse import unquote
|
|
10
10
|
|
11
11
|
import pyarrow as pa
|
12
12
|
import pyarrow.parquet as pq
|
13
|
-
from pyarrow import DictionaryArray, Scalar
|
14
|
-
|
15
13
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import (
|
16
14
|
FileBasedStreamConfig,
|
17
15
|
ParquetFormat,
|
@@ -28,6 +26,7 @@ from airbyte_cdk.sources.file_based.file_based_stream_reader import (
|
|
28
26
|
from airbyte_cdk.sources.file_based.file_types.file_type_parser import FileTypeParser
|
29
27
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
30
28
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
29
|
+
from pyarrow import DictionaryArray, Scalar
|
31
30
|
|
32
31
|
|
33
32
|
class ParquetParser(FileTypeParser):
|
@@ -9,16 +9,7 @@ from typing import Any, Dict, Iterable, List, Mapping, Optional, Tuple, Union
|
|
9
9
|
|
10
10
|
import backoff
|
11
11
|
import dpath
|
12
|
-
import nltk
|
13
12
|
import requests
|
14
|
-
from unstructured.file_utils.filetype import (
|
15
|
-
EXT_TO_FILETYPE,
|
16
|
-
FILETYPE_TO_MIMETYPE,
|
17
|
-
STR_TO_FILETYPE,
|
18
|
-
FileType,
|
19
|
-
detect_filetype,
|
20
|
-
)
|
21
|
-
|
22
13
|
from airbyte_cdk.models import FailureType
|
23
14
|
from airbyte_cdk.sources.file_based.config.file_based_stream_config import FileBasedStreamConfig
|
24
15
|
from airbyte_cdk.sources.file_based.config.unstructured_format import (
|
@@ -37,6 +28,14 @@ from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
37
28
|
from airbyte_cdk.sources.file_based.schema_helpers import SchemaType
|
38
29
|
from airbyte_cdk.utils import is_cloud_environment
|
39
30
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
31
|
+
from unstructured.file_utils.filetype import (
|
32
|
+
EXT_TO_FILETYPE,
|
33
|
+
FILETYPE_TO_MIMETYPE,
|
34
|
+
STR_TO_FILETYPE,
|
35
|
+
FileType,
|
36
|
+
detect_filetype,
|
37
|
+
)
|
38
|
+
import nltk
|
40
39
|
|
41
40
|
unstructured_partition_pdf = None
|
42
41
|
unstructured_partition_docx = None
|
@@ -6,8 +6,6 @@ from abc import abstractmethod
|
|
6
6
|
from functools import cache, cached_property, lru_cache
|
7
7
|
from typing import Any, Dict, Iterable, List, Mapping, Optional, Type
|
8
8
|
|
9
|
-
from deprecated import deprecated
|
10
|
-
|
11
9
|
from airbyte_cdk import AirbyteMessage
|
12
10
|
from airbyte_cdk.models import SyncMode
|
13
11
|
from airbyte_cdk.sources.file_based.availability_strategy import (
|
@@ -32,6 +30,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
32
30
|
from airbyte_cdk.sources.file_based.types import StreamSlice
|
33
31
|
from airbyte_cdk.sources.streams import Stream
|
34
32
|
from airbyte_cdk.sources.streams.checkpoint import Cursor
|
33
|
+
from deprecated import deprecated
|
35
34
|
|
36
35
|
|
37
36
|
class AbstractFileBasedStream(Stream):
|
@@ -7,8 +7,6 @@ import logging
|
|
7
7
|
from functools import cache, lru_cache
|
8
8
|
from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, MutableMapping, Optional, Union
|
9
9
|
|
10
|
-
from deprecated.classic import deprecated
|
11
|
-
|
12
10
|
from airbyte_cdk.models import (
|
13
11
|
AirbyteLogMessage,
|
14
12
|
AirbyteMessage,
|
@@ -41,10 +39,11 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
|
|
41
39
|
)
|
42
40
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
43
41
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
42
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
44
43
|
from airbyte_cdk.sources.streams.core import StreamData
|
45
|
-
from airbyte_cdk.sources.types import Record
|
46
44
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
47
45
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
46
|
+
from deprecated.classic import deprecated
|
48
47
|
|
49
48
|
if TYPE_CHECKING:
|
50
49
|
from airbyte_cdk.sources.file_based.stream.concurrent.cursor import (
|
@@ -248,7 +247,7 @@ class FileBasedStreamPartition(Partition):
|
|
248
247
|
self._stream.transformer.transform(
|
249
248
|
data_to_return, self._stream.get_json_schema()
|
250
249
|
)
|
251
|
-
yield Record(
|
250
|
+
yield Record(data_to_return, self)
|
252
251
|
elif (
|
253
252
|
isinstance(record_data, AirbyteMessage)
|
254
253
|
and record_data.type == Type.RECORD
|
@@ -266,7 +265,7 @@ class FileBasedStreamPartition(Partition):
|
|
266
265
|
else:
|
267
266
|
yield Record(
|
268
267
|
data=record_message_data,
|
269
|
-
|
268
|
+
partition=self,
|
270
269
|
is_file_transfer_message=self._use_file_transfer(),
|
271
270
|
)
|
272
271
|
else:
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py
CHANGED
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.file_based.stream.cursor import AbstractFileBasedCursor
|
|
12
12
|
from airbyte_cdk.sources.file_based.types import StreamState
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
15
|
-
from airbyte_cdk.sources.
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
16
16
|
|
17
17
|
if TYPE_CHECKING:
|
18
18
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
@@ -19,7 +19,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
|
|
19
19
|
from airbyte_cdk.sources.message.repository import MessageRepository
|
20
20
|
from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
|
21
21
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
22
|
-
from airbyte_cdk.sources.
|
22
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
23
23
|
|
24
24
|
if TYPE_CHECKING:
|
25
25
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
@@ -16,7 +16,7 @@ from airbyte_cdk.sources.file_based.types import StreamState
|
|
16
16
|
from airbyte_cdk.sources.message import MessageRepository
|
17
17
|
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
|
18
18
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
19
|
-
from airbyte_cdk.sources.
|
19
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
20
20
|
|
21
21
|
if TYPE_CHECKING:
|
22
22
|
from airbyte_cdk.sources.file_based.stream.concurrent.adapters import FileBasedStreamPartition
|
@@ -14,8 +14,9 @@ from urllib import parse
|
|
14
14
|
|
15
15
|
import requests
|
16
16
|
import requests_cache
|
17
|
-
from pyrate_limiter import InMemoryBucket, Limiter
|
17
|
+
from pyrate_limiter import InMemoryBucket, Limiter
|
18
18
|
from pyrate_limiter import Rate as PyRateRate
|
19
|
+
from pyrate_limiter import RateItem, TimeClock
|
19
20
|
from pyrate_limiter.exceptions import BucketFullException
|
20
21
|
|
21
22
|
# prevents mypy from complaining about missing session attributes in LimiterMixin
|
@@ -5,13 +5,12 @@
|
|
5
5
|
from abc import ABC, abstractmethod
|
6
6
|
from typing import Any, Iterable, Mapping, Optional
|
7
7
|
|
8
|
-
from deprecated.classic import deprecated
|
9
|
-
|
10
8
|
from airbyte_cdk.models import AirbyteStream
|
11
9
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
12
10
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability
|
13
11
|
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor
|
14
12
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
13
|
+
from deprecated.classic import deprecated
|
15
14
|
|
16
15
|
|
17
16
|
@deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
|
@@ -8,8 +8,6 @@ import logging
|
|
8
8
|
from functools import lru_cache
|
9
9
|
from typing import Any, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
|
10
10
|
|
11
|
-
from deprecated.classic import deprecated
|
12
|
-
|
13
11
|
from airbyte_cdk.models import (
|
14
12
|
AirbyteLogMessage,
|
15
13
|
AirbyteMessage,
|
@@ -39,10 +37,12 @@ from airbyte_cdk.sources.streams.concurrent.helpers import (
|
|
39
37
|
)
|
40
38
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
41
39
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition_generator import PartitionGenerator
|
40
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
42
41
|
from airbyte_cdk.sources.streams.core import StreamData
|
43
|
-
from airbyte_cdk.sources.types import Record
|
44
42
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig
|
45
43
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
44
|
+
from deprecated.classic import deprecated
|
45
|
+
|
46
46
|
from airbyte_cdk.utils.slice_hasher import SliceHasher
|
47
47
|
|
48
48
|
"""
|
@@ -294,11 +294,7 @@ class StreamPartition(Partition):
|
|
294
294
|
self._stream.transformer.transform(
|
295
295
|
data_to_return, self._stream.get_json_schema()
|
296
296
|
)
|
297
|
-
yield Record(
|
298
|
-
data=data_to_return,
|
299
|
-
stream_name=self.stream_name(),
|
300
|
-
associated_slice=self._slice,
|
301
|
-
)
|
297
|
+
yield Record(data_to_return, self)
|
302
298
|
else:
|
303
299
|
self._message_repository.emit_message(record_data)
|
304
300
|
except Exception as e:
|
@@ -6,9 +6,8 @@ import logging
|
|
6
6
|
from abc import ABC, abstractmethod
|
7
7
|
from typing import Optional
|
8
8
|
|
9
|
-
from deprecated.classic import deprecated
|
10
|
-
|
11
9
|
from airbyte_cdk.sources.source import ExperimentalClassWarning
|
10
|
+
from deprecated.classic import deprecated
|
12
11
|
|
13
12
|
|
14
13
|
class StreamAvailability(ABC):
|
@@ -3,7 +3,6 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import functools
|
6
|
-
import logging
|
7
6
|
from abc import ABC, abstractmethod
|
8
7
|
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Protocol, Tuple
|
9
8
|
|
@@ -11,13 +10,12 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
|
11
10
|
from airbyte_cdk.sources.message import MessageRepository
|
12
11
|
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
|
13
12
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
13
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.partitions.stream_slicer import StreamSlicer
|
15
15
|
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import (
|
16
16
|
AbstractStreamStateConverter,
|
17
17
|
)
|
18
|
-
from airbyte_cdk.sources.types import
|
19
|
-
|
20
|
-
LOGGER = logging.getLogger("airbyte")
|
18
|
+
from airbyte_cdk.sources.types import StreamSlice
|
21
19
|
|
22
20
|
|
23
21
|
def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
|
@@ -175,11 +173,9 @@ class ConcurrentCursor(Cursor):
|
|
175
173
|
self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
|
176
174
|
self._lookback_window = lookback_window
|
177
175
|
self._slice_range = slice_range
|
178
|
-
self._most_recent_cursor_value_per_partition: MutableMapping[
|
176
|
+
self._most_recent_cursor_value_per_partition: MutableMapping[Partition, Any] = {}
|
179
177
|
self._has_closed_at_least_one_slice = False
|
180
178
|
self._cursor_granularity = cursor_granularity
|
181
|
-
# Flag to track if the logger has been triggered (per stream)
|
182
|
-
self._should_be_synced_logger_triggered = False
|
183
179
|
|
184
180
|
@property
|
185
181
|
def state(self) -> MutableMapping[str, Any]:
|
@@ -214,12 +210,12 @@ class ConcurrentCursor(Cursor):
|
|
214
210
|
|
215
211
|
def observe(self, record: Record) -> None:
|
216
212
|
most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
|
217
|
-
record.
|
213
|
+
record.partition
|
218
214
|
)
|
219
215
|
cursor_value = self._extract_cursor_value(record)
|
220
216
|
|
221
217
|
if most_recent_cursor_value is None or most_recent_cursor_value < cursor_value:
|
222
|
-
self._most_recent_cursor_value_per_partition[record.
|
218
|
+
self._most_recent_cursor_value_per_partition[record.partition] = cursor_value
|
223
219
|
|
224
220
|
def _extract_cursor_value(self, record: Record) -> Any:
|
225
221
|
return self._connector_state_converter.parse_value(self._cursor_field.extract_value(record))
|
@@ -235,9 +231,7 @@ class ConcurrentCursor(Cursor):
|
|
235
231
|
self._has_closed_at_least_one_slice = True
|
236
232
|
|
237
233
|
def _add_slice_to_state(self, partition: Partition) -> None:
|
238
|
-
most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(
|
239
|
-
partition.to_slice()
|
240
|
-
)
|
234
|
+
most_recent_cursor_value = self._most_recent_cursor_value_per_partition.get(partition)
|
241
235
|
|
242
236
|
if self._slice_boundary_fields:
|
243
237
|
if "slices" not in self.state:
|
@@ -448,21 +442,3 @@ class ConcurrentCursor(Cursor):
|
|
448
442
|
return lower + step
|
449
443
|
except OverflowError:
|
450
444
|
return self._end_provider()
|
451
|
-
|
452
|
-
def should_be_synced(self, record: Record) -> bool:
|
453
|
-
"""
|
454
|
-
Determines if a record should be synced based on its cursor value.
|
455
|
-
:param record: The record to evaluate
|
456
|
-
|
457
|
-
:return: True if the record's cursor value falls within the sync boundaries
|
458
|
-
"""
|
459
|
-
try:
|
460
|
-
record_cursor_value: CursorValueType = self._extract_cursor_value(record) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
|
461
|
-
except ValueError:
|
462
|
-
if not self._should_be_synced_logger_triggered:
|
463
|
-
LOGGER.warning(
|
464
|
-
f"Could not find cursor field `{self.cursor_field.cursor_field_key}` in record. The incremental sync will assume it needs to be synced"
|
465
|
-
)
|
466
|
-
self._should_be_synced_logger_triggered = True
|
467
|
-
return True
|
468
|
-
return self.start <= record_cursor_value <= self._end_provider()
|
@@ -0,0 +1,35 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
from typing import TYPE_CHECKING, Any, Mapping
|
6
|
+
|
7
|
+
if TYPE_CHECKING:
|
8
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
9
|
+
|
10
|
+
|
11
|
+
class Record:
|
12
|
+
"""
|
13
|
+
Represents a record read from a stream.
|
14
|
+
"""
|
15
|
+
|
16
|
+
def __init__(
|
17
|
+
self,
|
18
|
+
data: Mapping[str, Any],
|
19
|
+
partition: "Partition",
|
20
|
+
is_file_transfer_message: bool = False,
|
21
|
+
):
|
22
|
+
self.data = data
|
23
|
+
self.partition = partition
|
24
|
+
self.is_file_transfer_message = is_file_transfer_message
|
25
|
+
|
26
|
+
def __eq__(self, other: Any) -> bool:
|
27
|
+
if not isinstance(other, Record):
|
28
|
+
return False
|
29
|
+
return (
|
30
|
+
self.data == other.data
|
31
|
+
and self.partition.stream_name() == other.partition.stream_name()
|
32
|
+
)
|
33
|
+
|
34
|
+
def __repr__(self) -> str:
|
35
|
+
return f"Record(data={self.data}, stream_name={self.partition.stream_name()})"
|
@@ -8,7 +8,7 @@ from airbyte_cdk.sources.concurrent_source.partition_generation_completed_sentin
|
|
8
8
|
PartitionGenerationCompletedSentinel,
|
9
9
|
)
|
10
10
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
11
|
-
from airbyte_cdk.sources.
|
11
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
12
12
|
|
13
13
|
|
14
14
|
class PartitionCompleteSentinel:
|
@@ -7,7 +7,6 @@ from datetime import datetime, timedelta, timezone
|
|
7
7
|
from typing import Any, Callable, List, MutableMapping, Optional, Tuple
|
8
8
|
|
9
9
|
import pendulum
|
10
|
-
from pendulum.datetime import DateTime
|
11
10
|
|
12
11
|
# FIXME We would eventually like the Concurrent package do be agnostic of the declarative package. However, this is a breaking change and
|
13
12
|
# the goal in the short term is only to fix the issue we are seeing for source-declarative-manifest.
|
@@ -17,6 +16,7 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_sta
|
|
17
16
|
AbstractStreamStateConverter,
|
18
17
|
ConcurrencyCompatibleStateType,
|
19
18
|
)
|
19
|
+
from pendulum.datetime import DateTime
|
20
20
|
|
21
21
|
|
22
22
|
class DateTimeStreamStateConverter(AbstractStreamStateConverter):
|
@@ -10,8 +10,6 @@ from dataclasses import dataclass
|
|
10
10
|
from functools import cached_property, lru_cache
|
11
11
|
from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
|
12
12
|
|
13
|
-
from deprecated import deprecated
|
14
|
-
|
15
13
|
import airbyte_cdk.sources.utils.casing as casing
|
16
14
|
from airbyte_cdk.models import (
|
17
15
|
AirbyteMessage,
|
@@ -37,6 +35,7 @@ from airbyte_cdk.sources.types import StreamSlice
|
|
37
35
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, ResourceSchemaLoader
|
38
36
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
39
37
|
from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
|
38
|
+
from deprecated import deprecated
|
40
39
|
|
41
40
|
# A stream's read method can return one of the following types:
|
42
41
|
# Mapping[str, Any]: The content of an AirbyteRecordMessage
|
@@ -4,13 +4,12 @@
|
|
4
4
|
|
5
5
|
from typing import Mapping, Type, Union
|
6
6
|
|
7
|
-
from requests.exceptions import InvalidSchema, InvalidURL, RequestException
|
8
|
-
|
9
7
|
from airbyte_cdk.models import FailureType
|
10
8
|
from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
11
9
|
ErrorResolution,
|
12
10
|
ResponseAction,
|
13
11
|
)
|
12
|
+
from requests.exceptions import InvalidSchema, InvalidURL, RequestException
|
14
13
|
|
15
14
|
DEFAULT_ERROR_MAPPING: Mapping[Union[int, str, Type[Exception]], ErrorResolution] = {
|
16
15
|
InvalidSchema: ErrorResolution(
|
@@ -5,10 +5,9 @@ from enum import Enum
|
|
5
5
|
from typing import Optional, Union
|
6
6
|
|
7
7
|
import requests
|
8
|
-
from requests import HTTPError
|
9
|
-
|
10
8
|
from airbyte_cdk.models import FailureType
|
11
9
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
10
|
+
from requests import HTTPError
|
12
11
|
|
13
12
|
|
14
13
|
class ResponseAction(Enum):
|
@@ -9,9 +9,6 @@ from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optio
|
|
9
9
|
from urllib.parse import urljoin
|
10
10
|
|
11
11
|
import requests
|
12
|
-
from deprecated import deprecated
|
13
|
-
from requests.auth import AuthBase
|
14
|
-
|
15
12
|
from airbyte_cdk.models import AirbyteMessage, FailureType, SyncMode
|
16
13
|
from airbyte_cdk.models import Type as MessageType
|
17
14
|
from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
|
@@ -36,6 +33,8 @@ from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
|
36
33
|
from airbyte_cdk.sources.streams.http.http_client import HttpClient
|
37
34
|
from airbyte_cdk.sources.types import Record, StreamSlice
|
38
35
|
from airbyte_cdk.sources.utils.types import JsonType
|
36
|
+
from deprecated import deprecated
|
37
|
+
from requests.auth import AuthBase
|
39
38
|
|
40
39
|
# list of all possible HTTP methods which can be used for sending of request bodies
|
41
40
|
BODY_REQUEST_METHODS = ("GET", "POST", "PUT", "PATCH")
|