airbyte-cdk 6.7.0__py3-none-any.whl → 6.7.0.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/cli/source_declarative_manifest/_run.py +1 -2
- airbyte_cdk/config_observation.py +1 -2
- airbyte_cdk/connector.py +0 -1
- airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
- airbyte_cdk/connector_builder/main.py +1 -2
- airbyte_cdk/destinations/destination.py +1 -2
- airbyte_cdk/destinations/vector_db_based/config.py +1 -2
- airbyte_cdk/destinations/vector_db_based/document_processor.py +3 -4
- airbyte_cdk/destinations/vector_db_based/embedder.py +4 -5
- airbyte_cdk/entrypoint.py +2 -3
- airbyte_cdk/logger.py +1 -2
- airbyte_cdk/models/airbyte_protocol.py +1 -2
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
- airbyte_cdk/sources/config.py +1 -2
- airbyte_cdk/sources/declarative/auth/jwt.py +0 -1
- airbyte_cdk/sources/declarative/auth/oauth.py +0 -1
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +0 -1
- airbyte_cdk/sources/declarative/auth/token.py +1 -2
- airbyte_cdk/sources/declarative/auth/token_provider.py +2 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +4 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +13 -13
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +2 -3
- airbyte_cdk/sources/declarative/decoders/noop_decoder.py +0 -1
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +0 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +0 -1
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +0 -1
- airbyte_cdk/sources/declarative/extractors/http_selector.py +0 -1
- airbyte_cdk/sources/declarative/extractors/record_filter.py +48 -6
- airbyte_cdk/sources/declarative/extractors/record_selector.py +4 -32
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +1 -2
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +2 -5
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +4 -5
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +3 -4
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +2 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +0 -1
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +1 -2
- airbyte_cdk/sources/declarative/requesters/http_requester.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +0 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +3 -9
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +1 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +0 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +1 -2
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -12
- airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +4 -7
- airbyte_cdk/sources/declarative/transformations/add_fields.py +0 -1
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +0 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/embedded/tools.py +0 -1
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +1 -2
- airbyte_cdk/sources/file_based/config/avro_format.py +1 -2
- airbyte_cdk/sources/file_based/config/csv_format.py +1 -2
- airbyte_cdk/sources/file_based/config/excel_format.py +1 -2
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +1 -2
- airbyte_cdk/sources/file_based/config/jsonl_format.py +1 -2
- airbyte_cdk/sources/file_based/config/parquet_format.py +1 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +1 -2
- airbyte_cdk/sources/file_based/file_based_source.py +1 -2
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +1 -2
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +0 -1
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +1 -2
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +8 -9
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +1 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +4 -5
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
- airbyte_cdk/sources/http_logger.py +0 -1
- airbyte_cdk/sources/streams/call_rate.py +2 -1
- airbyte_cdk/sources/streams/concurrent/abstract_stream.py +1 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +4 -8
- airbyte_cdk/sources/streams/concurrent/availability_strategy.py +1 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +6 -30
- airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +35 -0
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
- airbyte_cdk/sources/streams/core.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +1 -2
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +1 -2
- airbyte_cdk/sources/streams/http/http.py +2 -3
- airbyte_cdk/sources/streams/http/http_client.py +2 -49
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +0 -1
- airbyte_cdk/sources/types.py +1 -14
- airbyte_cdk/sources/utils/schema_helpers.py +2 -3
- airbyte_cdk/sql/secrets.py +1 -2
- airbyte_cdk/sql/shared/sql_processor.py +6 -8
- airbyte_cdk/test/entrypoint_wrapper.py +3 -4
- airbyte_cdk/test/mock_http/mocker.py +0 -1
- airbyte_cdk/utils/schema_inferrer.py +1 -2
- airbyte_cdk/utils/slice_hasher.py +1 -1
- airbyte_cdk/utils/traced_exception.py +1 -2
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/METADATA +2 -9
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/RECORD +121 -120
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.7.0.dist-info → airbyte_cdk-6.7.0.dev4.dist-info}/entry_points.txt +0 -0
@@ -25,8 +25,6 @@ from datetime import datetime
|
|
25
25
|
from pathlib import Path
|
26
26
|
from typing import Any, cast
|
27
27
|
|
28
|
-
from orjson import orjson
|
29
|
-
|
30
28
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
|
31
29
|
from airbyte_cdk.models import (
|
32
30
|
AirbyteErrorTraceMessage,
|
@@ -44,6 +42,7 @@ from airbyte_cdk.sources.declarative.concurrent_declarative_source import (
|
|
44
42
|
)
|
45
43
|
from airbyte_cdk.sources.declarative.yaml_declarative_source import YamlDeclarativeSource
|
46
44
|
from airbyte_cdk.sources.source import TState
|
45
|
+
from orjson import orjson
|
47
46
|
|
48
47
|
|
49
48
|
class SourceLocalYaml(YamlDeclarativeSource):
|
@@ -10,8 +10,6 @@ import time
|
|
10
10
|
from copy import copy
|
11
11
|
from typing import Any, List, MutableMapping
|
12
12
|
|
13
|
-
from orjson import orjson
|
14
|
-
|
15
13
|
from airbyte_cdk.models import (
|
16
14
|
AirbyteControlConnectorConfigMessage,
|
17
15
|
AirbyteControlMessage,
|
@@ -20,6 +18,7 @@ from airbyte_cdk.models import (
|
|
20
18
|
OrchestratorType,
|
21
19
|
Type,
|
22
20
|
)
|
21
|
+
from orjson import orjson
|
23
22
|
|
24
23
|
|
25
24
|
class ObservedDict(dict): # type: ignore # disallow_any_generics is set to True, and dict is equivalent to dict[Any]
|
airbyte_cdk/connector.py
CHANGED
@@ -12,8 +12,8 @@ from airbyte_cdk.models import (
|
|
12
12
|
AirbyteRecordMessage,
|
13
13
|
AirbyteStateMessage,
|
14
14
|
ConfiguredAirbyteCatalog,
|
15
|
-
Type,
|
16
15
|
)
|
16
|
+
from airbyte_cdk.models import Type
|
17
17
|
from airbyte_cdk.models import Type as MessageType
|
18
18
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
19
19
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
@@ -6,8 +6,6 @@
|
|
6
6
|
import sys
|
7
7
|
from typing import Any, List, Mapping, Optional, Tuple
|
8
8
|
|
9
|
-
from orjson import orjson
|
10
|
-
|
11
9
|
from airbyte_cdk.connector import BaseConnector
|
12
10
|
from airbyte_cdk.connector_builder.connector_builder_handler import (
|
13
11
|
TestReadLimits,
|
@@ -27,6 +25,7 @@ from airbyte_cdk.models import (
|
|
27
25
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
28
26
|
from airbyte_cdk.sources.source import Source
|
29
27
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
28
|
+
from orjson import orjson
|
30
29
|
|
31
30
|
|
32
31
|
def get_config_and_catalog_from_args(
|
@@ -9,8 +9,6 @@ import sys
|
|
9
9
|
from abc import ABC, abstractmethod
|
10
10
|
from typing import Any, Iterable, List, Mapping
|
11
11
|
|
12
|
-
from orjson import orjson
|
13
|
-
|
14
12
|
from airbyte_cdk.connector import Connector
|
15
13
|
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
|
16
14
|
from airbyte_cdk.models import (
|
@@ -22,6 +20,7 @@ from airbyte_cdk.models import (
|
|
22
20
|
)
|
23
21
|
from airbyte_cdk.sources.utils.schema_helpers import check_config_against_spec_or_exit
|
24
22
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
23
|
+
from orjson import orjson
|
25
24
|
|
26
25
|
logger = logging.getLogger("airbyte")
|
27
26
|
|
@@ -5,10 +5,9 @@
|
|
5
5
|
from typing import Any, Dict, List, Literal, Optional, Union
|
6
6
|
|
7
7
|
import dpath
|
8
|
-
from pydantic.v1 import BaseModel, Field
|
9
|
-
|
10
8
|
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
11
9
|
from airbyte_cdk.utils.spec_schema_transformations import resolve_refs
|
10
|
+
from pydantic.v1 import BaseModel, Field
|
12
11
|
|
13
12
|
|
14
13
|
class SeparatorSplitterConfigModel(BaseModel):
|
@@ -8,10 +8,6 @@ from dataclasses import dataclass
|
|
8
8
|
from typing import Any, Dict, List, Mapping, Optional, Tuple
|
9
9
|
|
10
10
|
import dpath
|
11
|
-
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
|
12
|
-
from langchain.utils import stringify_dict
|
13
|
-
from langchain_core.documents.base import Document
|
14
|
-
|
15
11
|
from airbyte_cdk.destinations.vector_db_based.config import (
|
16
12
|
ProcessingConfigModel,
|
17
13
|
SeparatorSplitterConfigModel,
|
@@ -25,6 +21,9 @@ from airbyte_cdk.models import (
|
|
25
21
|
DestinationSyncMode,
|
26
22
|
)
|
27
23
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
|
24
|
+
from langchain.text_splitter import Language, RecursiveCharacterTextSplitter
|
25
|
+
from langchain.utils import stringify_dict
|
26
|
+
from langchain_core.documents.base import Document
|
28
27
|
|
29
28
|
METADATA_STREAM_FIELD = "_ab_stream"
|
30
29
|
METADATA_RECORD_ID_FIELD = "_ab_record_id"
|
@@ -7,11 +7,6 @@ from abc import ABC, abstractmethod
|
|
7
7
|
from dataclasses import dataclass
|
8
8
|
from typing import List, Optional, Union, cast
|
9
9
|
|
10
|
-
from langchain.embeddings.cohere import CohereEmbeddings
|
11
|
-
from langchain.embeddings.fake import FakeEmbeddings
|
12
|
-
from langchain.embeddings.localai import LocalAIEmbeddings
|
13
|
-
from langchain.embeddings.openai import OpenAIEmbeddings
|
14
|
-
|
15
10
|
from airbyte_cdk.destinations.vector_db_based.config import (
|
16
11
|
AzureOpenAIEmbeddingConfigModel,
|
17
12
|
CohereEmbeddingConfigModel,
|
@@ -24,6 +19,10 @@ from airbyte_cdk.destinations.vector_db_based.config import (
|
|
24
19
|
from airbyte_cdk.destinations.vector_db_based.utils import create_chunks, format_exception
|
25
20
|
from airbyte_cdk.models import AirbyteRecordMessage
|
26
21
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException, FailureType
|
22
|
+
from langchain.embeddings.cohere import CohereEmbeddings
|
23
|
+
from langchain.embeddings.fake import FakeEmbeddings
|
24
|
+
from langchain.embeddings.localai import LocalAIEmbeddings
|
25
|
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
27
26
|
|
28
27
|
|
29
28
|
@dataclass
|
airbyte_cdk/entrypoint.py
CHANGED
@@ -16,9 +16,6 @@ from typing import Any, DefaultDict, Iterable, List, Mapping, Optional
|
|
16
16
|
from urllib.parse import urlparse
|
17
17
|
|
18
18
|
import requests
|
19
|
-
from orjson import orjson
|
20
|
-
from requests import PreparedRequest, Response, Session
|
21
|
-
|
22
19
|
from airbyte_cdk.connector import TConfig
|
23
20
|
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
|
24
21
|
from airbyte_cdk.logger import init_logger
|
@@ -41,6 +38,8 @@ from airbyte_cdk.utils import is_cloud_environment, message_utils
|
|
41
38
|
from airbyte_cdk.utils.airbyte_secrets_utils import get_secrets, update_secrets
|
42
39
|
from airbyte_cdk.utils.constants import ENV_REQUEST_CACHE_PATH
|
43
40
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
41
|
+
from orjson import orjson
|
42
|
+
from requests import PreparedRequest, Response, Session
|
44
43
|
|
45
44
|
logger = init_logger("airbyte")
|
46
45
|
|
airbyte_cdk/logger.py
CHANGED
@@ -7,8 +7,6 @@ import logging
|
|
7
7
|
import logging.config
|
8
8
|
from typing import Any, Callable, Mapping, Optional, Tuple
|
9
9
|
|
10
|
-
from orjson import orjson
|
11
|
-
|
12
10
|
from airbyte_cdk.models import (
|
13
11
|
AirbyteLogMessage,
|
14
12
|
AirbyteMessage,
|
@@ -17,6 +15,7 @@ from airbyte_cdk.models import (
|
|
17
15
|
Type,
|
18
16
|
)
|
19
17
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
18
|
+
from orjson import orjson
|
20
19
|
|
21
20
|
LOGGING_CONFIG = {
|
22
21
|
"version": 1,
|
@@ -5,11 +5,10 @@
|
|
5
5
|
from dataclasses import InitVar, dataclass
|
6
6
|
from typing import Annotated, Any, Dict, List, Mapping, Optional, Union
|
7
7
|
|
8
|
+
from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
|
8
9
|
from airbyte_protocol_dataclasses.models import * # noqa: F403 # Allow '*'
|
9
10
|
from serpyco_rs.metadata import Alias
|
10
11
|
|
11
|
-
from airbyte_cdk.models.file_transfer_record_message import AirbyteFileTransferRecordMessage
|
12
|
-
|
13
12
|
# ruff: noqa: F405 # ignore fuzzy import issues with 'import *'
|
14
13
|
|
15
14
|
|
@@ -17,8 +17,8 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
|
|
17
17
|
from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
|
18
18
|
from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
|
19
19
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
20
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
20
21
|
from airbyte_cdk.sources.streams.concurrent.partitions.types import PartitionCompleteSentinel
|
21
|
-
from airbyte_cdk.sources.types import Record
|
22
22
|
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
23
23
|
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
24
24
|
from airbyte_cdk.utils import AirbyteTracedException
|
@@ -147,11 +147,11 @@ class ConcurrentReadProcessor:
|
|
147
147
|
# AbstractStreams are expected to return data as they are expected.
|
148
148
|
# Any transformation on the data should be done before reaching this point
|
149
149
|
message = stream_data_to_airbyte_message(
|
150
|
-
stream_name=record.stream_name,
|
150
|
+
stream_name=record.partition.stream_name(),
|
151
151
|
data_or_message=record.data,
|
152
152
|
is_file_transfer_message=record.is_file_transfer_message,
|
153
153
|
)
|
154
|
-
stream = self._stream_name_to_instance[record.stream_name]
|
154
|
+
stream = self._stream_name_to_instance[record.partition.stream_name()]
|
155
155
|
|
156
156
|
if message.type == MessageType.RECORD:
|
157
157
|
if self._record_counter[stream.name] == 0:
|
@@ -18,11 +18,11 @@ from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStrea
|
|
18
18
|
from airbyte_cdk.sources.streams.concurrent.partition_enqueuer import PartitionEnqueuer
|
19
19
|
from airbyte_cdk.sources.streams.concurrent.partition_reader import PartitionReader
|
20
20
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
21
|
+
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
21
22
|
from airbyte_cdk.sources.streams.concurrent.partitions.types import (
|
22
23
|
PartitionCompleteSentinel,
|
23
24
|
QueueItem,
|
24
25
|
)
|
25
|
-
from airbyte_cdk.sources.types import Record
|
26
26
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
27
27
|
|
28
28
|
|
airbyte_cdk/sources/config.py
CHANGED
@@ -8,7 +8,6 @@ from datetime import datetime
|
|
8
8
|
from typing import Any, Mapping, Optional, Union
|
9
9
|
|
10
10
|
import jwt
|
11
|
-
|
12
11
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
13
12
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
14
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
@@ -6,7 +6,6 @@ from dataclasses import InitVar, dataclass, field
|
|
6
6
|
from typing import Any, List, Mapping, Optional, Union
|
7
7
|
|
8
8
|
import pendulum
|
9
|
-
|
10
9
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
11
10
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_mapping import InterpolatedMapping
|
12
11
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
@@ -8,8 +8,6 @@ from dataclasses import InitVar, dataclass
|
|
8
8
|
from typing import Any, Mapping, Union
|
9
9
|
|
10
10
|
import requests
|
11
|
-
from cachetools import TTLCache, cached
|
12
|
-
|
13
11
|
from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
|
14
12
|
from airbyte_cdk.sources.declarative.auth.token_provider import TokenProvider
|
15
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
@@ -18,6 +16,7 @@ from airbyte_cdk.sources.declarative.requesters.request_option import (
|
|
18
16
|
RequestOptionType,
|
19
17
|
)
|
20
18
|
from airbyte_cdk.sources.types import Config
|
19
|
+
from cachetools import TTLCache, cached
|
21
20
|
|
22
21
|
|
23
22
|
@dataclass
|
@@ -10,9 +10,6 @@ from typing import Any, List, Mapping, Optional, Union
|
|
10
10
|
|
11
11
|
import dpath
|
12
12
|
import pendulum
|
13
|
-
from isodate import Duration
|
14
|
-
from pendulum import DateTime
|
15
|
-
|
16
13
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
17
14
|
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
|
18
15
|
from airbyte_cdk.sources.declarative.exceptions import ReadException
|
@@ -21,6 +18,8 @@ from airbyte_cdk.sources.declarative.requesters.requester import Requester
|
|
21
18
|
from airbyte_cdk.sources.http_logger import format_http_message
|
22
19
|
from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
|
23
20
|
from airbyte_cdk.sources.types import Config
|
21
|
+
from isodate import Duration
|
22
|
+
from pendulum import DateTime
|
24
23
|
|
25
24
|
|
26
25
|
class TokenProvider:
|
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import logging
|
6
|
-
from typing import Any,
|
6
|
+
from typing import Any, Generic, Iterator, List, Mapping, Optional, Tuple, Union, Callable
|
7
7
|
|
8
8
|
from airbyte_cdk.models import (
|
9
9
|
AirbyteCatalog,
|
@@ -27,16 +27,14 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
|
|
27
27
|
)
|
28
28
|
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
29
29
|
DatetimeBasedCursor as DatetimeBasedCursorModel,
|
30
|
-
)
|
31
|
-
from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
|
32
30
|
DeclarativeStream as DeclarativeStreamModel,
|
33
31
|
)
|
34
32
|
from airbyte_cdk.sources.declarative.parsers.model_to_component_factory import (
|
35
|
-
ComponentDefinition,
|
36
33
|
ModelToComponentFactory,
|
34
|
+
ComponentDefinition,
|
37
35
|
)
|
38
36
|
from airbyte_cdk.sources.declarative.requesters import HttpRequester
|
39
|
-
from airbyte_cdk.sources.declarative.retrievers import
|
37
|
+
from airbyte_cdk.sources.declarative.retrievers import SimpleRetriever, Retriever
|
40
38
|
from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_generator import (
|
41
39
|
DeclarativePartitionFactory,
|
42
40
|
StreamSlicerPartitionGenerator,
|
@@ -44,6 +42,7 @@ from airbyte_cdk.sources.declarative.stream_slicers.declarative_partition_genera
|
|
44
42
|
from airbyte_cdk.sources.declarative.transformations.add_fields import AddFields
|
45
43
|
from airbyte_cdk.sources.declarative.types import ConnectionDefinition
|
46
44
|
from airbyte_cdk.sources.source import TState
|
45
|
+
from airbyte_cdk.sources.types import Config, StreamState
|
47
46
|
from airbyte_cdk.sources.streams import Stream
|
48
47
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
49
48
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
@@ -51,7 +50,6 @@ from airbyte_cdk.sources.streams.concurrent.availability_strategy import (
|
|
51
50
|
)
|
52
51
|
from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
|
53
52
|
from airbyte_cdk.sources.streams.concurrent.helpers import get_primary_key_from_stream
|
54
|
-
from airbyte_cdk.sources.types import Config, StreamState
|
55
53
|
|
56
54
|
|
57
55
|
class ConcurrentDeclarativeSource(ManifestDeclarativeSource, Generic[TState]):
|
@@ -2790,21 +2790,21 @@ interpolation:
|
|
2790
2790
|
- created_at: "2020-01-01 00:00:00.000+00:00"
|
2791
2791
|
- updated_at: "2020-01-02 00:00:00.000+00:00"
|
2792
2792
|
macros:
|
2793
|
-
- title:
|
2793
|
+
- title: Now (UTC)
|
2794
2794
|
description: Returns the current date and time in the UTC timezone.
|
2795
2795
|
arguments: {}
|
2796
2796
|
return_type: Datetime
|
2797
2797
|
examples:
|
2798
2798
|
- "'{{ now_utc() }}' -> '2021-09-01 00:00:00+00:00'"
|
2799
2799
|
- "'{{ now_utc().strftime('%Y-%m-%d') }}' -> '2021-09-01'"
|
2800
|
-
- title:
|
2800
|
+
- title: Today (UTC)
|
2801
2801
|
description: Returns the current date in UTC timezone. The output is a date object.
|
2802
2802
|
arguments: {}
|
2803
2803
|
return_type: Date
|
2804
2804
|
examples:
|
2805
2805
|
- "'{{ today_utc() }}' -> '2021-09-01'"
|
2806
2806
|
- "'{{ today_utc().strftime('%Y/%m/%d')}}' -> '2021/09/01'"
|
2807
|
-
- title:
|
2807
|
+
- title: Timestamp
|
2808
2808
|
description: Converts a number or a string representing a datetime (formatted as ISO8601) to a timestamp. If the input is a number, it is converted to an int. If no timezone is specified, the string is interpreted as UTC.
|
2809
2809
|
arguments:
|
2810
2810
|
datetime: A string formatted as ISO8601 or an integer representing a unix timestamp
|
@@ -2815,7 +2815,7 @@ interpolation:
|
|
2815
2815
|
- "'{{ timestamp('2022-02-28T00:00:00Z') }}' -> 1646006400"
|
2816
2816
|
- "'{{ timestamp('2022-02-28 00:00:00Z') }}' -> 1646006400"
|
2817
2817
|
- "'{{ timestamp('2022-02-28T00:00:00-08:00') }}' -> 1646035200"
|
2818
|
-
- title:
|
2818
|
+
- title: Max
|
2819
2819
|
description: Returns the largest object of an iterable, or of two or more arguments.
|
2820
2820
|
arguments:
|
2821
2821
|
args: iterable or a sequence of two or more arguments
|
@@ -2823,7 +2823,7 @@ interpolation:
|
|
2823
2823
|
examples:
|
2824
2824
|
- "'{{ max(2, 3) }}' -> 3"
|
2825
2825
|
- "'{{ max([2, 3]) }}' -> 3"
|
2826
|
-
- title:
|
2826
|
+
- title: Day Delta
|
2827
2827
|
description: Returns the datetime of now() + num_days.
|
2828
2828
|
arguments:
|
2829
2829
|
num_days: The number of days to add to now
|
@@ -2833,8 +2833,8 @@ interpolation:
|
|
2833
2833
|
- "'{{ day_delta(1) }}' -> '2021-09-02T00:00:00.000000+0000'"
|
2834
2834
|
- "'{{ day_delta(-1) }}' -> '2021-08-31:00:00.000000+0000'"
|
2835
2835
|
- "'{{ day_delta(25, format='%Y-%m-%d') }}' -> '2021-09-02'"
|
2836
|
-
- title:
|
2837
|
-
description: Converts an ISO8601
|
2836
|
+
- title: Duration
|
2837
|
+
description: Converts an ISO8601 duration to datetime.timedelta.
|
2838
2838
|
arguments:
|
2839
2839
|
duration_string: "A string representing an ISO8601 duration. See https://www.digi.com/resources/documentation/digidocs//90001488-13/reference/r_iso_8601_duration_format.htm for more details."
|
2840
2840
|
return_type: datetime.timedelta
|
@@ -2842,7 +2842,7 @@ interpolation:
|
|
2842
2842
|
- "'{{ duration('P1D') }}' -> '1 day, 0:00:00'"
|
2843
2843
|
- "'{{ duration('P6DT23H') }}' -> '6 days, 23:00:00'"
|
2844
2844
|
- "'{{ (now_utc() - duration('P1D')).strftime('%Y-%m-%dT%H:%M:%SZ') }}' -> '2021-08-31T00:00:00Z'"
|
2845
|
-
- title:
|
2845
|
+
- title: Format Datetime
|
2846
2846
|
description: Converts a datetime or a datetime-string to the specified format.
|
2847
2847
|
arguments:
|
2848
2848
|
datetime: The datetime object or a string to convert. If datetime is a string, it must be formatted as ISO8601.
|
@@ -2854,7 +2854,7 @@ interpolation:
|
|
2854
2854
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ') }}"
|
2855
2855
|
- "{{ format_datetime(config['start_date'], '%Y-%m-%dT%H:%M:%S.%fZ', '%a, %d %b %Y %H:%M:%S %z') }}"
|
2856
2856
|
filters:
|
2857
|
-
- title:
|
2857
|
+
- title: Hash
|
2858
2858
|
description: Convert the specified value to a hashed string.
|
2859
2859
|
arguments:
|
2860
2860
|
hash_type: Valid hash type for the conversion ('md5' is the default value).
|
@@ -2864,26 +2864,26 @@ interpolation:
|
|
2864
2864
|
- "{{ 'Test client_secret' | hash() }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
|
2865
2865
|
- "{{ 'Test client_secret' | hash('md5') }} -> '3032d57a12f76b61a820e47b9a5a0cbb'"
|
2866
2866
|
- "{{ 'Test client_secret' | hash('md5', salt='salt') }} -> '5011a0168579c2d94cbbe1c6ad14327c'"
|
2867
|
-
- title:
|
2867
|
+
- title: Base64 encoder
|
2868
2868
|
description: Convert the specified value to a string in the base64 format.
|
2869
2869
|
arguments: {}
|
2870
2870
|
return_type: str
|
2871
2871
|
examples:
|
2872
2872
|
- "{{ 'Test client_secret' | base64encode }} -> 'VGVzdCBjbGllbnRfc2VjcmV0'"
|
2873
|
-
- title:
|
2873
|
+
- title: Base64 decoder
|
2874
2874
|
description: Decodes the specified base64 format value into a common string.
|
2875
2875
|
arguments: {}
|
2876
2876
|
return_type: str
|
2877
2877
|
examples:
|
2878
2878
|
- "{{ 'ZmFrZSByZWZyZXNoX3Rva2VuIHZhbHVl' | base64decode }} -> 'fake refresh_token value'"
|
2879
|
-
- title:
|
2879
|
+
- title: String
|
2880
2880
|
description: Converts the specified value to a string.
|
2881
2881
|
arguments: {}
|
2882
2882
|
return_type: str
|
2883
2883
|
examples:
|
2884
2884
|
- '{{ 1 | string }} -> "1"'
|
2885
2885
|
- '{{ ["hello", "world"] | string }} -> "["hello", "world"]"'
|
2886
|
-
- title:
|
2886
|
+
- title: Regex Search
|
2887
2887
|
description: Match the input string against a regular expression and return the first match.
|
2888
2888
|
arguments:
|
2889
2889
|
regex: The regular expression to search for. It must include a capture group.
|
@@ -5,12 +5,11 @@ import codecs
|
|
5
5
|
import logging
|
6
6
|
from dataclasses import InitVar, dataclass
|
7
7
|
from gzip import decompress
|
8
|
-
from typing import Any, Generator,
|
8
|
+
from typing import Any, Generator, Mapping, MutableMapping, List, Optional
|
9
9
|
|
10
|
-
import orjson
|
11
10
|
import requests
|
12
|
-
|
13
11
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
12
|
+
import orjson
|
14
13
|
|
15
14
|
logger = logging.getLogger("airbyte")
|
16
15
|
|
@@ -7,7 +7,6 @@ from typing import Any, Iterable, List, Mapping, MutableMapping, Union
|
|
7
7
|
|
8
8
|
import dpath
|
9
9
|
import requests
|
10
|
-
|
11
10
|
from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder
|
12
11
|
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
13
12
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
@@ -1,6 +1,7 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
+
import datetime
|
4
5
|
from dataclasses import InitVar, dataclass
|
5
6
|
from typing import Any, Iterable, Mapping, Optional, Union
|
6
7
|
|
@@ -10,7 +11,7 @@ from airbyte_cdk.sources.declarative.incremental import (
|
|
10
11
|
PerPartitionWithGlobalCursor,
|
11
12
|
)
|
12
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
13
|
-
from airbyte_cdk.sources.types import Config,
|
14
|
+
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
14
15
|
|
15
16
|
|
16
17
|
@dataclass
|
@@ -67,6 +68,20 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
67
68
|
self._date_time_based_cursor = date_time_based_cursor
|
68
69
|
self._substream_cursor = substream_cursor
|
69
70
|
|
71
|
+
@property
|
72
|
+
def _cursor_field(self) -> str:
|
73
|
+
return self._date_time_based_cursor.cursor_field.eval(self._date_time_based_cursor.config) # type: ignore # eval returns a string in this context
|
74
|
+
|
75
|
+
@property
|
76
|
+
def _start_date_from_config(self) -> datetime.datetime:
|
77
|
+
return self._date_time_based_cursor._start_datetime.get_datetime(
|
78
|
+
self._date_time_based_cursor.config
|
79
|
+
)
|
80
|
+
|
81
|
+
@property
|
82
|
+
def _end_datetime(self) -> datetime.datetime:
|
83
|
+
return self._date_time_based_cursor.select_best_end_datetime()
|
84
|
+
|
70
85
|
def filter_records(
|
71
86
|
self,
|
72
87
|
records: Iterable[Mapping[str, Any]],
|
@@ -74,14 +89,16 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
74
89
|
stream_slice: Optional[StreamSlice] = None,
|
75
90
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
76
91
|
) -> Iterable[Mapping[str, Any]]:
|
92
|
+
state_value = self._get_state_value(
|
93
|
+
stream_state, stream_slice or StreamSlice(partition={}, cursor_slice={})
|
94
|
+
)
|
95
|
+
filter_date: datetime.datetime = self._get_filter_date(state_value)
|
77
96
|
records = (
|
78
97
|
record
|
79
98
|
for record in records
|
80
|
-
if
|
81
|
-
|
82
|
-
|
83
|
-
Record(data=record, associated_slice=stream_slice, stream_name="")
|
84
|
-
)
|
99
|
+
if self._end_datetime
|
100
|
+
>= self._date_time_based_cursor.parse_date(record[self._cursor_field])
|
101
|
+
>= filter_date
|
85
102
|
)
|
86
103
|
if self.condition:
|
87
104
|
records = super().filter_records(
|
@@ -91,3 +108,28 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
91
108
|
next_page_token=next_page_token,
|
92
109
|
)
|
93
110
|
yield from records
|
111
|
+
|
112
|
+
def _get_state_value(
|
113
|
+
self, stream_state: StreamState, stream_slice: StreamSlice
|
114
|
+
) -> Optional[str]:
|
115
|
+
"""
|
116
|
+
Return cursor_value or None in case it was not found.
|
117
|
+
Cursor_value may be empty if:
|
118
|
+
1. It is an initial sync => no stream_state exists at all.
|
119
|
+
2. It is a parent-child stream and the initial sync has already been made, so stream_state is present.
|
120
|
+
During the second read, we receive one extra record from parent and therefore no stream_state for this record will be found.
|
121
|
+
|
122
|
+
:param StreamState stream_state: State
|
123
|
+
:param StreamSlice stream_slice: Current Stream slice
|
124
|
+
:return Optional[str]: cursor_value in case it was found, otherwise None.
|
125
|
+
"""
|
126
|
+
state = (self._substream_cursor or self._date_time_based_cursor).select_state(stream_slice)
|
127
|
+
|
128
|
+
return state.get(self._cursor_field) if state else None
|
129
|
+
|
130
|
+
def _get_filter_date(self, state_value: Optional[str]) -> datetime.datetime:
|
131
|
+
start_date_parsed = self._start_date_from_config
|
132
|
+
if state_value:
|
133
|
+
return max(start_date_parsed, self._date_time_based_cursor.parse_date(state_value))
|
134
|
+
else:
|
135
|
+
return start_date_parsed
|