airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +17 -2
- airbyte_cdk/config_observation.py +10 -3
- airbyte_cdk/connector.py +19 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
- airbyte_cdk/connector_builder/main.py +26 -6
- airbyte_cdk/connector_builder/message_grouper.py +95 -25
- airbyte_cdk/destinations/destination.py +47 -14
- airbyte_cdk/destinations/vector_db_based/config.py +36 -14
- airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
- airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
- airbyte_cdk/entrypoint.py +82 -26
- airbyte_cdk/exception_handler.py +13 -3
- airbyte_cdk/logger.py +10 -2
- airbyte_cdk/models/airbyte_protocol.py +11 -5
- airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/abstract_source.py +63 -17
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
- airbyte_cdk/sources/connector_state_manager.py +32 -10
- airbyte_cdk/sources/declarative/async_job/job.py +3 -1
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
- airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
- airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/token.py +25 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
- airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +43 -0
- airbyte_cdk/sources/declarative/declarative_source.py +3 -1
- airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
- airbyte_cdk/sources/declarative/decoders/__init__.py +2 -2
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +48 -13
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
- airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +14 -5
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +697 -678
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +802 -232
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
- airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
- airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
- airbyte_cdk/sources/declarative/spec/spec.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
- airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
- airbyte_cdk/sources/declarative/types.py +8 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
- airbyte_cdk/sources/embedded/base_integration.py +14 -4
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +3 -1
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +13 -15
- airbyte_cdk/sources/file_based/file_based_source.py +82 -24
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
- airbyte_cdk/sources/http_logger.py +5 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +17 -7
- airbyte_cdk/sources/streams/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/call_rate.py +63 -19
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
- airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
- airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
- airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
- airbyte_cdk/sources/streams/core.py +77 -22
- airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
- airbyte_cdk/sources/streams/http/exceptions.py +2 -2
- airbyte_cdk/sources/streams/http/http.py +133 -33
- airbyte_cdk/sources/streams/http/http_client.py +91 -29
- airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +5 -1
- airbyte_cdk/sources/utils/record_helper.py +12 -3
- airbyte_cdk/sources/utils/schema_helpers.py +9 -3
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +24 -9
- airbyte_cdk/sql/exceptions.py +19 -6
- airbyte_cdk/sql/secrets.py +3 -1
- airbyte_cdk/sql/shared/catalog_providers.py +13 -4
- airbyte_cdk/sql/shared/sql_processor.py +44 -14
- airbyte_cdk/test/catalog_builder.py +19 -8
- airbyte_cdk/test/entrypoint_wrapper.py +27 -8
- airbyte_cdk/test/mock_http/mocker.py +41 -11
- airbyte_cdk/test/mock_http/request.py +9 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +29 -7
- airbyte_cdk/test/state_builder.py +10 -2
- airbyte_cdk/test/utils/data.py +6 -2
- airbyte_cdk/test/utils/http_mocking.py +3 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +11 -4
- airbyte_cdk/utils/print_buffer.py +6 -1
- airbyte_cdk/utils/schema_inferrer.py +30 -9
- airbyte_cdk/utils/spec_schema_transformations.py +3 -1
- airbyte_cdk/utils/traced_exception.py +35 -9
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/METADATA +8 -7
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/RECORD +200 -200
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.6.0.dist-info}/WHEEL +0 -0
@@ -1,14 +1,15 @@
|
|
1
1
|
#
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
|
-
|
4
|
+
import codecs
|
5
5
|
import logging
|
6
6
|
from dataclasses import InitVar, dataclass
|
7
|
-
from
|
7
|
+
from gzip import decompress
|
8
|
+
from typing import Any, Generator, Mapping, MutableMapping, List, Optional
|
8
9
|
|
9
10
|
import requests
|
10
11
|
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
11
|
-
|
12
|
+
import orjson
|
12
13
|
|
13
14
|
logger = logging.getLogger("airbyte")
|
14
15
|
|
@@ -24,21 +25,31 @@ class JsonDecoder(Decoder):
|
|
24
25
|
def is_stream_response(self) -> bool:
|
25
26
|
return False
|
26
27
|
|
27
|
-
def decode(
|
28
|
+
def decode(
|
29
|
+
self, response: requests.Response
|
30
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
28
31
|
"""
|
29
32
|
Given the response is an empty string or an emtpy list, the function will return a generator with an empty mapping.
|
30
33
|
"""
|
31
34
|
try:
|
32
35
|
body_json = response.json()
|
33
|
-
|
34
|
-
body_json = [body_json]
|
35
|
-
if len(body_json) == 0:
|
36
|
-
yield {}
|
37
|
-
else:
|
38
|
-
yield from body_json
|
36
|
+
yield from self.parse_body_json(body_json)
|
39
37
|
except requests.exceptions.JSONDecodeError:
|
40
|
-
logger.warning(
|
38
|
+
logger.warning(
|
39
|
+
f"Response cannot be parsed into json: {response.status_code=}, {response.text=}"
|
40
|
+
)
|
41
|
+
yield {}
|
42
|
+
|
43
|
+
@staticmethod
|
44
|
+
def parse_body_json(
|
45
|
+
body_json: MutableMapping[str, Any] | List[MutableMapping[str, Any]],
|
46
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
47
|
+
if not isinstance(body_json, list):
|
48
|
+
body_json = [body_json]
|
49
|
+
if len(body_json) == 0:
|
41
50
|
yield {}
|
51
|
+
else:
|
52
|
+
yield from body_json
|
42
53
|
|
43
54
|
|
44
55
|
@dataclass
|
@@ -52,7 +63,9 @@ class IterableDecoder(Decoder):
|
|
52
63
|
def is_stream_response(self) -> bool:
|
53
64
|
return True
|
54
65
|
|
55
|
-
def decode(
|
66
|
+
def decode(
|
67
|
+
self, response: requests.Response
|
68
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
56
69
|
for line in response.iter_lines():
|
57
70
|
yield {"record": line.decode()}
|
58
71
|
|
@@ -68,8 +81,30 @@ class JsonlDecoder(Decoder):
|
|
68
81
|
def is_stream_response(self) -> bool:
|
69
82
|
return True
|
70
83
|
|
71
|
-
def decode(
|
84
|
+
def decode(
|
85
|
+
self, response: requests.Response
|
86
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
72
87
|
# TODO???: set delimiter? usually it is `\n` but maybe it would be useful to set optional?
|
73
88
|
# https://github.com/airbytehq/airbyte-internal-issues/issues/8436
|
74
89
|
for record in response.iter_lines():
|
75
90
|
yield orjson.loads(record)
|
91
|
+
|
92
|
+
|
93
|
+
@dataclass
|
94
|
+
class GzipJsonDecoder(JsonDecoder):
|
95
|
+
encoding: Optional[str]
|
96
|
+
|
97
|
+
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
98
|
+
if self.encoding:
|
99
|
+
try:
|
100
|
+
codecs.lookup(self.encoding)
|
101
|
+
except LookupError:
|
102
|
+
raise ValueError(
|
103
|
+
f"Invalid encoding '{self.encoding}'. Please check provided encoding"
|
104
|
+
)
|
105
|
+
|
106
|
+
def decode(
|
107
|
+
self, response: requests.Response
|
108
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
109
|
+
raw_string = decompress(response.content).decode(encoding=self.encoding or "utf-8")
|
110
|
+
yield from self.parse_body_json(orjson.loads(raw_string))
|
@@ -28,7 +28,9 @@ class PaginationDecoderDecorator(Decoder):
|
|
28
28
|
def is_stream_response(self) -> bool:
|
29
29
|
return self._decoder.is_stream_response()
|
30
30
|
|
31
|
-
def decode(
|
31
|
+
def decode(
|
32
|
+
self, response: requests.Response
|
33
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
32
34
|
if self._decoder.is_stream_response():
|
33
35
|
logger.warning("Response is streamed and therefore will not be decoded for pagination.")
|
34
36
|
yield {}
|
@@ -78,7 +78,9 @@ class XmlDecoder(Decoder):
|
|
78
78
|
def is_stream_response(self) -> bool:
|
79
79
|
return False
|
80
80
|
|
81
|
-
def decode(
|
81
|
+
def decode(
|
82
|
+
self, response: requests.Response
|
83
|
+
) -> Generator[MutableMapping[str, Any], None, None]:
|
82
84
|
body_xml = response.text
|
83
85
|
try:
|
84
86
|
body_json = xmltodict.parse(body_xml)
|
@@ -89,5 +91,7 @@ class XmlDecoder(Decoder):
|
|
89
91
|
else:
|
90
92
|
yield from body_json
|
91
93
|
except ExpatError as exc:
|
92
|
-
logger.warning(
|
94
|
+
logger.warning(
|
95
|
+
f"Response cannot be parsed from XML: {response.status_code=}, {response.text=}, {exc=}"
|
96
|
+
)
|
93
97
|
yield {}
|
@@ -58,10 +58,14 @@ class DpathExtractor(RecordExtractor):
|
|
58
58
|
decoder: Decoder = field(default_factory=lambda: JsonDecoder(parameters={}))
|
59
59
|
|
60
60
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
61
|
-
self._field_path = [
|
61
|
+
self._field_path = [
|
62
|
+
InterpolatedString.create(path, parameters=parameters) for path in self.field_path
|
63
|
+
]
|
62
64
|
for path_index in range(len(self.field_path)):
|
63
65
|
if isinstance(self.field_path[path_index], str):
|
64
|
-
self._field_path[path_index] = InterpolatedString.create(
|
66
|
+
self._field_path[path_index] = InterpolatedString.create(
|
67
|
+
self.field_path[path_index], parameters=parameters
|
68
|
+
)
|
65
69
|
|
66
70
|
def extract_records(self, response: requests.Response) -> Iterable[MutableMapping[Any, Any]]:
|
67
71
|
for body in self.decoder.decode(response):
|
@@ -5,7 +5,11 @@ import datetime
|
|
5
5
|
from dataclasses import InitVar, dataclass
|
6
6
|
from typing import Any, Iterable, Mapping, Optional, Union
|
7
7
|
|
8
|
-
from airbyte_cdk.sources.declarative.incremental import
|
8
|
+
from airbyte_cdk.sources.declarative.incremental import (
|
9
|
+
DatetimeBasedCursor,
|
10
|
+
GlobalSubstreamCursor,
|
11
|
+
PerPartitionWithGlobalCursor,
|
12
|
+
)
|
9
13
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
|
10
14
|
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
11
15
|
|
@@ -24,7 +28,9 @@ class RecordFilter:
|
|
24
28
|
condition: str = ""
|
25
29
|
|
26
30
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
27
|
-
self._filter_interpolator = InterpolatedBoolean(
|
31
|
+
self._filter_interpolator = InterpolatedBoolean(
|
32
|
+
condition=self.condition, parameters=parameters
|
33
|
+
)
|
28
34
|
|
29
35
|
def filter_records(
|
30
36
|
self,
|
@@ -68,7 +74,9 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
68
74
|
|
69
75
|
@property
|
70
76
|
def _start_date_from_config(self) -> datetime.datetime:
|
71
|
-
return self._date_time_based_cursor._start_datetime.get_datetime(
|
77
|
+
return self._date_time_based_cursor._start_datetime.get_datetime(
|
78
|
+
self._date_time_based_cursor.config
|
79
|
+
)
|
72
80
|
|
73
81
|
@property
|
74
82
|
def _end_datetime(self) -> datetime.datetime:
|
@@ -81,20 +89,29 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
|
|
81
89
|
stream_slice: Optional[StreamSlice] = None,
|
82
90
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
83
91
|
) -> Iterable[Mapping[str, Any]]:
|
84
|
-
state_value = self._get_state_value(
|
92
|
+
state_value = self._get_state_value(
|
93
|
+
stream_state, stream_slice or StreamSlice(partition={}, cursor_slice={})
|
94
|
+
)
|
85
95
|
filter_date: datetime.datetime = self._get_filter_date(state_value)
|
86
96
|
records = (
|
87
97
|
record
|
88
98
|
for record in records
|
89
|
-
if self._end_datetime
|
99
|
+
if self._end_datetime
|
100
|
+
>= self._date_time_based_cursor.parse_date(record[self._cursor_field])
|
101
|
+
>= filter_date
|
90
102
|
)
|
91
103
|
if self.condition:
|
92
104
|
records = super().filter_records(
|
93
|
-
records=records,
|
105
|
+
records=records,
|
106
|
+
stream_state=stream_state,
|
107
|
+
stream_slice=stream_slice,
|
108
|
+
next_page_token=next_page_token,
|
94
109
|
)
|
95
110
|
yield from records
|
96
111
|
|
97
|
-
def _get_state_value(
|
112
|
+
def _get_state_value(
|
113
|
+
self, stream_state: StreamState, stream_slice: StreamSlice
|
114
|
+
) -> Optional[str]:
|
98
115
|
"""
|
99
116
|
Return cursor_value or None in case it was not found.
|
100
117
|
Cursor_value may be empty if:
|
@@ -61,7 +61,9 @@ class RecordSelector(HttpSelector):
|
|
61
61
|
:return: List of Records selected from the response
|
62
62
|
"""
|
63
63
|
all_data: Iterable[Mapping[str, Any]] = self.extractor.extract_records(response)
|
64
|
-
yield from self.filter_and_transform(
|
64
|
+
yield from self.filter_and_transform(
|
65
|
+
all_data, stream_state, records_schema, stream_slice, next_page_token
|
66
|
+
)
|
65
67
|
|
66
68
|
def filter_and_transform(
|
67
69
|
self,
|
@@ -106,7 +108,10 @@ class RecordSelector(HttpSelector):
|
|
106
108
|
) -> Iterable[Mapping[str, Any]]:
|
107
109
|
if self.record_filter:
|
108
110
|
yield from self.record_filter.filter_records(
|
109
|
-
records,
|
111
|
+
records,
|
112
|
+
stream_state=stream_state,
|
113
|
+
stream_slice=stream_slice,
|
114
|
+
next_page_token=next_page_token,
|
110
115
|
)
|
111
116
|
else:
|
112
117
|
yield from records
|
@@ -119,5 +124,7 @@ class RecordSelector(HttpSelector):
|
|
119
124
|
) -> Iterable[Mapping[str, Any]]:
|
120
125
|
for record in records:
|
121
126
|
for transformation in self.transformations:
|
122
|
-
transformation.transform(
|
127
|
+
transformation.transform(
|
128
|
+
record, config=self.config, stream_state=stream_state, stream_slice=stream_slice
|
129
|
+
) # type: ignore # record has type Mapping[str, Any], but Dict[str, Any] expected
|
123
130
|
yield record
|
@@ -68,7 +68,9 @@ class ResponseToFileExtractor(RecordExtractor):
|
|
68
68
|
|
69
69
|
res = b.replace(b"\x00", b"")
|
70
70
|
if len(res) < len(b):
|
71
|
-
self.logger.warning(
|
71
|
+
self.logger.warning(
|
72
|
+
"Filter 'null' bytes from string, size reduced %d -> %d chars", len(b), len(res)
|
73
|
+
)
|
72
74
|
return res
|
73
75
|
|
74
76
|
def _save_to_file(self, response: requests.Response) -> Tuple[str, str]:
|
@@ -106,9 +108,13 @@ class ResponseToFileExtractor(RecordExtractor):
|
|
106
108
|
if os.path.isfile(tmp_file):
|
107
109
|
return tmp_file, response_encoding
|
108
110
|
else:
|
109
|
-
raise ValueError(
|
111
|
+
raise ValueError(
|
112
|
+
f"The IO/Error occured while verifying binary data. Tmp file {tmp_file} doesn't exist."
|
113
|
+
)
|
110
114
|
|
111
|
-
def _read_with_chunks(
|
115
|
+
def _read_with_chunks(
|
116
|
+
self, path: str, file_encoding: str, chunk_size: int = 100
|
117
|
+
) -> Iterable[Mapping[str, Any]]:
|
112
118
|
"""
|
113
119
|
Reads data from a file in chunks and yields each row as a dictionary.
|
114
120
|
|
@@ -126,7 +132,9 @@ class ResponseToFileExtractor(RecordExtractor):
|
|
126
132
|
|
127
133
|
try:
|
128
134
|
with open(path, "r", encoding=file_encoding) as data:
|
129
|
-
chunks = pd.read_csv(
|
135
|
+
chunks = pd.read_csv(
|
136
|
+
data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
|
137
|
+
)
|
130
138
|
for chunk in chunks:
|
131
139
|
chunk = chunk.replace({nan: None}).to_dict(orient="records")
|
132
140
|
for row in chunk:
|
@@ -140,7 +148,9 @@ class ResponseToFileExtractor(RecordExtractor):
|
|
140
148
|
# remove binary tmp file, after data is read
|
141
149
|
os.remove(path)
|
142
150
|
|
143
|
-
def extract_records(
|
151
|
+
def extract_records(
|
152
|
+
self, response: Optional[requests.Response] = None
|
153
|
+
) -> Iterable[Mapping[str, Any]]:
|
144
154
|
"""
|
145
155
|
Extracts records from the given response by:
|
146
156
|
1) Saving the result to a tmp file
|
@@ -13,7 +13,10 @@ from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDate
|
|
13
13
|
from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
|
14
14
|
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
|
15
15
|
from airbyte_cdk.sources.declarative.interpolation.jinja import JinjaInterpolation
|
16
|
-
from airbyte_cdk.sources.declarative.requesters.request_option import
|
16
|
+
from airbyte_cdk.sources.declarative.requesters.request_option import (
|
17
|
+
RequestOption,
|
18
|
+
RequestOptionType,
|
19
|
+
)
|
17
20
|
from airbyte_cdk.sources.message import MessageRepository
|
18
21
|
from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
|
19
22
|
from isodate import Duration, duration_isoformat, parse_duration
|
@@ -72,27 +75,41 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
72
75
|
cursor_datetime_formats: List[str] = field(default_factory=lambda: [])
|
73
76
|
|
74
77
|
def __post_init__(self, parameters: Mapping[str, Any]) -> None:
|
75
|
-
if (self.step and not self.cursor_granularity) or (
|
78
|
+
if (self.step and not self.cursor_granularity) or (
|
79
|
+
not self.step and self.cursor_granularity
|
80
|
+
):
|
76
81
|
raise ValueError(
|
77
82
|
f"If step is defined, cursor_granularity should be as well and vice-versa. "
|
78
83
|
f"Right now, step is `{self.step}` and cursor_granularity is `{self.cursor_granularity}`"
|
79
84
|
)
|
80
85
|
self._start_datetime = MinMaxDatetime.create(self.start_datetime, parameters)
|
81
|
-
self._end_datetime =
|
86
|
+
self._end_datetime = (
|
87
|
+
None if not self.end_datetime else MinMaxDatetime.create(self.end_datetime, parameters)
|
88
|
+
)
|
82
89
|
|
83
90
|
self._timezone = datetime.timezone.utc
|
84
91
|
self._interpolation = JinjaInterpolation()
|
85
92
|
|
86
93
|
self._step = (
|
87
|
-
self._parse_timedelta(
|
94
|
+
self._parse_timedelta(
|
95
|
+
InterpolatedString.create(self.step, parameters=parameters).eval(self.config)
|
96
|
+
)
|
88
97
|
if self.step
|
89
98
|
else datetime.timedelta.max
|
90
99
|
)
|
91
100
|
self._cursor_granularity = self._parse_timedelta(self.cursor_granularity)
|
92
101
|
self.cursor_field = InterpolatedString.create(self.cursor_field, parameters=parameters)
|
93
|
-
self._lookback_window =
|
94
|
-
|
95
|
-
|
102
|
+
self._lookback_window = (
|
103
|
+
InterpolatedString.create(self.lookback_window, parameters=parameters)
|
104
|
+
if self.lookback_window
|
105
|
+
else None
|
106
|
+
)
|
107
|
+
self._partition_field_start = InterpolatedString.create(
|
108
|
+
self.partition_field_start or "start_time", parameters=parameters
|
109
|
+
)
|
110
|
+
self._partition_field_end = InterpolatedString.create(
|
111
|
+
self.partition_field_end or "end_time", parameters=parameters
|
112
|
+
)
|
96
113
|
self._parser = DatetimeParser()
|
97
114
|
|
98
115
|
# If datetime format is not specified then start/end datetime should inherit it from the stream slicer
|
@@ -114,7 +131,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
114
131
|
|
115
132
|
:param stream_state: The state of the stream as returned by get_stream_state
|
116
133
|
"""
|
117
|
-
self._cursor =
|
134
|
+
self._cursor = (
|
135
|
+
stream_state.get(self.cursor_field.eval(self.config)) if stream_state else None
|
136
|
+
) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
|
118
137
|
|
119
138
|
def observe(self, stream_slice: StreamSlice, record: Record) -> None:
|
120
139
|
"""
|
@@ -131,28 +150,38 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
131
150
|
|
132
151
|
start_field = self._partition_field_start.eval(self.config)
|
133
152
|
end_field = self._partition_field_end.eval(self.config)
|
134
|
-
is_highest_observed_cursor_value =
|
135
|
-
|
136
|
-
|
153
|
+
is_highest_observed_cursor_value = (
|
154
|
+
not self._highest_observed_cursor_field_value
|
155
|
+
or self.parse_date(record_cursor_value)
|
156
|
+
> self.parse_date(self._highest_observed_cursor_field_value)
|
157
|
+
)
|
137
158
|
if (
|
138
|
-
self._is_within_daterange_boundaries(
|
159
|
+
self._is_within_daterange_boundaries(
|
160
|
+
record, stream_slice.get(start_field), stream_slice.get(end_field)
|
161
|
+
) # type: ignore # we know that stream_slices for these cursors will use a string representing an unparsed date
|
139
162
|
and is_highest_observed_cursor_value
|
140
163
|
):
|
141
164
|
self._highest_observed_cursor_field_value = record_cursor_value
|
142
165
|
|
143
166
|
def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
|
144
167
|
if stream_slice.partition:
|
145
|
-
raise ValueError(
|
168
|
+
raise ValueError(
|
169
|
+
f"Stream slice {stream_slice} should not have a partition. Got {stream_slice.partition}."
|
170
|
+
)
|
146
171
|
cursor_value_str_by_cursor_value_datetime = dict(
|
147
172
|
map(
|
148
173
|
# we need to ensure the cursor value is preserved as is in the state else the CATs might complain of something like
|
149
174
|
# 2023-01-04T17:30:19.000Z' <= '2023-01-04T17:30:19.000000Z'
|
150
175
|
lambda datetime_str: (self.parse_date(datetime_str), datetime_str), # type: ignore # because of the filter on the next line, this will only be called with a str
|
151
|
-
filter(
|
176
|
+
filter(
|
177
|
+
lambda item: item, [self._cursor, self._highest_observed_cursor_field_value]
|
178
|
+
),
|
152
179
|
)
|
153
180
|
)
|
154
181
|
self._cursor = (
|
155
|
-
cursor_value_str_by_cursor_value_datetime[
|
182
|
+
cursor_value_str_by_cursor_value_datetime[
|
183
|
+
max(cursor_value_str_by_cursor_value_datetime.keys())
|
184
|
+
]
|
156
185
|
if cursor_value_str_by_cursor_value_datetime
|
157
186
|
else None
|
158
187
|
)
|
@@ -175,11 +204,19 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
175
204
|
# through each slice and does not belong to a specific slice. We just return stream state as it is.
|
176
205
|
return self.get_stream_state()
|
177
206
|
|
178
|
-
def _calculate_earliest_possible_value(
|
179
|
-
|
180
|
-
|
207
|
+
def _calculate_earliest_possible_value(
|
208
|
+
self, end_datetime: datetime.datetime
|
209
|
+
) -> datetime.datetime:
|
210
|
+
lookback_delta = self._parse_timedelta(
|
211
|
+
self._lookback_window.eval(self.config) if self._lookback_window else "P0D"
|
212
|
+
)
|
213
|
+
earliest_possible_start_datetime = min(
|
214
|
+
self._start_datetime.get_datetime(self.config), end_datetime
|
215
|
+
)
|
181
216
|
try:
|
182
|
-
cursor_datetime =
|
217
|
+
cursor_datetime = (
|
218
|
+
self._calculate_cursor_datetime_from_state(self.get_stream_state()) - lookback_delta
|
219
|
+
)
|
183
220
|
except OverflowError:
|
184
221
|
# cursor_datetime defers to the minimum date if it does not exist in the state. Trying to subtract
|
185
222
|
# a timedelta from the minimum datetime results in an OverflowError
|
@@ -200,7 +237,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
200
237
|
return now
|
201
238
|
return min(self._end_datetime.get_datetime(self.config), now)
|
202
239
|
|
203
|
-
def _calculate_cursor_datetime_from_state(
|
240
|
+
def _calculate_cursor_datetime_from_state(
|
241
|
+
self, stream_state: Mapping[str, Any]
|
242
|
+
) -> datetime.datetime:
|
204
243
|
if self.cursor_field.eval(self.config, stream_state=stream_state) in stream_state: # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
|
205
244
|
return self.parse_date(stream_state[self.cursor_field.eval(self.config)]) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
|
206
245
|
return datetime.datetime.min.replace(tzinfo=datetime.timezone.utc)
|
@@ -209,7 +248,10 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
209
248
|
return self._parser.format(dt, self.datetime_format)
|
210
249
|
|
211
250
|
def _partition_daterange(
|
212
|
-
self,
|
251
|
+
self,
|
252
|
+
start: datetime.datetime,
|
253
|
+
end: datetime.datetime,
|
254
|
+
step: Union[datetime.timedelta, Duration],
|
213
255
|
) -> List[StreamSlice]:
|
214
256
|
start_field = self._partition_field_start.eval(self.config)
|
215
257
|
end_field = self._partition_field_end.eval(self.config)
|
@@ -220,7 +262,11 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
220
262
|
end_date = self._get_date(next_start - self._cursor_granularity, end, min)
|
221
263
|
dates.append(
|
222
264
|
StreamSlice(
|
223
|
-
partition={},
|
265
|
+
partition={},
|
266
|
+
cursor_slice={
|
267
|
+
start_field: self._format_datetime(start),
|
268
|
+
end_field: self._format_datetime(end_date),
|
269
|
+
},
|
224
270
|
)
|
225
271
|
)
|
226
272
|
start = next_start
|
@@ -231,7 +277,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
231
277
|
return start < end
|
232
278
|
return start <= end
|
233
279
|
|
234
|
-
def _evaluate_next_start_date_safely(
|
280
|
+
def _evaluate_next_start_date_safely(
|
281
|
+
self, start: datetime.datetime, step: datetime.timedelta
|
282
|
+
) -> datetime.datetime:
|
235
283
|
"""
|
236
284
|
Given that we set the default step at datetime.timedelta.max, we will generate an OverflowError when evaluating the next start_date
|
237
285
|
This method assumes that users would never enter a step that would generate an overflow. Given that would be the case, the code
|
@@ -308,7 +356,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
308
356
|
# Never update kwargs
|
309
357
|
return {}
|
310
358
|
|
311
|
-
def _get_request_options(
|
359
|
+
def _get_request_options(
|
360
|
+
self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice]
|
361
|
+
) -> Mapping[str, Any]:
|
312
362
|
options: MutableMapping[str, Any] = {}
|
313
363
|
if not stream_slice:
|
314
364
|
return options
|
@@ -317,7 +367,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
317
367
|
self._partition_field_start.eval(self.config)
|
318
368
|
)
|
319
369
|
if self.end_time_option and self.end_time_option.inject_into == option_type:
|
320
|
-
options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
|
370
|
+
options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(
|
371
|
+
self._partition_field_end.eval(self.config)
|
372
|
+
) # type: ignore # field_name is always casted to an interpolated string
|
321
373
|
return options
|
322
374
|
|
323
375
|
def should_be_synced(self, record: Record) -> bool:
|
@@ -330,11 +382,18 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
330
382
|
)
|
331
383
|
return True
|
332
384
|
latest_possible_cursor_value = self.select_best_end_datetime()
|
333
|
-
earliest_possible_cursor_value = self._calculate_earliest_possible_value(
|
334
|
-
|
385
|
+
earliest_possible_cursor_value = self._calculate_earliest_possible_value(
|
386
|
+
latest_possible_cursor_value
|
387
|
+
)
|
388
|
+
return self._is_within_daterange_boundaries(
|
389
|
+
record, earliest_possible_cursor_value, latest_possible_cursor_value
|
390
|
+
)
|
335
391
|
|
336
392
|
def _is_within_daterange_boundaries(
|
337
|
-
self,
|
393
|
+
self,
|
394
|
+
record: Record,
|
395
|
+
start_datetime_boundary: Union[datetime.datetime, str],
|
396
|
+
end_datetime_boundary: Union[datetime.datetime, str],
|
338
397
|
) -> bool:
|
339
398
|
cursor_field = self.cursor_field.eval(self.config) # type: ignore # cursor_field is converted to an InterpolatedString in __post_init__
|
340
399
|
record_cursor_value = record.get(cursor_field)
|
@@ -348,7 +407,9 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
348
407
|
start_datetime_boundary = self.parse_date(start_datetime_boundary)
|
349
408
|
if isinstance(end_datetime_boundary, str):
|
350
409
|
end_datetime_boundary = self.parse_date(end_datetime_boundary)
|
351
|
-
return
|
410
|
+
return (
|
411
|
+
start_datetime_boundary <= self.parse_date(record_cursor_value) <= end_datetime_boundary
|
412
|
+
)
|
352
413
|
|
353
414
|
def _send_log(self, level: Level, message: str) -> None:
|
354
415
|
if self.message_repository:
|
@@ -378,8 +439,12 @@ class DatetimeBasedCursor(DeclarativeCursor):
|
|
378
439
|
:param lookback_window_in_seconds: The lookback duration in seconds to potentially update to.
|
379
440
|
"""
|
380
441
|
runtime_lookback_window = duration_isoformat(timedelta(seconds=lookback_window_in_seconds))
|
381
|
-
config_lookback = parse_duration(
|
442
|
+
config_lookback = parse_duration(
|
443
|
+
self._lookback_window.eval(self.config) if self._lookback_window else "P0D"
|
444
|
+
)
|
382
445
|
|
383
446
|
# Check if the new runtime lookback window is greater than the current config lookback
|
384
447
|
if parse_duration(runtime_lookback_window) > config_lookback:
|
385
|
-
self._lookback_window = InterpolatedString.create(
|
448
|
+
self._lookback_window = InterpolatedString.create(
|
449
|
+
runtime_lookback_window, parameters={}
|
450
|
+
)
|
@@ -84,7 +84,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
|
|
84
84
|
self._partition_router = partition_router
|
85
85
|
self._timer = Timer()
|
86
86
|
self._lock = threading.Lock()
|
87
|
-
self._slice_semaphore = threading.Semaphore(
|
87
|
+
self._slice_semaphore = threading.Semaphore(
|
88
|
+
0
|
89
|
+
) # Start with 0, indicating no slices being tracked
|
88
90
|
self._all_slices_yielded = False
|
89
91
|
self._lookback_window: Optional[int] = None
|
90
92
|
self._current_partition: Optional[Mapping[str, Any]] = None
|
@@ -116,7 +118,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
|
|
116
118
|
)
|
117
119
|
|
118
120
|
self.start_slices_generation()
|
119
|
-
for slice, last, state in iterate_with_last_flag_and_state(
|
121
|
+
for slice, last, state in iterate_with_last_flag_and_state(
|
122
|
+
slice_generator, self._partition_router.get_stream_state
|
123
|
+
):
|
120
124
|
self._parent_state = state
|
121
125
|
self.register_slice(last)
|
122
126
|
yield slice
|
@@ -124,7 +128,8 @@ class GlobalSubstreamCursor(DeclarativeCursor):
|
|
124
128
|
|
125
129
|
def generate_slices_from_partition(self, partition: StreamSlice) -> Iterable[StreamSlice]:
|
126
130
|
slice_generator = (
|
127
|
-
StreamSlice(partition=partition, cursor_slice=cursor_slice)
|
131
|
+
StreamSlice(partition=partition, cursor_slice=cursor_slice)
|
132
|
+
for cursor_slice in self._stream_cursor.stream_slices()
|
128
133
|
)
|
129
134
|
|
130
135
|
yield from slice_generator
|
@@ -199,10 +204,14 @@ class GlobalSubstreamCursor(DeclarativeCursor):
|
|
199
204
|
if hasattr(self._stream_cursor, "set_runtime_lookback_window"):
|
200
205
|
self._stream_cursor.set_runtime_lookback_window(lookback_window)
|
201
206
|
else:
|
202
|
-
raise ValueError(
|
207
|
+
raise ValueError(
|
208
|
+
"The cursor class for Global Substream Cursor does not have a set_runtime_lookback_window method"
|
209
|
+
)
|
203
210
|
|
204
211
|
def observe(self, stream_slice: StreamSlice, record: Record) -> None:
|
205
|
-
self._stream_cursor.observe(
|
212
|
+
self._stream_cursor.observe(
|
213
|
+
StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), record
|
214
|
+
)
|
206
215
|
|
207
216
|
def close_slice(self, stream_slice: StreamSlice, *args: Any) -> None:
|
208
217
|
"""
|
@@ -220,7 +229,9 @@ class GlobalSubstreamCursor(DeclarativeCursor):
|
|
220
229
|
self._slice_semaphore.acquire()
|
221
230
|
if self._all_slices_yielded and self._slice_semaphore._value == 0:
|
222
231
|
self._lookback_window = self._timer.finish()
|
223
|
-
self._stream_cursor.close_slice(
|
232
|
+
self._stream_cursor.close_slice(
|
233
|
+
StreamSlice(partition={}, cursor_slice=stream_slice.cursor_slice), *args
|
234
|
+
)
|
224
235
|
|
225
236
|
def get_stream_state(self) -> StreamState:
|
226
237
|
state: dict[str, Any] = {"state": self._stream_cursor.get_stream_state()}
|
@@ -322,12 +333,15 @@ class GlobalSubstreamCursor(DeclarativeCursor):
|
|
322
333
|
|
323
334
|
def is_greater_than_or_equal(self, first: Record, second: Record) -> bool:
|
324
335
|
return self._stream_cursor.is_greater_than_or_equal(
|
325
|
-
self._convert_record_to_cursor_record(first),
|
336
|
+
self._convert_record_to_cursor_record(first),
|
337
|
+
self._convert_record_to_cursor_record(second),
|
326
338
|
)
|
327
339
|
|
328
340
|
@staticmethod
|
329
341
|
def _convert_record_to_cursor_record(record: Record) -> Record:
|
330
342
|
return Record(
|
331
343
|
record.data,
|
332
|
-
StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
|
344
|
+
StreamSlice(partition={}, cursor_slice=record.associated_slice.cursor_slice)
|
345
|
+
if record.associated_slice
|
346
|
+
else None,
|
333
347
|
)
|