airbyte-cdk 0.68.4__py3-none-any.whl → 0.69.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/entrypoint.py +27 -7
- airbyte_cdk/sources/connector_state_manager.py +0 -1
- airbyte_cdk/sources/file_based/file_based_source.py +4 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/{file_based_noop_cursor.py → file_based_final_state_cursor.py} +21 -6
- airbyte_cdk/sources/streams/concurrent/adapters.py +2 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +27 -3
- airbyte_cdk/sources/streams/concurrent/default_stream.py +7 -3
- airbyte_cdk/test/entrypoint_wrapper.py +1 -1
- airbyte_cdk/utils/message_utils.py +17 -0
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/RECORD +30 -28
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/WHEEL +1 -1
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -2
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +128 -37
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +3 -3
- unit_tests/sources/file_based/test_file_based_scenarios.py +13 -6
- unit_tests/sources/file_based/test_scenarios.py +32 -3
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +16 -14
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +5 -4
- unit_tests/sources/streams/concurrent/test_default_stream.py +8 -6
- unit_tests/sources/streams/test_stream_read.py +3 -2
- unit_tests/sources/test_concurrent_source.py +7 -5
- unit_tests/sources/test_source_read.py +2 -3
- unit_tests/test/test_entrypoint_wrapper.py +9 -6
- unit_tests/utils/test_message_utils.py +91 -0
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPool
|
|
12
12
|
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability, StreamAvailable, StreamUnavailable
|
15
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
|
16
16
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
17
17
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
18
18
|
from airbyte_protocol.models import AirbyteStream
|
@@ -42,10 +42,11 @@ MESSAGE_FROM_REPOSITORY = Mock()
|
|
42
42
|
|
43
43
|
|
44
44
|
class _MockStream(AbstractStream):
|
45
|
-
def __init__(self, name: str, available: bool = True, json_schema: Dict[str, Any] = {}):
|
45
|
+
def __init__(self, name: str, message_repository: MessageRepository, available: bool = True, json_schema: Dict[str, Any] = {}):
|
46
46
|
self._name = name
|
47
47
|
self._available = available
|
48
48
|
self._json_schema = json_schema
|
49
|
+
self._message_repository = message_repository
|
49
50
|
|
50
51
|
def generate_partitions(self) -> Iterable[Partition]:
|
51
52
|
yield _MockPartition(self._name)
|
@@ -75,7 +76,7 @@ class _MockStream(AbstractStream):
|
|
75
76
|
|
76
77
|
@property
|
77
78
|
def cursor(self) -> Cursor:
|
78
|
-
return NoopCursor()
|
79
|
+
return FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository)
|
79
80
|
|
80
81
|
|
81
82
|
class _MockPartition(Partition):
|
@@ -103,8 +104,9 @@ class _MockPartition(Partition):
|
|
103
104
|
|
104
105
|
|
105
106
|
def test_concurrent_source_reading_from_no_streams():
|
106
|
-
|
107
|
-
|
107
|
+
message_repository = InMemoryMessageRepository()
|
108
|
+
stream = _MockStream("my_stream", message_repository,False, {})
|
109
|
+
source = _MockSource(message_repository=message_repository)
|
108
110
|
messages = []
|
109
111
|
for m in source.read([stream]):
|
110
112
|
messages.append(m)
|
@@ -27,7 +27,7 @@ from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import Conc
|
|
27
27
|
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
28
28
|
from airbyte_cdk.sources.streams import Stream
|
29
29
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
30
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
30
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
|
31
31
|
from airbyte_cdk.sources.streams.core import StreamData
|
32
32
|
from airbyte_cdk.utils import AirbyteTracedException
|
33
33
|
from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import NeverLogSliceLogger
|
@@ -409,9 +409,8 @@ def _init_sources(stream_slice_to_partitions, state, logger):
|
|
409
409
|
|
410
410
|
|
411
411
|
def _init_source(stream_slice_to_partitions, state, logger, source):
|
412
|
-
cursor = NoopCursor()
|
413
412
|
streams = [
|
414
|
-
StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, cursor)
|
413
|
+
StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, FinalStateCursor(stream_name=f"stream{i}", stream_namespace=None, message_repository=InMemoryMessageRepository()))
|
415
414
|
for i, stream_slices in enumerate(stream_slice_to_partitions)
|
416
415
|
]
|
417
416
|
source.set_streams(streams)
|
@@ -3,7 +3,7 @@
|
|
3
3
|
import json
|
4
4
|
import logging
|
5
5
|
import os
|
6
|
-
from typing import Any, Iterator, List
|
6
|
+
from typing import Any, Iterator, List, Mapping
|
7
7
|
from unittest import TestCase
|
8
8
|
from unittest.mock import Mock, patch
|
9
9
|
|
@@ -16,7 +16,9 @@ from airbyte_protocol.models import (
|
|
16
16
|
AirbyteLogMessage,
|
17
17
|
AirbyteMessage,
|
18
18
|
AirbyteRecordMessage,
|
19
|
+
AirbyteStateBlob,
|
19
20
|
AirbyteStateMessage,
|
21
|
+
AirbyteStreamState,
|
20
22
|
AirbyteStreamStatus,
|
21
23
|
AirbyteStreamStatusTraceMessage,
|
22
24
|
AirbyteTraceMessage,
|
@@ -28,8 +30,8 @@ from airbyte_protocol.models import (
|
|
28
30
|
)
|
29
31
|
|
30
32
|
|
31
|
-
def _a_state_message(state: Any) -> AirbyteMessage:
|
32
|
-
return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state))
|
33
|
+
def _a_state_message(stream_name: str, stream_state: Mapping[str, Any]) -> AirbyteMessage:
|
34
|
+
return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(stream=AirbyteStreamState(stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob(**stream_state))))
|
33
35
|
|
34
36
|
|
35
37
|
def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteMessage:
|
@@ -49,7 +51,7 @@ def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteM
|
|
49
51
|
_A_RECORD = AirbyteMessage(
|
50
52
|
type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data={"record key": "record value"}, emitted_at=0)
|
51
53
|
)
|
52
|
-
_A_STATE_MESSAGE = _a_state_message({"state key": "state value for _A_STATE_MESSAGE"})
|
54
|
+
_A_STATE_MESSAGE = _a_state_message("stream_name", {"state key": "state value for _A_STATE_MESSAGE"})
|
53
55
|
_A_LOG = AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="This is an Airbyte log message"))
|
54
56
|
_AN_ERROR_MESSAGE = AirbyteMessage(
|
55
57
|
type=Type.TRACE,
|
@@ -176,8 +178,9 @@ class EntrypointWrapperTest(TestCase):
|
|
176
178
|
|
177
179
|
@patch("airbyte_cdk.test.entrypoint_wrapper.AirbyteEntrypoint")
|
178
180
|
def test_given_many_state_messages_and_records_when_read_then_output_has_records_and_state_message(self, entrypoint):
|
179
|
-
|
180
|
-
|
181
|
+
state_value = {"state_key": "last state value"}
|
182
|
+
last_emitted_state = AirbyteStreamState(stream_descriptor=StreamDescriptor(name="stream_name"), stream_state=AirbyteStateBlob(**state_value))
|
183
|
+
entrypoint.return_value.run.return_value = _to_entrypoint_output([_A_STATE_MESSAGE, _a_state_message("stream_name", state_value)])
|
181
184
|
|
182
185
|
output = read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE)
|
183
186
|
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
import pytest
|
4
|
+
from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
|
5
|
+
from airbyte_cdk.utils.message_utils import get_stream_descriptor
|
6
|
+
from airbyte_protocol.models import (
|
7
|
+
AirbyteControlConnectorConfigMessage,
|
8
|
+
AirbyteControlMessage,
|
9
|
+
AirbyteMessage,
|
10
|
+
AirbyteRecordMessage,
|
11
|
+
AirbyteStateBlob,
|
12
|
+
AirbyteStateMessage,
|
13
|
+
AirbyteStateStats,
|
14
|
+
AirbyteStateType,
|
15
|
+
AirbyteStreamState,
|
16
|
+
OrchestratorType,
|
17
|
+
StreamDescriptor,
|
18
|
+
Type,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
def test_get_record_message_stream_descriptor():
|
23
|
+
message = AirbyteMessage(
|
24
|
+
type=Type.RECORD,
|
25
|
+
record=AirbyteRecordMessage(
|
26
|
+
stream="test_stream",
|
27
|
+
namespace="test_namespace",
|
28
|
+
data={"id": "12345"},
|
29
|
+
emitted_at=1,
|
30
|
+
),
|
31
|
+
)
|
32
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
|
33
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
34
|
+
|
35
|
+
|
36
|
+
def test_get_record_message_stream_descriptor_no_namespace():
|
37
|
+
message = AirbyteMessage(
|
38
|
+
type=Type.RECORD,
|
39
|
+
record=AirbyteRecordMessage(
|
40
|
+
stream="test_stream", data={"id": "12345"}, emitted_at=1
|
41
|
+
),
|
42
|
+
)
|
43
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
|
44
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
45
|
+
|
46
|
+
|
47
|
+
def test_get_state_message_stream_descriptor():
|
48
|
+
message = AirbyteMessage(
|
49
|
+
type=Type.STATE,
|
50
|
+
state=AirbyteStateMessage(
|
51
|
+
type=AirbyteStateType.STREAM,
|
52
|
+
stream=AirbyteStreamState(
|
53
|
+
stream_descriptor=StreamDescriptor(
|
54
|
+
name="test_stream", namespace="test_namespace"
|
55
|
+
),
|
56
|
+
stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
|
57
|
+
),
|
58
|
+
sourceStats=AirbyteStateStats(recordCount=27.0),
|
59
|
+
),
|
60
|
+
)
|
61
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
|
62
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
63
|
+
|
64
|
+
|
65
|
+
def test_get_state_message_stream_descriptor_no_namespace():
|
66
|
+
message = AirbyteMessage(
|
67
|
+
type=Type.STATE,
|
68
|
+
state=AirbyteStateMessage(
|
69
|
+
type=AirbyteStateType.STREAM,
|
70
|
+
stream=AirbyteStreamState(
|
71
|
+
stream_descriptor=StreamDescriptor(name="test_stream"),
|
72
|
+
stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
|
73
|
+
),
|
74
|
+
sourceStats=AirbyteStateStats(recordCount=27.0),
|
75
|
+
),
|
76
|
+
)
|
77
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
|
78
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
79
|
+
|
80
|
+
|
81
|
+
def test_get_other_message_stream_descriptor_fails():
|
82
|
+
message = AirbyteMessage(
|
83
|
+
type=Type.CONTROL,
|
84
|
+
control=AirbyteControlMessage(
|
85
|
+
type=OrchestratorType.CONNECTOR_CONFIG,
|
86
|
+
emitted_at=10,
|
87
|
+
connectorConfig=AirbyteControlConnectorConfigMessage(config={"any config": "a config value"}),
|
88
|
+
),
|
89
|
+
)
|
90
|
+
with pytest.raises(NotImplementedError):
|
91
|
+
get_stream_descriptor(message)
|
File without changes
|
File without changes
|