airbyte-cdk 0.68.4__py3-none-any.whl → 0.69.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/entrypoint.py +27 -7
- airbyte_cdk/sources/connector_state_manager.py +0 -1
- airbyte_cdk/sources/file_based/file_based_source.py +4 -2
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +2 -2
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/{file_based_noop_cursor.py → file_based_final_state_cursor.py} +21 -6
- airbyte_cdk/sources/streams/concurrent/adapters.py +2 -2
- airbyte_cdk/sources/streams/concurrent/cursor.py +27 -3
- airbyte_cdk/sources/streams/concurrent/default_stream.py +7 -3
- airbyte_cdk/test/entrypoint_wrapper.py +1 -1
- airbyte_cdk/utils/message_utils.py +17 -0
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/RECORD +30 -28
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/WHEEL +1 -1
- unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -2
- unit_tests/sources/file_based/scenarios/csv_scenarios.py +128 -37
- unit_tests/sources/file_based/stream/concurrent/test_adapters.py +3 -3
- unit_tests/sources/file_based/test_file_based_scenarios.py +13 -6
- unit_tests/sources/file_based/test_scenarios.py +32 -3
- unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +16 -14
- unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +5 -4
- unit_tests/sources/streams/concurrent/test_default_stream.py +8 -6
- unit_tests/sources/streams/test_stream_read.py +3 -2
- unit_tests/sources/test_concurrent_source.py +7 -5
- unit_tests/sources/test_source_read.py +2 -3
- unit_tests/test/test_entrypoint_wrapper.py +9 -6
- unit_tests/utils/test_message_utils.py +91 -0
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPool
|
|
12
12
|
from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
|
13
13
|
from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
|
14
14
|
from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability, StreamAvailable, StreamUnavailable
|
15
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
|
15
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
|
16
16
|
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
|
17
17
|
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
|
18
18
|
from airbyte_protocol.models import AirbyteStream
|
@@ -42,10 +42,11 @@ MESSAGE_FROM_REPOSITORY = Mock()
|
|
42
42
|
|
43
43
|
|
44
44
|
class _MockStream(AbstractStream):
|
45
|
-
def __init__(self, name: str, available: bool = True, json_schema: Dict[str, Any] = {}):
|
45
|
+
def __init__(self, name: str, message_repository: MessageRepository, available: bool = True, json_schema: Dict[str, Any] = {}):
|
46
46
|
self._name = name
|
47
47
|
self._available = available
|
48
48
|
self._json_schema = json_schema
|
49
|
+
self._message_repository = message_repository
|
49
50
|
|
50
51
|
def generate_partitions(self) -> Iterable[Partition]:
|
51
52
|
yield _MockPartition(self._name)
|
@@ -75,7 +76,7 @@ class _MockStream(AbstractStream):
|
|
75
76
|
|
76
77
|
@property
|
77
78
|
def cursor(self) -> Cursor:
|
78
|
-
return NoopCursor()
|
79
|
+
return FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository)
|
79
80
|
|
80
81
|
|
81
82
|
class _MockPartition(Partition):
|
@@ -103,8 +104,9 @@ class _MockPartition(Partition):
|
|
103
104
|
|
104
105
|
|
105
106
|
def test_concurrent_source_reading_from_no_streams():
|
106
|
-
|
107
|
-
|
107
|
+
message_repository = InMemoryMessageRepository()
|
108
|
+
stream = _MockStream("my_stream", message_repository,False, {})
|
109
|
+
source = _MockSource(message_repository=message_repository)
|
108
110
|
messages = []
|
109
111
|
for m in source.read([stream]):
|
110
112
|
messages.append(m)
|
@@ -27,7 +27,7 @@ from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import Conc
|
|
27
27
|
from airbyte_cdk.sources.message import InMemoryMessageRepository
|
28
28
|
from airbyte_cdk.sources.streams import Stream
|
29
29
|
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
|
30
|
-
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
|
30
|
+
from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
|
31
31
|
from airbyte_cdk.sources.streams.core import StreamData
|
32
32
|
from airbyte_cdk.utils import AirbyteTracedException
|
33
33
|
from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import NeverLogSliceLogger
|
@@ -409,9 +409,8 @@ def _init_sources(stream_slice_to_partitions, state, logger):
|
|
409
409
|
|
410
410
|
|
411
411
|
def _init_source(stream_slice_to_partitions, state, logger, source):
|
412
|
-
cursor = NoopCursor()
|
413
412
|
streams = [
|
414
|
-
StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, cursor)
|
413
|
+
StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, FinalStateCursor(stream_name=f"stream{i}", stream_namespace=None, message_repository=InMemoryMessageRepository()))
|
415
414
|
for i, stream_slices in enumerate(stream_slice_to_partitions)
|
416
415
|
]
|
417
416
|
source.set_streams(streams)
|
@@ -3,7 +3,7 @@
|
|
3
3
|
import json
|
4
4
|
import logging
|
5
5
|
import os
|
6
|
-
from typing import Any, Iterator, List
|
6
|
+
from typing import Any, Iterator, List, Mapping
|
7
7
|
from unittest import TestCase
|
8
8
|
from unittest.mock import Mock, patch
|
9
9
|
|
@@ -16,7 +16,9 @@ from airbyte_protocol.models import (
|
|
16
16
|
AirbyteLogMessage,
|
17
17
|
AirbyteMessage,
|
18
18
|
AirbyteRecordMessage,
|
19
|
+
AirbyteStateBlob,
|
19
20
|
AirbyteStateMessage,
|
21
|
+
AirbyteStreamState,
|
20
22
|
AirbyteStreamStatus,
|
21
23
|
AirbyteStreamStatusTraceMessage,
|
22
24
|
AirbyteTraceMessage,
|
@@ -28,8 +30,8 @@ from airbyte_protocol.models import (
|
|
28
30
|
)
|
29
31
|
|
30
32
|
|
31
|
-
def _a_state_message(
|
32
|
-
return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(
|
33
|
+
def _a_state_message(stream_name: str, stream_state: Mapping[str, Any]) -> AirbyteMessage:
|
34
|
+
return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(stream=AirbyteStreamState(stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob(**stream_state))))
|
33
35
|
|
34
36
|
|
35
37
|
def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteMessage:
|
@@ -49,7 +51,7 @@ def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteM
|
|
49
51
|
_A_RECORD = AirbyteMessage(
|
50
52
|
type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data={"record key": "record value"}, emitted_at=0)
|
51
53
|
)
|
52
|
-
_A_STATE_MESSAGE = _a_state_message({"state key": "state value for _A_STATE_MESSAGE"})
|
54
|
+
_A_STATE_MESSAGE = _a_state_message("stream_name", {"state key": "state value for _A_STATE_MESSAGE"})
|
53
55
|
_A_LOG = AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="This is an Airbyte log message"))
|
54
56
|
_AN_ERROR_MESSAGE = AirbyteMessage(
|
55
57
|
type=Type.TRACE,
|
@@ -176,8 +178,9 @@ class EntrypointWrapperTest(TestCase):
|
|
176
178
|
|
177
179
|
@patch("airbyte_cdk.test.entrypoint_wrapper.AirbyteEntrypoint")
|
178
180
|
def test_given_many_state_messages_and_records_when_read_then_output_has_records_and_state_message(self, entrypoint):
|
179
|
-
|
180
|
-
|
181
|
+
state_value = {"state_key": "last state value"}
|
182
|
+
last_emitted_state = AirbyteStreamState(stream_descriptor=StreamDescriptor(name="stream_name"), stream_state=AirbyteStateBlob(**state_value))
|
183
|
+
entrypoint.return_value.run.return_value = _to_entrypoint_output([_A_STATE_MESSAGE, _a_state_message("stream_name", state_value)])
|
181
184
|
|
182
185
|
output = read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE)
|
183
186
|
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
|
2
|
+
|
3
|
+
import pytest
|
4
|
+
from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
|
5
|
+
from airbyte_cdk.utils.message_utils import get_stream_descriptor
|
6
|
+
from airbyte_protocol.models import (
|
7
|
+
AirbyteControlConnectorConfigMessage,
|
8
|
+
AirbyteControlMessage,
|
9
|
+
AirbyteMessage,
|
10
|
+
AirbyteRecordMessage,
|
11
|
+
AirbyteStateBlob,
|
12
|
+
AirbyteStateMessage,
|
13
|
+
AirbyteStateStats,
|
14
|
+
AirbyteStateType,
|
15
|
+
AirbyteStreamState,
|
16
|
+
OrchestratorType,
|
17
|
+
StreamDescriptor,
|
18
|
+
Type,
|
19
|
+
)
|
20
|
+
|
21
|
+
|
22
|
+
def test_get_record_message_stream_descriptor():
|
23
|
+
message = AirbyteMessage(
|
24
|
+
type=Type.RECORD,
|
25
|
+
record=AirbyteRecordMessage(
|
26
|
+
stream="test_stream",
|
27
|
+
namespace="test_namespace",
|
28
|
+
data={"id": "12345"},
|
29
|
+
emitted_at=1,
|
30
|
+
),
|
31
|
+
)
|
32
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
|
33
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
34
|
+
|
35
|
+
|
36
|
+
def test_get_record_message_stream_descriptor_no_namespace():
|
37
|
+
message = AirbyteMessage(
|
38
|
+
type=Type.RECORD,
|
39
|
+
record=AirbyteRecordMessage(
|
40
|
+
stream="test_stream", data={"id": "12345"}, emitted_at=1
|
41
|
+
),
|
42
|
+
)
|
43
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
|
44
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
45
|
+
|
46
|
+
|
47
|
+
def test_get_state_message_stream_descriptor():
|
48
|
+
message = AirbyteMessage(
|
49
|
+
type=Type.STATE,
|
50
|
+
state=AirbyteStateMessage(
|
51
|
+
type=AirbyteStateType.STREAM,
|
52
|
+
stream=AirbyteStreamState(
|
53
|
+
stream_descriptor=StreamDescriptor(
|
54
|
+
name="test_stream", namespace="test_namespace"
|
55
|
+
),
|
56
|
+
stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
|
57
|
+
),
|
58
|
+
sourceStats=AirbyteStateStats(recordCount=27.0),
|
59
|
+
),
|
60
|
+
)
|
61
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
|
62
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
63
|
+
|
64
|
+
|
65
|
+
def test_get_state_message_stream_descriptor_no_namespace():
|
66
|
+
message = AirbyteMessage(
|
67
|
+
type=Type.STATE,
|
68
|
+
state=AirbyteStateMessage(
|
69
|
+
type=AirbyteStateType.STREAM,
|
70
|
+
stream=AirbyteStreamState(
|
71
|
+
stream_descriptor=StreamDescriptor(name="test_stream"),
|
72
|
+
stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
|
73
|
+
),
|
74
|
+
sourceStats=AirbyteStateStats(recordCount=27.0),
|
75
|
+
),
|
76
|
+
)
|
77
|
+
expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
|
78
|
+
assert get_stream_descriptor(message) == expected_descriptor
|
79
|
+
|
80
|
+
|
81
|
+
def test_get_other_message_stream_descriptor_fails():
|
82
|
+
message = AirbyteMessage(
|
83
|
+
type=Type.CONTROL,
|
84
|
+
control=AirbyteControlMessage(
|
85
|
+
type=OrchestratorType.CONNECTOR_CONFIG,
|
86
|
+
emitted_at=10,
|
87
|
+
connectorConfig=AirbyteControlConnectorConfigMessage(config={"any config": "a config value"}),
|
88
|
+
),
|
89
|
+
)
|
90
|
+
with pytest.raises(NotImplementedError):
|
91
|
+
get_stream_descriptor(message)
|
File without changes
|
File without changes
|