airbyte-cdk 0.68.4__py3-none-any.whl → 0.69.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. airbyte_cdk/entrypoint.py +27 -7
  2. airbyte_cdk/sources/connector_state_manager.py +0 -1
  3. airbyte_cdk/sources/file_based/file_based_source.py +4 -2
  4. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +2 -2
  5. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +2 -2
  6. airbyte_cdk/sources/file_based/stream/concurrent/cursor/{file_based_noop_cursor.py → file_based_final_state_cursor.py} +21 -6
  7. airbyte_cdk/sources/streams/concurrent/adapters.py +2 -2
  8. airbyte_cdk/sources/streams/concurrent/cursor.py +27 -3
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +7 -3
  10. airbyte_cdk/test/entrypoint_wrapper.py +1 -1
  11. airbyte_cdk/utils/message_utils.py +17 -0
  12. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/RECORD +30 -28
  14. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/WHEEL +1 -1
  15. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -2
  16. unit_tests/sources/file_based/scenarios/csv_scenarios.py +128 -37
  17. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +3 -3
  18. unit_tests/sources/file_based/test_file_based_scenarios.py +13 -6
  19. unit_tests/sources/file_based/test_scenarios.py +32 -3
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +16 -14
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +5 -4
  23. unit_tests/sources/streams/concurrent/test_default_stream.py +8 -6
  24. unit_tests/sources/streams/test_stream_read.py +3 -2
  25. unit_tests/sources/test_concurrent_source.py +7 -5
  26. unit_tests/sources/test_source_read.py +2 -3
  27. unit_tests/test/test_entrypoint_wrapper.py +9 -6
  28. unit_tests/utils/test_message_utils.py +91 -0
  29. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPool
12
12
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
13
13
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability, StreamAvailable, StreamUnavailable
15
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
16
16
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
17
17
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
18
18
  from airbyte_protocol.models import AirbyteStream
@@ -42,10 +42,11 @@ MESSAGE_FROM_REPOSITORY = Mock()
42
42
 
43
43
 
44
44
  class _MockStream(AbstractStream):
45
- def __init__(self, name: str, available: bool = True, json_schema: Dict[str, Any] = {}):
45
+ def __init__(self, name: str, message_repository: MessageRepository, available: bool = True, json_schema: Dict[str, Any] = {}):
46
46
  self._name = name
47
47
  self._available = available
48
48
  self._json_schema = json_schema
49
+ self._message_repository = message_repository
49
50
 
50
51
  def generate_partitions(self) -> Iterable[Partition]:
51
52
  yield _MockPartition(self._name)
@@ -75,7 +76,7 @@ class _MockStream(AbstractStream):
75
76
 
76
77
  @property
77
78
  def cursor(self) -> Cursor:
78
- return NoopCursor()
79
+ return FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository)
79
80
 
80
81
 
81
82
  class _MockPartition(Partition):
@@ -103,8 +104,9 @@ class _MockPartition(Partition):
103
104
 
104
105
 
105
106
  def test_concurrent_source_reading_from_no_streams():
106
- stream = _MockStream("my_stream", False, {})
107
- source = _MockSource()
107
+ message_repository = InMemoryMessageRepository()
108
+ stream = _MockStream("my_stream", message_repository,False, {})
109
+ source = _MockSource(message_repository=message_repository)
108
110
  messages = []
109
111
  for m in source.read([stream]):
110
112
  messages.append(m)
@@ -27,7 +27,7 @@ from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import Conc
27
27
  from airbyte_cdk.sources.message import InMemoryMessageRepository
28
28
  from airbyte_cdk.sources.streams import Stream
29
29
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
30
- from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
30
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
31
31
  from airbyte_cdk.sources.streams.core import StreamData
32
32
  from airbyte_cdk.utils import AirbyteTracedException
33
33
  from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import NeverLogSliceLogger
@@ -409,9 +409,8 @@ def _init_sources(stream_slice_to_partitions, state, logger):
409
409
 
410
410
 
411
411
  def _init_source(stream_slice_to_partitions, state, logger, source):
412
- cursor = NoopCursor()
413
412
  streams = [
414
- StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, cursor)
413
+ StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, FinalStateCursor(stream_name=f"stream{i}", stream_namespace=None, message_repository=InMemoryMessageRepository()))
415
414
  for i, stream_slices in enumerate(stream_slice_to_partitions)
416
415
  ]
417
416
  source.set_streams(streams)
@@ -3,7 +3,7 @@
3
3
  import json
4
4
  import logging
5
5
  import os
6
- from typing import Any, Iterator, List
6
+ from typing import Any, Iterator, List, Mapping
7
7
  from unittest import TestCase
8
8
  from unittest.mock import Mock, patch
9
9
 
@@ -16,7 +16,9 @@ from airbyte_protocol.models import (
16
16
  AirbyteLogMessage,
17
17
  AirbyteMessage,
18
18
  AirbyteRecordMessage,
19
+ AirbyteStateBlob,
19
20
  AirbyteStateMessage,
21
+ AirbyteStreamState,
20
22
  AirbyteStreamStatus,
21
23
  AirbyteStreamStatusTraceMessage,
22
24
  AirbyteTraceMessage,
@@ -28,8 +30,8 @@ from airbyte_protocol.models import (
28
30
  )
29
31
 
30
32
 
31
- def _a_state_message(state: Any) -> AirbyteMessage:
32
- return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state))
33
+ def _a_state_message(stream_name: str, stream_state: Mapping[str, Any]) -> AirbyteMessage:
34
+ return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(stream=AirbyteStreamState(stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob(**stream_state))))
33
35
 
34
36
 
35
37
  def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteMessage:
@@ -49,7 +51,7 @@ def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteM
49
51
  _A_RECORD = AirbyteMessage(
50
52
  type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data={"record key": "record value"}, emitted_at=0)
51
53
  )
52
- _A_STATE_MESSAGE = _a_state_message({"state key": "state value for _A_STATE_MESSAGE"})
54
+ _A_STATE_MESSAGE = _a_state_message("stream_name", {"state key": "state value for _A_STATE_MESSAGE"})
53
55
  _A_LOG = AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="This is an Airbyte log message"))
54
56
  _AN_ERROR_MESSAGE = AirbyteMessage(
55
57
  type=Type.TRACE,
@@ -176,8 +178,9 @@ class EntrypointWrapperTest(TestCase):
176
178
 
177
179
  @patch("airbyte_cdk.test.entrypoint_wrapper.AirbyteEntrypoint")
178
180
  def test_given_many_state_messages_and_records_when_read_then_output_has_records_and_state_message(self, entrypoint):
179
- last_emitted_state = {"last state key": "last state value"}
180
- entrypoint.return_value.run.return_value = _to_entrypoint_output([_A_STATE_MESSAGE, _a_state_message(last_emitted_state)])
181
+ state_value = {"state_key": "last state value"}
182
+ last_emitted_state = AirbyteStreamState(stream_descriptor=StreamDescriptor(name="stream_name"), stream_state=AirbyteStateBlob(**state_value))
183
+ entrypoint.return_value.run.return_value = _to_entrypoint_output([_A_STATE_MESSAGE, _a_state_message("stream_name", state_value)])
181
184
 
182
185
  output = read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE)
183
186
 
@@ -0,0 +1,91 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ import pytest
4
+ from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
5
+ from airbyte_cdk.utils.message_utils import get_stream_descriptor
6
+ from airbyte_protocol.models import (
7
+ AirbyteControlConnectorConfigMessage,
8
+ AirbyteControlMessage,
9
+ AirbyteMessage,
10
+ AirbyteRecordMessage,
11
+ AirbyteStateBlob,
12
+ AirbyteStateMessage,
13
+ AirbyteStateStats,
14
+ AirbyteStateType,
15
+ AirbyteStreamState,
16
+ OrchestratorType,
17
+ StreamDescriptor,
18
+ Type,
19
+ )
20
+
21
+
22
+ def test_get_record_message_stream_descriptor():
23
+ message = AirbyteMessage(
24
+ type=Type.RECORD,
25
+ record=AirbyteRecordMessage(
26
+ stream="test_stream",
27
+ namespace="test_namespace",
28
+ data={"id": "12345"},
29
+ emitted_at=1,
30
+ ),
31
+ )
32
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
33
+ assert get_stream_descriptor(message) == expected_descriptor
34
+
35
+
36
+ def test_get_record_message_stream_descriptor_no_namespace():
37
+ message = AirbyteMessage(
38
+ type=Type.RECORD,
39
+ record=AirbyteRecordMessage(
40
+ stream="test_stream", data={"id": "12345"}, emitted_at=1
41
+ ),
42
+ )
43
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
44
+ assert get_stream_descriptor(message) == expected_descriptor
45
+
46
+
47
+ def test_get_state_message_stream_descriptor():
48
+ message = AirbyteMessage(
49
+ type=Type.STATE,
50
+ state=AirbyteStateMessage(
51
+ type=AirbyteStateType.STREAM,
52
+ stream=AirbyteStreamState(
53
+ stream_descriptor=StreamDescriptor(
54
+ name="test_stream", namespace="test_namespace"
55
+ ),
56
+ stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
57
+ ),
58
+ sourceStats=AirbyteStateStats(recordCount=27.0),
59
+ ),
60
+ )
61
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
62
+ assert get_stream_descriptor(message) == expected_descriptor
63
+
64
+
65
+ def test_get_state_message_stream_descriptor_no_namespace():
66
+ message = AirbyteMessage(
67
+ type=Type.STATE,
68
+ state=AirbyteStateMessage(
69
+ type=AirbyteStateType.STREAM,
70
+ stream=AirbyteStreamState(
71
+ stream_descriptor=StreamDescriptor(name="test_stream"),
72
+ stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
73
+ ),
74
+ sourceStats=AirbyteStateStats(recordCount=27.0),
75
+ ),
76
+ )
77
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
78
+ assert get_stream_descriptor(message) == expected_descriptor
79
+
80
+
81
+ def test_get_other_message_stream_descriptor_fails():
82
+ message = AirbyteMessage(
83
+ type=Type.CONTROL,
84
+ control=AirbyteControlMessage(
85
+ type=OrchestratorType.CONNECTOR_CONFIG,
86
+ emitted_at=10,
87
+ connectorConfig=AirbyteControlConnectorConfigMessage(config={"any config": "a config value"}),
88
+ ),
89
+ )
90
+ with pytest.raises(NotImplementedError):
91
+ get_stream_descriptor(message)