airbyte-cdk: airbyte_cdk-0.68.4-py3-none-any.whl → airbyte_cdk-0.69.1-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (30):
  1. airbyte_cdk/entrypoint.py +27 -7
  2. airbyte_cdk/sources/connector_state_manager.py +0 -1
  3. airbyte_cdk/sources/file_based/file_based_source.py +4 -2
  4. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +2 -2
  5. airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py +2 -2
  6. airbyte_cdk/sources/file_based/stream/concurrent/cursor/{file_based_noop_cursor.py → file_based_final_state_cursor.py} +21 -6
  7. airbyte_cdk/sources/streams/concurrent/adapters.py +2 -2
  8. airbyte_cdk/sources/streams/concurrent/cursor.py +27 -3
  9. airbyte_cdk/sources/streams/concurrent/default_stream.py +7 -3
  10. airbyte_cdk/test/entrypoint_wrapper.py +1 -1
  11. airbyte_cdk/utils/message_utils.py +17 -0
  12. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/METADATA +1 -1
  13. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/RECORD +30 -28
  14. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/WHEEL +1 -1
  15. unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py +2 -2
  16. unit_tests/sources/file_based/scenarios/csv_scenarios.py +128 -37
  17. unit_tests/sources/file_based/stream/concurrent/test_adapters.py +3 -3
  18. unit_tests/sources/file_based/test_file_based_scenarios.py +13 -6
  19. unit_tests/sources/file_based/test_scenarios.py +32 -3
  20. unit_tests/sources/streams/concurrent/scenarios/stream_facade_builder.py +2 -2
  21. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_scenarios.py +16 -14
  22. unit_tests/sources/streams/concurrent/scenarios/thread_based_concurrent_stream_source_builder.py +5 -4
  23. unit_tests/sources/streams/concurrent/test_default_stream.py +8 -6
  24. unit_tests/sources/streams/test_stream_read.py +3 -2
  25. unit_tests/sources/test_concurrent_source.py +7 -5
  26. unit_tests/sources/test_source_read.py +2 -3
  27. unit_tests/test/test_entrypoint_wrapper.py +9 -6
  28. unit_tests/utils/test_message_utils.py +91 -0
  29. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/LICENSE.txt +0 -0
  30. {airbyte_cdk-0.68.4.dist-info → airbyte_cdk-0.69.1.dist-info}/top_level.txt +0 -0
@@ -12,7 +12,7 @@ from airbyte_cdk.sources.concurrent_source.thread_pool_manager import ThreadPool
12
12
  from airbyte_cdk.sources.message import InMemoryMessageRepository, MessageRepository
13
13
  from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
14
14
  from airbyte_cdk.sources.streams.concurrent.availability_strategy import StreamAvailability, StreamAvailable, StreamUnavailable
15
- from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, NoopCursor
15
+ from airbyte_cdk.sources.streams.concurrent.cursor import Cursor, FinalStateCursor
16
16
  from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
17
17
  from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
18
18
  from airbyte_protocol.models import AirbyteStream
@@ -42,10 +42,11 @@ MESSAGE_FROM_REPOSITORY = Mock()
42
42
 
43
43
 
44
44
  class _MockStream(AbstractStream):
45
- def __init__(self, name: str, available: bool = True, json_schema: Dict[str, Any] = {}):
45
+ def __init__(self, name: str, message_repository: MessageRepository, available: bool = True, json_schema: Dict[str, Any] = {}):
46
46
  self._name = name
47
47
  self._available = available
48
48
  self._json_schema = json_schema
49
+ self._message_repository = message_repository
49
50
 
50
51
  def generate_partitions(self) -> Iterable[Partition]:
51
52
  yield _MockPartition(self._name)
@@ -75,7 +76,7 @@ class _MockStream(AbstractStream):
75
76
 
76
77
  @property
77
78
  def cursor(self) -> Cursor:
78
- return NoopCursor()
79
+ return FinalStateCursor(stream_name=self._name, stream_namespace=None, message_repository=self._message_repository)
79
80
 
80
81
 
81
82
  class _MockPartition(Partition):
@@ -103,8 +104,9 @@ class _MockPartition(Partition):
103
104
 
104
105
 
105
106
  def test_concurrent_source_reading_from_no_streams():
106
- stream = _MockStream("my_stream", False, {})
107
- source = _MockSource()
107
+ message_repository = InMemoryMessageRepository()
108
+ stream = _MockStream("my_stream", message_repository,False, {})
109
+ source = _MockSource(message_repository=message_repository)
108
110
  messages = []
109
111
  for m in source.read([stream]):
110
112
  messages.append(m)
@@ -27,7 +27,7 @@ from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import Conc
27
27
  from airbyte_cdk.sources.message import InMemoryMessageRepository
28
28
  from airbyte_cdk.sources.streams import Stream
29
29
  from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
30
- from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
30
+ from airbyte_cdk.sources.streams.concurrent.cursor import FinalStateCursor
31
31
  from airbyte_cdk.sources.streams.core import StreamData
32
32
  from airbyte_cdk.utils import AirbyteTracedException
33
33
  from unit_tests.sources.streams.concurrent.scenarios.thread_based_concurrent_stream_source_builder import NeverLogSliceLogger
@@ -409,9 +409,8 @@ def _init_sources(stream_slice_to_partitions, state, logger):
409
409
 
410
410
 
411
411
  def _init_source(stream_slice_to_partitions, state, logger, source):
412
- cursor = NoopCursor()
413
412
  streams = [
414
- StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, cursor)
413
+ StreamFacade.create_from_stream(_MockStream(stream_slices, f"stream{i}"), source, logger, state, FinalStateCursor(stream_name=f"stream{i}", stream_namespace=None, message_repository=InMemoryMessageRepository()))
415
414
  for i, stream_slices in enumerate(stream_slice_to_partitions)
416
415
  ]
417
416
  source.set_streams(streams)
@@ -3,7 +3,7 @@
3
3
  import json
4
4
  import logging
5
5
  import os
6
- from typing import Any, Iterator, List
6
+ from typing import Any, Iterator, List, Mapping
7
7
  from unittest import TestCase
8
8
  from unittest.mock import Mock, patch
9
9
 
@@ -16,7 +16,9 @@ from airbyte_protocol.models import (
16
16
  AirbyteLogMessage,
17
17
  AirbyteMessage,
18
18
  AirbyteRecordMessage,
19
+ AirbyteStateBlob,
19
20
  AirbyteStateMessage,
21
+ AirbyteStreamState,
20
22
  AirbyteStreamStatus,
21
23
  AirbyteStreamStatusTraceMessage,
22
24
  AirbyteTraceMessage,
@@ -28,8 +30,8 @@ from airbyte_protocol.models import (
28
30
  )
29
31
 
30
32
 
31
- def _a_state_message(state: Any) -> AirbyteMessage:
32
- return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state))
33
+ def _a_state_message(stream_name: str, stream_state: Mapping[str, Any]) -> AirbyteMessage:
34
+ return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(stream=AirbyteStreamState(stream_descriptor=StreamDescriptor(name=stream_name), stream_state=AirbyteStateBlob(**stream_state))))
33
35
 
34
36
 
35
37
  def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteMessage:
@@ -49,7 +51,7 @@ def _a_status_message(stream_name: str, status: AirbyteStreamStatus) -> AirbyteM
49
51
  _A_RECORD = AirbyteMessage(
50
52
  type=Type.RECORD, record=AirbyteRecordMessage(stream="stream", data={"record key": "record value"}, emitted_at=0)
51
53
  )
52
- _A_STATE_MESSAGE = _a_state_message({"state key": "state value for _A_STATE_MESSAGE"})
54
+ _A_STATE_MESSAGE = _a_state_message("stream_name", {"state key": "state value for _A_STATE_MESSAGE"})
53
55
  _A_LOG = AirbyteMessage(type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message="This is an Airbyte log message"))
54
56
  _AN_ERROR_MESSAGE = AirbyteMessage(
55
57
  type=Type.TRACE,
@@ -176,8 +178,9 @@ class EntrypointWrapperTest(TestCase):
176
178
 
177
179
  @patch("airbyte_cdk.test.entrypoint_wrapper.AirbyteEntrypoint")
178
180
  def test_given_many_state_messages_and_records_when_read_then_output_has_records_and_state_message(self, entrypoint):
179
- last_emitted_state = {"last state key": "last state value"}
180
- entrypoint.return_value.run.return_value = _to_entrypoint_output([_A_STATE_MESSAGE, _a_state_message(last_emitted_state)])
181
+ state_value = {"state_key": "last state value"}
182
+ last_emitted_state = AirbyteStreamState(stream_descriptor=StreamDescriptor(name="stream_name"), stream_state=AirbyteStateBlob(**state_value))
183
+ entrypoint.return_value.run.return_value = _to_entrypoint_output([_A_STATE_MESSAGE, _a_state_message("stream_name", state_value)])
181
184
 
182
185
  output = read(self._a_source, _A_CONFIG, _A_CATALOG, _A_STATE)
183
186
 
@@ -0,0 +1,91 @@
1
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
2
+
3
+ import pytest
4
+ from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
5
+ from airbyte_cdk.utils.message_utils import get_stream_descriptor
6
+ from airbyte_protocol.models import (
7
+ AirbyteControlConnectorConfigMessage,
8
+ AirbyteControlMessage,
9
+ AirbyteMessage,
10
+ AirbyteRecordMessage,
11
+ AirbyteStateBlob,
12
+ AirbyteStateMessage,
13
+ AirbyteStateStats,
14
+ AirbyteStateType,
15
+ AirbyteStreamState,
16
+ OrchestratorType,
17
+ StreamDescriptor,
18
+ Type,
19
+ )
20
+
21
+
22
+ def test_get_record_message_stream_descriptor():
23
+ message = AirbyteMessage(
24
+ type=Type.RECORD,
25
+ record=AirbyteRecordMessage(
26
+ stream="test_stream",
27
+ namespace="test_namespace",
28
+ data={"id": "12345"},
29
+ emitted_at=1,
30
+ ),
31
+ )
32
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
33
+ assert get_stream_descriptor(message) == expected_descriptor
34
+
35
+
36
+ def test_get_record_message_stream_descriptor_no_namespace():
37
+ message = AirbyteMessage(
38
+ type=Type.RECORD,
39
+ record=AirbyteRecordMessage(
40
+ stream="test_stream", data={"id": "12345"}, emitted_at=1
41
+ ),
42
+ )
43
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
44
+ assert get_stream_descriptor(message) == expected_descriptor
45
+
46
+
47
+ def test_get_state_message_stream_descriptor():
48
+ message = AirbyteMessage(
49
+ type=Type.STATE,
50
+ state=AirbyteStateMessage(
51
+ type=AirbyteStateType.STREAM,
52
+ stream=AirbyteStreamState(
53
+ stream_descriptor=StreamDescriptor(
54
+ name="test_stream", namespace="test_namespace"
55
+ ),
56
+ stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
57
+ ),
58
+ sourceStats=AirbyteStateStats(recordCount=27.0),
59
+ ),
60
+ )
61
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace="test_namespace")
62
+ assert get_stream_descriptor(message) == expected_descriptor
63
+
64
+
65
+ def test_get_state_message_stream_descriptor_no_namespace():
66
+ message = AirbyteMessage(
67
+ type=Type.STATE,
68
+ state=AirbyteStateMessage(
69
+ type=AirbyteStateType.STREAM,
70
+ stream=AirbyteStreamState(
71
+ stream_descriptor=StreamDescriptor(name="test_stream"),
72
+ stream_state=AirbyteStateBlob(updated_at="2024-02-02"),
73
+ ),
74
+ sourceStats=AirbyteStateStats(recordCount=27.0),
75
+ ),
76
+ )
77
+ expected_descriptor = HashableStreamDescriptor(name="test_stream", namespace=None)
78
+ assert get_stream_descriptor(message) == expected_descriptor
79
+
80
+
81
+ def test_get_other_message_stream_descriptor_fails():
82
+ message = AirbyteMessage(
83
+ type=Type.CONTROL,
84
+ control=AirbyteControlMessage(
85
+ type=OrchestratorType.CONNECTOR_CONFIG,
86
+ emitted_at=10,
87
+ connectorConfig=AirbyteControlConnectorConfigMessage(config={"any config": "a config value"}),
88
+ ),
89
+ )
90
+ with pytest.raises(NotImplementedError):
91
+ get_stream_descriptor(message)