airbyte-cdk 0.31.0__py3-none-any.whl → 0.31.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/message_grouper.py +54 -21
- airbyte_cdk/connector_builder/models.py +7 -1
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/RECORD +9 -9
- unit_tests/connector_builder/test_connector_builder_handler.py +22 -4
- unit_tests/connector_builder/test_message_grouper.py +20 -5
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/top_level.txt +0 -0
@@ -9,11 +9,19 @@ from json import JSONDecodeError
|
|
9
9
|
from typing import Any, Iterable, Iterator, Mapping, Optional, Union
|
10
10
|
from urllib.parse import parse_qs, urlparse
|
11
11
|
|
12
|
-
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages, StreamReadSlices
|
13
|
-
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type
|
12
|
+
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages, StreamReadSlices
|
14
13
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
14
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
15
15
|
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
|
16
|
-
from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog
|
16
|
+
from airbyte_protocol.models.airbyte_protocol import (
|
17
|
+
AirbyteLogMessage,
|
18
|
+
AirbyteMessage,
|
19
|
+
AirbyteTraceMessage,
|
20
|
+
ConfiguredAirbyteCatalog,
|
21
|
+
Level,
|
22
|
+
TraceType,
|
23
|
+
)
|
24
|
+
from airbyte_protocol.models.airbyte_protocol import Type as MessageType
|
17
25
|
|
18
26
|
|
19
27
|
class MessageGrouper:
|
@@ -42,14 +50,18 @@ class MessageGrouper:
|
|
42
50
|
|
43
51
|
slices = []
|
44
52
|
log_messages = []
|
45
|
-
state = {} # No support for incremental sync
|
46
53
|
for message_group in self._get_message_groups(
|
47
|
-
|
48
|
-
|
49
|
-
|
54
|
+
self._read_stream(source, config, configured_catalog),
|
55
|
+
schema_inferrer,
|
56
|
+
record_limit,
|
50
57
|
):
|
51
58
|
if isinstance(message_group, AirbyteLogMessage):
|
52
|
-
log_messages.append({"message": message_group.message})
|
59
|
+
log_messages.append(LogMessage(**{"message": message_group.message, "level": message_group.level.value}))
|
60
|
+
elif isinstance(message_group, AirbyteTraceMessage):
|
61
|
+
if message_group.type == TraceType.ERROR:
|
62
|
+
error_message = f"{message_group.error.message} - {message_group.error.stack_trace}"
|
63
|
+
log_messages.append(LogMessage(**{"message": error_message, "level": "ERROR"}))
|
64
|
+
|
53
65
|
else:
|
54
66
|
slices.append(message_group)
|
55
67
|
|
@@ -63,8 +75,8 @@ class MessageGrouper:
|
|
63
75
|
)
|
64
76
|
|
65
77
|
def _get_message_groups(
|
66
|
-
|
67
|
-
) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]:
|
78
|
+
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
|
79
|
+
) -> Iterable[Union[StreamReadPages, AirbyteLogMessage, AirbyteTraceMessage]]:
|
68
80
|
"""
|
69
81
|
Message groups are partitioned according to when request log messages are received. Subsequent response log messages
|
70
82
|
and record messages belong to the prior request log message and when we encounter another request, append the latest
|
@@ -86,44 +98,56 @@ class MessageGrouper:
|
|
86
98
|
current_slice_pages = []
|
87
99
|
current_page_request: Optional[HttpRequest] = None
|
88
100
|
current_page_response: Optional[HttpResponse] = None
|
101
|
+
had_error = False
|
89
102
|
|
90
103
|
while records_count < limit and (message := next(messages, None)):
|
91
104
|
if self._need_to_close_page(at_least_one_page_in_group, message):
|
92
|
-
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
|
105
|
+
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, True)
|
93
106
|
current_page_request = None
|
94
107
|
current_page_response = None
|
95
108
|
|
96
|
-
if at_least_one_page_in_group and message.type == Type.LOG and message.log.message.startswith("slice:"):
|
109
|
+
if at_least_one_page_in_group and message.type == MessageType.LOG and message.log.message.startswith("slice:"):
|
97
110
|
yield StreamReadSlices(pages=current_slice_pages)
|
98
111
|
current_slice_pages = []
|
99
112
|
at_least_one_page_in_group = False
|
100
|
-
elif message.type == Type.LOG and message.log.message.startswith("request:"):
|
113
|
+
elif message.type == MessageType.LOG and message.log.message.startswith("request:"):
|
101
114
|
if not at_least_one_page_in_group:
|
102
115
|
at_least_one_page_in_group = True
|
103
116
|
current_page_request = self._create_request_from_log_message(message.log)
|
104
|
-
elif message.type == Type.LOG and message.log.message.startswith("response:"):
|
117
|
+
elif message.type == MessageType.LOG and message.log.message.startswith("response:"):
|
105
118
|
current_page_response = self._create_response_from_log_message(message.log)
|
106
|
-
elif message.type == Type.LOG:
|
119
|
+
elif message.type == MessageType.LOG:
|
120
|
+
if message.log.level == Level.ERROR:
|
121
|
+
had_error = True
|
107
122
|
yield message.log
|
108
|
-
elif message.type == Type.RECORD:
|
123
|
+
elif message.type == MessageType.TRACE:
|
124
|
+
if message.trace.type == TraceType.ERROR:
|
125
|
+
had_error = True
|
126
|
+
yield message.trace
|
127
|
+
elif message.type == MessageType.RECORD:
|
109
128
|
current_page_records.append(message.record.data)
|
110
129
|
records_count += 1
|
111
130
|
schema_inferrer.accumulate(message.record)
|
112
131
|
else:
|
113
|
-
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
|
132
|
+
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete=not had_error)
|
114
133
|
yield StreamReadSlices(pages=current_slice_pages)
|
115
134
|
|
116
135
|
@staticmethod
|
117
|
-
def _need_to_close_page(at_least_one_page_in_group, message):
|
136
|
+
def _need_to_close_page(at_least_one_page_in_group, message) -> bool:
|
118
137
|
return (
|
119
138
|
at_least_one_page_in_group
|
120
|
-
and message.type == Type.LOG
|
139
|
+
and message.type == MessageType.LOG
|
121
140
|
and (message.log.message.startswith("request:") or message.log.message.startswith("slice:"))
|
122
141
|
)
|
123
142
|
|
124
143
|
@staticmethod
|
125
|
-
def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records):
|
126
|
-
|
144
|
+
def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete: bool):
|
145
|
+
"""
|
146
|
+
Close a page when parsing message groups
|
147
|
+
@param validate_page_complete: in some cases, we expect the CDK to not return a response. As of today, this will only happen before
|
148
|
+
an uncaught exception and therefore, the assumption is that `validate_page_complete=False` only on the last page that is being closed
|
149
|
+
"""
|
150
|
+
if validate_page_complete and (not current_page_request or not current_page_response):
|
127
151
|
raise ValueError("Every message grouping should have at least one request and response")
|
128
152
|
|
129
153
|
current_slice_pages.append(
|
@@ -131,6 +155,15 @@ class MessageGrouper:
|
|
131
155
|
)
|
132
156
|
current_page_records.clear()
|
133
157
|
|
158
|
+
def _read_stream(self, source, config, configured_catalog) -> Iterator[AirbyteMessage]:
|
159
|
+
# the generator can raise an exception
|
160
|
+
# iterate over the generated messages. if next raises an exception, catch it and yield it as an Airbyte trace message
|
161
|
+
try:
|
162
|
+
yield from source.read(logger=self.logger, config=config, catalog=configured_catalog, state={})
|
163
|
+
except Exception as e:
|
164
|
+
error_message = f"{e.args[0] if len(e.args) > 0 else str(e)}"
|
165
|
+
yield AirbyteTracedException.from_exception(e, message=error_message).as_airbyte_message()
|
166
|
+
|
134
167
|
def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]:
|
135
168
|
# TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the
|
136
169
|
# form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
|
@@ -50,9 +50,15 @@ class StreamReadSlicesInner:
|
|
50
50
|
state: Optional[Dict[str, Any]]
|
51
51
|
|
52
52
|
|
53
|
+
@dataclass
|
54
|
+
class LogMessage:
|
55
|
+
message: str
|
56
|
+
level: str
|
57
|
+
|
58
|
+
|
53
59
|
@dataclass
|
54
60
|
class StreamRead(object):
|
55
|
-
logs: List[
|
61
|
+
logs: List[LogMessage]
|
56
62
|
slices: List[StreamReadSlicesInner]
|
57
63
|
test_read_limit_reached: bool
|
58
64
|
inferred_schema: Optional[Dict[str, Any]]
|
@@ -8,8 +8,8 @@ airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
9
9
|
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=9iG2hmaJyBtLDtdwtQp8M19pG2HQjR7Yy6jnBDggXMk,4160
|
10
10
|
airbyte_cdk/connector_builder/main.py,sha256=IOijgSQ4A9KhqJplHSVPTrxH_cyGWW_9uNmSIwBo0l0,3021
|
11
|
-
airbyte_cdk/connector_builder/message_grouper.py,sha256
|
12
|
-
airbyte_cdk/connector_builder/models.py,sha256=
|
11
|
+
airbyte_cdk/connector_builder/message_grouper.py,sha256=-0AYh21eZeNJyyFPvSB84NNKMZznSX7bf4Ygi3deWEU,10972
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=y0PJ-LwJk3e1RzRmMfjQSBP9ENx_a0wBcWNCjlW72Ks,1832
|
13
13
|
airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
|
14
14
|
airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
|
15
15
|
airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
|
@@ -160,8 +160,8 @@ airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnT
|
|
160
160
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
161
161
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
162
162
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
163
|
-
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
|
164
|
-
unit_tests/connector_builder/test_message_grouper.py,sha256=
|
163
|
+
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=fwOlQPdc9quPUoZYXg_GVXqdkw5TLWs0CuBjYEMAcfM,19745
|
164
|
+
unit_tests/connector_builder/test_message_grouper.py,sha256=1_DXyuqHmr8B7b1t8PSpmYWaHvYYAB2mMwpp_YAbYwc,22959
|
165
165
|
unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
|
166
166
|
unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
167
167
|
unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
|
@@ -257,8 +257,8 @@ unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
257
257
|
unit_tests/utils/test_schema_inferrer.py,sha256=ckl17GlNOZInqgxni7Z2A0bg_p6JDy0GVFAG8ph67pw,3288
|
258
258
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
259
259
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
260
|
-
airbyte_cdk-0.31.
|
261
|
-
airbyte_cdk-0.31.
|
262
|
-
airbyte_cdk-0.31.
|
263
|
-
airbyte_cdk-0.31.
|
264
|
-
airbyte_cdk-0.31.
|
260
|
+
airbyte_cdk-0.31.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
261
|
+
airbyte_cdk-0.31.1.dist-info/METADATA,sha256=nSfqOPf-MueD0MHg9bTQU8PWrhP43lmxd6JK1IDNj-M,8902
|
262
|
+
airbyte_cdk-0.31.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
263
|
+
airbyte_cdk-0.31.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
264
|
+
airbyte_cdk-0.31.1.dist-info/RECORD,,
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import copy
|
6
|
+
import dataclasses
|
6
7
|
import json
|
7
8
|
from unittest import mock
|
8
9
|
from unittest.mock import patch
|
@@ -11,7 +12,7 @@ import pytest
|
|
11
12
|
from airbyte_cdk import connector_builder
|
12
13
|
from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, resolve_manifest
|
13
14
|
from airbyte_cdk.connector_builder.main import handle_connector_builder_request, handle_request, read_stream
|
14
|
-
from airbyte_cdk.connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
|
15
|
+
from airbyte_cdk.connector_builder.models import LogMessage, StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
|
15
16
|
from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
|
16
17
|
from airbyte_cdk.models import Type as MessageType
|
17
18
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
@@ -323,14 +324,31 @@ def test_read():
|
|
323
324
|
assert output_record == expected_airbyte_message
|
324
325
|
|
325
326
|
|
326
|
-
|
327
|
+
@patch("traceback.TracebackException.from_exception")
|
328
|
+
def test_read_returns_error_response(mock_from_exception):
|
327
329
|
class MockManifestDeclarativeSource:
|
328
330
|
def read(self, logger, config, catalog, state):
|
329
|
-
raise ValueError
|
331
|
+
raise ValueError("error_message")
|
332
|
+
|
333
|
+
stack_trace = "a stack trace"
|
334
|
+
mock_from_exception.return_value = stack_trace
|
330
335
|
|
331
336
|
source = MockManifestDeclarativeSource()
|
332
337
|
response = read_stream(source, TEST_READ_CONFIG, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG))
|
333
|
-
|
338
|
+
|
339
|
+
expected_stream_read = StreamRead(logs=[LogMessage("error_message - a stack trace", "ERROR")],
|
340
|
+
slices=[StreamReadSlicesInner(
|
341
|
+
pages=[StreamReadSlicesInnerPagesInner(records=[], request=None, response=None)],
|
342
|
+
slice_descriptor=None, state=None)],
|
343
|
+
test_read_limit_reached=False,
|
344
|
+
inferred_schema=None)
|
345
|
+
|
346
|
+
expected_message = AirbyteMessage(
|
347
|
+
type=MessageType.RECORD,
|
348
|
+
record=AirbyteRecordMessage(stream=_stream_name, data=dataclasses.asdict(expected_stream_read), emitted_at=1),
|
349
|
+
)
|
350
|
+
response.record.emitted_at = 1
|
351
|
+
assert response == expected_message
|
334
352
|
|
335
353
|
|
336
354
|
@pytest.mark.parametrize(
|
@@ -8,7 +8,7 @@ from unittest.mock import MagicMock
|
|
8
8
|
|
9
9
|
import pytest
|
10
10
|
from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
|
11
|
-
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages
|
11
|
+
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages
|
12
12
|
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level
|
13
13
|
from airbyte_cdk.models import Type as MessageType
|
14
14
|
from unit_tests.connector_builder.utils import create_configured_catalog
|
@@ -17,7 +17,7 @@ MAX_PAGES_PER_SLICE = 4
|
|
17
17
|
MAX_SLICES = 3
|
18
18
|
|
19
19
|
MANIFEST = {
|
20
|
-
"version": "0.
|
20
|
+
"version": "0.30.0",
|
21
21
|
"type": "DeclarativeSource",
|
22
22
|
"definitions": {
|
23
23
|
"selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"},
|
@@ -167,9 +167,9 @@ def test_get_grouped_messages_with_logs():
|
|
167
167
|
),
|
168
168
|
]
|
169
169
|
expected_logs = [
|
170
|
-
{"message": "log message before the request"},
|
171
|
-
{"message": "log message during the page"},
|
172
|
-
{"message": "log message after the response"},
|
170
|
+
LogMessage(**{"message": "log message before the request", "level": "INFO"}),
|
171
|
+
LogMessage(**{"message": "log message during the page", "level": "INFO"}),
|
172
|
+
LogMessage(**{"message": "log message after the response", "level": "INFO"}),
|
173
173
|
]
|
174
174
|
|
175
175
|
mock_source = make_mock_source(
|
@@ -513,6 +513,21 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit
|
|
513
513
|
assert stream_read.test_read_limit_reached
|
514
514
|
|
515
515
|
|
516
|
+
def test_read_stream_returns_error_if_stream_does_not_exist():
|
517
|
+
mock_source = MagicMock()
|
518
|
+
mock_source.read.side_effect = ValueError("error")
|
519
|
+
|
520
|
+
full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
|
521
|
+
|
522
|
+
message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
523
|
+
actual_response = message_grouper.get_message_groups(source=mock_source, config=full_config,
|
524
|
+
configured_catalog=create_configured_catalog("not_in_manifest"))
|
525
|
+
|
526
|
+
assert 1 == len(actual_response.logs)
|
527
|
+
assert "Traceback" in actual_response.logs[0].message
|
528
|
+
assert "ERROR" in actual_response.logs[0].level
|
529
|
+
|
530
|
+
|
516
531
|
def make_mock_source(return_value: Iterator) -> MagicMock:
|
517
532
|
mock_source = MagicMock()
|
518
533
|
mock_source.read.return_value = return_value
|
File without changes
|
File without changes
|
File without changes
|