airbyte-cdk 0.31.0__py3-none-any.whl → 0.31.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/connector_builder/message_grouper.py +54 -21
- airbyte_cdk/connector_builder/models.py +7 -1
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/RECORD +9 -9
- unit_tests/connector_builder/test_connector_builder_handler.py +22 -4
- unit_tests/connector_builder/test_message_grouper.py +20 -5
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.31.0.dist-info → airbyte_cdk-0.31.1.dist-info}/top_level.txt +0 -0
@@ -9,11 +9,19 @@ from json import JSONDecodeError
|
|
9
9
|
from typing import Any, Iterable, Iterator, Mapping, Optional, Union
|
10
10
|
from urllib.parse import parse_qs, urlparse
|
11
11
|
|
12
|
-
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages, StreamReadSlices
|
13
|
-
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type
|
12
|
+
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages, StreamReadSlices
|
14
13
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
14
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
15
15
|
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
|
16
|
-
from airbyte_protocol.models.airbyte_protocol import
|
16
|
+
from airbyte_protocol.models.airbyte_protocol import (
|
17
|
+
AirbyteLogMessage,
|
18
|
+
AirbyteMessage,
|
19
|
+
AirbyteTraceMessage,
|
20
|
+
ConfiguredAirbyteCatalog,
|
21
|
+
Level,
|
22
|
+
TraceType,
|
23
|
+
)
|
24
|
+
from airbyte_protocol.models.airbyte_protocol import Type as MessageType
|
17
25
|
|
18
26
|
|
19
27
|
class MessageGrouper:
|
@@ -42,14 +50,18 @@ class MessageGrouper:
|
|
42
50
|
|
43
51
|
slices = []
|
44
52
|
log_messages = []
|
45
|
-
state = {} # No support for incremental sync
|
46
53
|
for message_group in self._get_message_groups(
|
47
|
-
|
48
|
-
|
49
|
-
|
54
|
+
self._read_stream(source, config, configured_catalog),
|
55
|
+
schema_inferrer,
|
56
|
+
record_limit,
|
50
57
|
):
|
51
58
|
if isinstance(message_group, AirbyteLogMessage):
|
52
|
-
log_messages.append({"message": message_group.message})
|
59
|
+
log_messages.append(LogMessage(**{"message": message_group.message, "level": message_group.level.value}))
|
60
|
+
elif isinstance(message_group, AirbyteTraceMessage):
|
61
|
+
if message_group.type == TraceType.ERROR:
|
62
|
+
error_message = f"{message_group.error.message} - {message_group.error.stack_trace}"
|
63
|
+
log_messages.append(LogMessage(**{"message": error_message, "level": "ERROR"}))
|
64
|
+
|
53
65
|
else:
|
54
66
|
slices.append(message_group)
|
55
67
|
|
@@ -63,8 +75,8 @@ class MessageGrouper:
|
|
63
75
|
)
|
64
76
|
|
65
77
|
def _get_message_groups(
|
66
|
-
|
67
|
-
) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]:
|
78
|
+
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
|
79
|
+
) -> Iterable[Union[StreamReadPages, AirbyteLogMessage, AirbyteTraceMessage]]:
|
68
80
|
"""
|
69
81
|
Message groups are partitioned according to when request log messages are received. Subsequent response log messages
|
70
82
|
and record messages belong to the prior request log message and when we encounter another request, append the latest
|
@@ -86,44 +98,56 @@ class MessageGrouper:
|
|
86
98
|
current_slice_pages = []
|
87
99
|
current_page_request: Optional[HttpRequest] = None
|
88
100
|
current_page_response: Optional[HttpResponse] = None
|
101
|
+
had_error = False
|
89
102
|
|
90
103
|
while records_count < limit and (message := next(messages, None)):
|
91
104
|
if self._need_to_close_page(at_least_one_page_in_group, message):
|
92
|
-
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
|
105
|
+
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, True)
|
93
106
|
current_page_request = None
|
94
107
|
current_page_response = None
|
95
108
|
|
96
|
-
if at_least_one_page_in_group and message.type ==
|
109
|
+
if at_least_one_page_in_group and message.type == MessageType.LOG and message.log.message.startswith("slice:"):
|
97
110
|
yield StreamReadSlices(pages=current_slice_pages)
|
98
111
|
current_slice_pages = []
|
99
112
|
at_least_one_page_in_group = False
|
100
|
-
elif message.type ==
|
113
|
+
elif message.type == MessageType.LOG and message.log.message.startswith("request:"):
|
101
114
|
if not at_least_one_page_in_group:
|
102
115
|
at_least_one_page_in_group = True
|
103
116
|
current_page_request = self._create_request_from_log_message(message.log)
|
104
|
-
elif message.type ==
|
117
|
+
elif message.type == MessageType.LOG and message.log.message.startswith("response:"):
|
105
118
|
current_page_response = self._create_response_from_log_message(message.log)
|
106
|
-
elif message.type ==
|
119
|
+
elif message.type == MessageType.LOG:
|
120
|
+
if message.log.level == Level.ERROR:
|
121
|
+
had_error = True
|
107
122
|
yield message.log
|
108
|
-
elif message.type ==
|
123
|
+
elif message.type == MessageType.TRACE:
|
124
|
+
if message.trace.type == TraceType.ERROR:
|
125
|
+
had_error = True
|
126
|
+
yield message.trace
|
127
|
+
elif message.type == MessageType.RECORD:
|
109
128
|
current_page_records.append(message.record.data)
|
110
129
|
records_count += 1
|
111
130
|
schema_inferrer.accumulate(message.record)
|
112
131
|
else:
|
113
|
-
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
|
132
|
+
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete=not had_error)
|
114
133
|
yield StreamReadSlices(pages=current_slice_pages)
|
115
134
|
|
116
135
|
@staticmethod
|
117
|
-
def _need_to_close_page(at_least_one_page_in_group, message):
|
136
|
+
def _need_to_close_page(at_least_one_page_in_group, message) -> bool:
|
118
137
|
return (
|
119
138
|
at_least_one_page_in_group
|
120
|
-
and message.type ==
|
139
|
+
and message.type == MessageType.LOG
|
121
140
|
and (message.log.message.startswith("request:") or message.log.message.startswith("slice:"))
|
122
141
|
)
|
123
142
|
|
124
143
|
@staticmethod
|
125
|
-
def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records):
|
126
|
-
|
144
|
+
def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete: bool):
|
145
|
+
"""
|
146
|
+
Close a page when parsing message groups
|
147
|
+
@param validate_page_complete: in some cases, we expect the CDK to not return a response. As of today, this will only happen before
|
148
|
+
an uncaught exception and therefore, the assumption is that `validate_page_complete=False` only on the last page that is being closed
|
149
|
+
"""
|
150
|
+
if validate_page_complete and (not current_page_request or not current_page_response):
|
127
151
|
raise ValueError("Every message grouping should have at least one request and response")
|
128
152
|
|
129
153
|
current_slice_pages.append(
|
@@ -131,6 +155,15 @@ class MessageGrouper:
|
|
131
155
|
)
|
132
156
|
current_page_records.clear()
|
133
157
|
|
158
|
+
def _read_stream(self, source, config, configured_catalog) -> Iterator[AirbyteMessage]:
|
159
|
+
# the generator can raise an exception
|
160
|
+
# iterate over the generated messages. If next raises an exception, catch it and yield it as an Airbyte trace message (AirbyteTraceMessage)
|
161
|
+
try:
|
162
|
+
yield from source.read(logger=self.logger, config=config, catalog=configured_catalog, state={})
|
163
|
+
except Exception as e:
|
164
|
+
error_message = f"{e.args[0] if len(e.args) > 0 else str(e)}"
|
165
|
+
yield AirbyteTracedException.from_exception(e, message=error_message).as_airbyte_message()
|
166
|
+
|
134
167
|
def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]:
|
135
168
|
# TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the
|
136
169
|
# form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
|
@@ -50,9 +50,15 @@ class StreamReadSlicesInner:
|
|
50
50
|
state: Optional[Dict[str, Any]]
|
51
51
|
|
52
52
|
|
53
|
+
@dataclass
|
54
|
+
class LogMessage:
|
55
|
+
message: str
|
56
|
+
level: str
|
57
|
+
|
58
|
+
|
53
59
|
@dataclass
|
54
60
|
class StreamRead(object):
|
55
|
-
logs: List[
|
61
|
+
logs: List[LogMessage]
|
56
62
|
slices: List[StreamReadSlicesInner]
|
57
63
|
test_read_limit_reached: bool
|
58
64
|
inferred_schema: Optional[Dict[str, Any]]
|
@@ -8,8 +8,8 @@ airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
8
|
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
9
9
|
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=9iG2hmaJyBtLDtdwtQp8M19pG2HQjR7Yy6jnBDggXMk,4160
|
10
10
|
airbyte_cdk/connector_builder/main.py,sha256=IOijgSQ4A9KhqJplHSVPTrxH_cyGWW_9uNmSIwBo0l0,3021
|
11
|
-
airbyte_cdk/connector_builder/message_grouper.py,sha256
|
12
|
-
airbyte_cdk/connector_builder/models.py,sha256=
|
11
|
+
airbyte_cdk/connector_builder/message_grouper.py,sha256=-0AYh21eZeNJyyFPvSB84NNKMZznSX7bf4Ygi3deWEU,10972
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=y0PJ-LwJk3e1RzRmMfjQSBP9ENx_a0wBcWNCjlW72Ks,1832
|
13
13
|
airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
|
14
14
|
airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
|
15
15
|
airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
|
@@ -160,8 +160,8 @@ airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnT
|
|
160
160
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
161
161
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
162
162
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
163
|
-
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
|
164
|
-
unit_tests/connector_builder/test_message_grouper.py,sha256=
|
163
|
+
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=fwOlQPdc9quPUoZYXg_GVXqdkw5TLWs0CuBjYEMAcfM,19745
|
164
|
+
unit_tests/connector_builder/test_message_grouper.py,sha256=1_DXyuqHmr8B7b1t8PSpmYWaHvYYAB2mMwpp_YAbYwc,22959
|
165
165
|
unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
|
166
166
|
unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
167
167
|
unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
|
@@ -257,8 +257,8 @@ unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
257
257
|
unit_tests/utils/test_schema_inferrer.py,sha256=ckl17GlNOZInqgxni7Z2A0bg_p6JDy0GVFAG8ph67pw,3288
|
258
258
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
259
259
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
260
|
-
airbyte_cdk-0.31.
|
261
|
-
airbyte_cdk-0.31.
|
262
|
-
airbyte_cdk-0.31.
|
263
|
-
airbyte_cdk-0.31.
|
264
|
-
airbyte_cdk-0.31.
|
260
|
+
airbyte_cdk-0.31.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
261
|
+
airbyte_cdk-0.31.1.dist-info/METADATA,sha256=nSfqOPf-MueD0MHg9bTQU8PWrhP43lmxd6JK1IDNj-M,8902
|
262
|
+
airbyte_cdk-0.31.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
263
|
+
airbyte_cdk-0.31.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
264
|
+
airbyte_cdk-0.31.1.dist-info/RECORD,,
|
@@ -3,6 +3,7 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import copy
|
6
|
+
import dataclasses
|
6
7
|
import json
|
7
8
|
from unittest import mock
|
8
9
|
from unittest.mock import patch
|
@@ -11,7 +12,7 @@ import pytest
|
|
11
12
|
from airbyte_cdk import connector_builder
|
12
13
|
from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, resolve_manifest
|
13
14
|
from airbyte_cdk.connector_builder.main import handle_connector_builder_request, handle_request, read_stream
|
14
|
-
from airbyte_cdk.connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
|
15
|
+
from airbyte_cdk.connector_builder.models import LogMessage, StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
|
15
16
|
from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
|
16
17
|
from airbyte_cdk.models import Type as MessageType
|
17
18
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
@@ -323,14 +324,31 @@ def test_read():
|
|
323
324
|
assert output_record == expected_airbyte_message
|
324
325
|
|
325
326
|
|
326
|
-
|
327
|
+
@patch("traceback.TracebackException.from_exception")
|
328
|
+
def test_read_returns_error_response(mock_from_exception):
|
327
329
|
class MockManifestDeclarativeSource:
|
328
330
|
def read(self, logger, config, catalog, state):
|
329
|
-
raise ValueError
|
331
|
+
raise ValueError("error_message")
|
332
|
+
|
333
|
+
stack_trace = "a stack trace"
|
334
|
+
mock_from_exception.return_value = stack_trace
|
330
335
|
|
331
336
|
source = MockManifestDeclarativeSource()
|
332
337
|
response = read_stream(source, TEST_READ_CONFIG, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG))
|
333
|
-
|
338
|
+
|
339
|
+
expected_stream_read = StreamRead(logs=[LogMessage("error_message - a stack trace", "ERROR")],
|
340
|
+
slices=[StreamReadSlicesInner(
|
341
|
+
pages=[StreamReadSlicesInnerPagesInner(records=[], request=None, response=None)],
|
342
|
+
slice_descriptor=None, state=None)],
|
343
|
+
test_read_limit_reached=False,
|
344
|
+
inferred_schema=None)
|
345
|
+
|
346
|
+
expected_message = AirbyteMessage(
|
347
|
+
type=MessageType.RECORD,
|
348
|
+
record=AirbyteRecordMessage(stream=_stream_name, data=dataclasses.asdict(expected_stream_read), emitted_at=1),
|
349
|
+
)
|
350
|
+
response.record.emitted_at = 1
|
351
|
+
assert response == expected_message
|
334
352
|
|
335
353
|
|
336
354
|
@pytest.mark.parametrize(
|
@@ -8,7 +8,7 @@ from unittest.mock import MagicMock
|
|
8
8
|
|
9
9
|
import pytest
|
10
10
|
from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
|
11
|
-
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages
|
11
|
+
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages
|
12
12
|
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level
|
13
13
|
from airbyte_cdk.models import Type as MessageType
|
14
14
|
from unit_tests.connector_builder.utils import create_configured_catalog
|
@@ -17,7 +17,7 @@ MAX_PAGES_PER_SLICE = 4
|
|
17
17
|
MAX_SLICES = 3
|
18
18
|
|
19
19
|
MANIFEST = {
|
20
|
-
"version": "0.
|
20
|
+
"version": "0.30.0",
|
21
21
|
"type": "DeclarativeSource",
|
22
22
|
"definitions": {
|
23
23
|
"selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"},
|
@@ -167,9 +167,9 @@ def test_get_grouped_messages_with_logs():
|
|
167
167
|
),
|
168
168
|
]
|
169
169
|
expected_logs = [
|
170
|
-
{"message": "log message before the request"},
|
171
|
-
{"message": "log message during the page"},
|
172
|
-
{"message": "log message after the response"},
|
170
|
+
LogMessage(**{"message": "log message before the request", "level": "INFO"}),
|
171
|
+
LogMessage(**{"message": "log message during the page", "level": "INFO"}),
|
172
|
+
LogMessage(**{"message": "log message after the response", "level": "INFO"}),
|
173
173
|
]
|
174
174
|
|
175
175
|
mock_source = make_mock_source(
|
@@ -513,6 +513,21 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit
|
|
513
513
|
assert stream_read.test_read_limit_reached
|
514
514
|
|
515
515
|
|
516
|
+
def test_read_stream_returns_error_if_stream_does_not_exist():
|
517
|
+
mock_source = MagicMock()
|
518
|
+
mock_source.read.side_effect = ValueError("error")
|
519
|
+
|
520
|
+
full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
|
521
|
+
|
522
|
+
message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
523
|
+
actual_response = message_grouper.get_message_groups(source=mock_source, config=full_config,
|
524
|
+
configured_catalog=create_configured_catalog("not_in_manifest"))
|
525
|
+
|
526
|
+
assert 1 == len(actual_response.logs)
|
527
|
+
assert "Traceback" in actual_response.logs[0].message
|
528
|
+
assert "ERROR" in actual_response.logs[0].level
|
529
|
+
|
530
|
+
|
516
531
|
def make_mock_source(return_value: Iterator) -> MagicMock:
|
517
532
|
mock_source = MagicMock()
|
518
533
|
mock_source.read.return_value = return_value
|
File without changes
|
File without changes
|
File without changes
|