airbyte-cdk 0.30.4__py3-none-any.whl → 0.31.1__py3-none-any.whl
This diff shows the changes between package versions as they were published to one of the supported public registries. It is provided for informational purposes only.
- {connector_builder → airbyte_cdk/connector_builder}/connector_builder_handler.py +9 -10
- {connector_builder → airbyte_cdk/connector_builder}/main.py +1 -1
- {connector_builder → airbyte_cdk/connector_builder}/message_grouper.py +63 -27
- {connector_builder → airbyte_cdk/connector_builder}/models.py +7 -1
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/RECORD +12 -12
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/top_level.txt +0 -1
- unit_tests/connector_builder/test_connector_builder_handler.py +33 -13
- unit_tests/connector_builder/test_message_grouper.py +45 -23
- {connector_builder → airbyte_cdk/connector_builder}/__init__.py +0 -0
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/WHEEL +0 -0
{connector_builder → airbyte_cdk/connector_builder}/connector_builder_handler.py

@@ -7,6 +7,7 @@ from datetime import datetime
 from typing import Any, Dict, List, Mapping
 from urllib.parse import urljoin

+from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
 from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
 from airbyte_cdk.models import Type
 from airbyte_cdk.models import Type as MessageType
@@ -15,7 +16,6 @@ from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.streams.http import HttpStream
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
-from connector_builder.message_grouper import MessageGrouper

 DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
 DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5
@@ -31,13 +31,14 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured
         handler = MessageGrouper(max_pages_per_slice, max_slices)
         stream_name = configured_catalog.streams[0].stream.name  # The connector builder only supports a single stream
         stream_read = handler.get_message_groups(source, config, configured_catalog, max_records)
-        return AirbyteMessage(
-
-        stream=stream_name,
-
-        ))
+        return AirbyteMessage(
+            type=MessageType.RECORD,
+            record=AirbyteRecordMessage(data=dataclasses.asdict(stream_read), stream=stream_name, emitted_at=_emitted_at()),
+        )
     except Exception as exc:
-        error = AirbyteTracedException.from_exception(
+        error = AirbyteTracedException.from_exception(
+            exc, message=f"Error reading stream with config={config} and catalog={configured_catalog}"
+        )
        return error.as_airbyte_message()


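Worth noting in this hunk: the handler now builds the RECORD message inline and serializes the whole read result with `dataclasses.asdict`. A minimal sketch of why that works, using hypothetical stand-in dataclasses rather than the CDK's real models:

```python
import dataclasses
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional


@dataclass
class Page:  # hypothetical stand-in for a StreamReadPages entry
    records: List[Dict[str, Any]] = field(default_factory=list)


@dataclass
class FakeStreamRead:  # hypothetical stand-in for the CDK's StreamRead
    logs: List[Dict[str, Any]]
    slices: List[Page]
    test_read_limit_reached: bool
    inferred_schema: Optional[Dict[str, Any]]


stream_read = FakeStreamRead(logs=[], slices=[Page(records=[{"id": 1}])],
                             test_read_limit_reached=False, inferred_schema=None)

# asdict recurses into nested dataclasses, so the whole read result becomes a
# plain JSON-serializable dict that can be used as AirbyteRecordMessage(data=...).
payload = dataclasses.asdict(stream_read)
assert payload["slices"][0]["records"] == [{"id": 1}]
```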
@@ -85,9 +86,7 @@ def _get_http_streams(source: ManifestDeclarativeSource, config: Dict[str, Any])
                    f"A declarative stream should only have a retriever of type HttpStream, but received: {stream.retriever.__class__}"
                )
        else:
-            raise TypeError(
-                f"A declarative source should only contain streams of type DeclarativeStream, but received: {stream.__class__}"
-            )
+            raise TypeError(f"A declarative source should only contain streams of type DeclarativeStream, but received: {stream.__class__}")
    return http_streams


{connector_builder → airbyte_cdk/connector_builder}/main.py

@@ -7,11 +7,11 @@ import sys
 from typing import Any, List, Mapping, Optional, Tuple

 from airbyte_cdk.connector import BaseConnector
+from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, read_stream, resolve_manifest
 from airbyte_cdk.entrypoint import AirbyteEntrypoint
 from airbyte_cdk.models import ConfiguredAirbyteCatalog
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.utils.traced_exception import AirbyteTracedException
-from connector_builder.connector_builder_handler import list_streams, read_stream, resolve_manifest


 def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource:
{connector_builder → airbyte_cdk/connector_builder}/message_grouper.py

@@ -9,11 +9,19 @@ from json import JSONDecodeError
 from typing import Any, Iterable, Iterator, Mapping, Optional, Union
 from urllib.parse import parse_qs, urlparse

-from airbyte_cdk.models import
+from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages, StreamReadSlices
 from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
+from airbyte_cdk.utils import AirbyteTracedException
 from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
-from airbyte_protocol.models.airbyte_protocol import
-
+from airbyte_protocol.models.airbyte_protocol import (
+    AirbyteLogMessage,
+    AirbyteMessage,
+    AirbyteTraceMessage,
+    ConfiguredAirbyteCatalog,
+    Level,
+    TraceType,
+)
+from airbyte_protocol.models.airbyte_protocol import Type as MessageType


 class MessageGrouper:
@@ -24,12 +32,13 @@ class MessageGrouper:
         self._max_slices = max_slices
         self._max_record_limit = max_record_limit

-    def get_message_groups(
-
-
-
-
-
+    def get_message_groups(
+        self,
+        source: DeclarativeSource,
+        config: Mapping[str, Any],
+        configured_catalog: ConfiguredAirbyteCatalog,
+        record_limit: Optional[int] = None,
+    ) -> StreamRead:
         if record_limit is not None and not (1 <= record_limit <= 1000):
             raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")
         schema_inferrer = SchemaInferrer()
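The new signature keeps `record_limit` optional and validates it up front. A short sketch of the bounds check, copied in spirit from the hunk above (`check_record_limit` is a hypothetical free-standing helper):

```python
from typing import Optional


def check_record_limit(record_limit: Optional[int]) -> None:
    # None means "use the default limit"; otherwise enforce the 1..1000 window.
    # The chained comparison reads as (1 <= record_limit) and (record_limit <= 1000).
    if record_limit is not None and not (1 <= record_limit <= 1000):
        raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")


check_record_limit(None)   # accepted: caller did not ask for a limit
check_record_limit(1000)   # accepted: the upper bound is inclusive
try:
    check_record_limit(0)
except ValueError as exc:
    print(exc)  # Record limit must be between 1 and 1000. Got 0
```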
@@ -41,14 +50,18 @@ class MessageGrouper:

         slices = []
         log_messages = []
-        state = {} # No support for incremental sync
         for message_group in self._get_message_groups(
-
+            self._read_stream(source, config, configured_catalog),
             schema_inferrer,
             record_limit,
         ):
             if isinstance(message_group, AirbyteLogMessage):
-                log_messages.append({"message": message_group.message})
+                log_messages.append(LogMessage(**{"message": message_group.message, "level": message_group.level.value}))
+            elif isinstance(message_group, AirbyteTraceMessage):
+                if message_group.type == TraceType.ERROR:
+                    error_message = f"{message_group.error.message} - {message_group.error.stack_trace}"
+                    log_messages.append(LogMessage(**{"message": error_message, "level": "ERROR"}))
+
             else:
                 slices.append(message_group)

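This hunk changes the log accumulator from plain dicts to the typed `LogMessage` model and folds error traces into ERROR-level log entries. A sketch of that flattening, with `FakeTraceError` standing in for the trace message's error payload:

```python
from dataclasses import dataclass


@dataclass
class LogMessage:  # mirrors the model added to airbyte_cdk/connector_builder/models.py
    message: str
    level: str


@dataclass
class FakeTraceError:  # hypothetical stand-in for an AirbyteTraceMessage error payload
    message: str
    stack_trace: str


def trace_error_to_log(error: FakeTraceError) -> LogMessage:
    # Error traces are collapsed into one ERROR-level entry joining the error
    # text with its stack trace, matching the f-string in the hunk above.
    return LogMessage(message=f"{error.message} - {error.stack_trace}", level="ERROR")


entry = trace_error_to_log(FakeTraceError("boom", "Traceback (most recent call last): ..."))
assert entry == LogMessage("boom - Traceback (most recent call last): ...", "ERROR")
```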
@@ -56,12 +69,14 @@
             logs=log_messages,
             slices=slices,
             test_read_limit_reached=self._has_reached_limit(slices),
-            inferred_schema=schema_inferrer.get_stream_schema(
+            inferred_schema=schema_inferrer.get_stream_schema(
+                configured_catalog.streams[0].stream.name
+            ),  # The connector builder currently only supports reading from a single stream at a time
         )

     def _get_message_groups(
         self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
-    ) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]:
+    ) -> Iterable[Union[StreamReadPages, AirbyteLogMessage, AirbyteTraceMessage]]:
         """
         Message groups are partitioned according to when request log messages are received. Subsequent response log messages
         and record messages belong to the prior request log message and when we encounter another request, append the latest
@@ -83,44 +98,56 @@ class MessageGrouper:
         current_slice_pages = []
         current_page_request: Optional[HttpRequest] = None
         current_page_response: Optional[HttpResponse] = None
+        had_error = False

         while records_count < limit and (message := next(messages, None)):
             if self._need_to_close_page(at_least_one_page_in_group, message):
-                self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
+                self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, True)
                 current_page_request = None
                 current_page_response = None

-            if at_least_one_page_in_group and message.type ==
+            if at_least_one_page_in_group and message.type == MessageType.LOG and message.log.message.startswith("slice:"):
                 yield StreamReadSlices(pages=current_slice_pages)
                 current_slice_pages = []
                 at_least_one_page_in_group = False
-            elif message.type ==
+            elif message.type == MessageType.LOG and message.log.message.startswith("request:"):
                 if not at_least_one_page_in_group:
                     at_least_one_page_in_group = True
                 current_page_request = self._create_request_from_log_message(message.log)
-            elif message.type ==
+            elif message.type == MessageType.LOG and message.log.message.startswith("response:"):
                 current_page_response = self._create_response_from_log_message(message.log)
-            elif message.type ==
+            elif message.type == MessageType.LOG:
+                if message.log.level == Level.ERROR:
+                    had_error = True
                 yield message.log
-            elif message.type ==
+            elif message.type == MessageType.TRACE:
+                if message.trace.type == TraceType.ERROR:
+                    had_error = True
+                    yield message.trace
+            elif message.type == MessageType.RECORD:
                 current_page_records.append(message.record.data)
                 records_count += 1
                 schema_inferrer.accumulate(message.record)
         else:
-            self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
+            self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete=not had_error)
             yield StreamReadSlices(pages=current_slice_pages)

     @staticmethod
-    def _need_to_close_page(at_least_one_page_in_group, message):
+    def _need_to_close_page(at_least_one_page_in_group, message) -> bool:
         return (
-
-
-
+            at_least_one_page_in_group
+            and message.type == MessageType.LOG
+            and (message.log.message.startswith("request:") or message.log.message.startswith("slice:"))
         )

     @staticmethod
-    def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records):
-
+    def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete: bool):
+        """
+        Close a page when parsing message groups
+        @param validate_page_complete: in some cases, we expect the CDK to not return a response. As of today, this will only happen before
+        an uncaught exception and therefore, the assumption is that `validate_page_complete=True` only on the last page that is being closed
+        """
+        if validate_page_complete and (not current_page_request or not current_page_response):
             raise ValueError("Every message grouping should have at least one request and response")

         current_slice_pages.append(
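The grouping loop is a small state machine keyed on the "slice:", "request:", and "response:" prefixes that the CDK writes into its log messages, plus a `had_error` flag that relaxes page validation after a failure. A stripped-down sketch of the partitioning idea (`group_pages_by_slice` is a hypothetical reduction, not the CDK method):

```python
from typing import Iterable, Iterator, List


def group_pages_by_slice(log_lines: Iterator[str]) -> Iterable[List[str]]:
    current_slice: List[str] = []
    for line in log_lines:
        if line.startswith("slice:") and current_slice:
            # A new slice marker closes out whatever was accumulated so far.
            yield current_slice
            current_slice = []
        elif line.startswith(("request:", "response:")):
            # Request/response lines attach to the slice that is currently open.
            current_slice.append(line)
    if current_slice:
        yield current_slice  # flush the trailing slice once the stream ends


lines = iter(["slice: a", "request: 1", "response: 1", "slice: b", "request: 2", "response: 2"])
assert list(group_pages_by_slice(lines)) == [["request: 1", "response: 1"],
                                             ["request: 2", "response: 2"]]
```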
@@ -128,6 +155,15 @@ class MessageGrouper:
         )
         current_page_records.clear()

+    def _read_stream(self, source, config, configured_catalog) -> Iterator[AirbyteMessage]:
+        # the generator can raise an exception
+        # iterate over the generated messages. if next raise an exception, catch it and yield it as an AirbyteLogMessage
+        try:
+            yield from source.read(logger=self.logger, config=config, catalog=configured_catalog, state={})
+        except Exception as e:
+            error_message = f"{e.args[0] if len(e.args) > 0 else str(e)}"
+            yield AirbyteTracedException.from_exception(e, message=error_message).as_airbyte_message()
+
     def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]:
         # TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the
         # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
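The new `_read_stream` wraps `source.read` in a generator so that an exception raised mid-iteration becomes one final error message instead of killing the whole read. The same pattern in isolation (the dict messages here are placeholders for real Airbyte messages):

```python
from typing import Any, Dict, Iterator


def failing_source() -> Iterator[Dict[str, Any]]:
    yield {"id": 1}
    raise ValueError("connection reset")


def read_with_error_capture(messages: Iterator[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
    try:
        # yield from forwards records untouched until the underlying iterator raises.
        yield from messages
    except Exception as exc:
        # Mirror the hunk above: prefer exc.args[0] when present, else str(exc).
        yield {"type": "ERROR", "message": exc.args[0] if len(exc.args) > 0 else str(exc)}


assert list(read_with_error_capture(failing_source())) == [
    {"id": 1},
    {"type": "ERROR", "message": "connection reset"},
]
```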
{connector_builder → airbyte_cdk/connector_builder}/models.py

@@ -50,9 +50,15 @@ class StreamReadSlicesInner:
     state: Optional[Dict[str, Any]]


+@dataclass
+class LogMessage:
+    message: str
+    level: str
+
+
 @dataclass
 class StreamRead(object):
-    logs: List[
+    logs: List[LogMessage]
     slices: List[StreamReadSlicesInner]
     test_read_limit_reached: bool
     inferred_schema: Optional[Dict[str, Any]]
{airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/RECORD

@@ -5,6 +5,11 @@ airbyte_cdk/entrypoint.py,sha256=9WkwB7Giqsz-VCaRkqCwrVIH9OfvULQs3-kz8IBfOh4,690
 airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
 airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
 airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
+airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=9iG2hmaJyBtLDtdwtQp8M19pG2HQjR7Yy6jnBDggXMk,4160
+airbyte_cdk/connector_builder/main.py,sha256=IOijgSQ4A9KhqJplHSVPTrxH_cyGWW_9uNmSIwBo0l0,3021
+airbyte_cdk/connector_builder/message_grouper.py,sha256=-0AYh21eZeNJyyFPvSB84NNKMZznSX7bf4Ygi3deWEU,10972
+airbyte_cdk/connector_builder/models.py,sha256=y0PJ-LwJk3e1RzRmMfjQSBP9ENx_a0wBcWNCjlW72Ks,1832
 airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
 airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
 airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
@@ -152,16 +157,11 @@ airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2
 airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
 airbyte_cdk/utils/schema_inferrer.py,sha256=LQLOlraFksg7_sqpJNhy9pS_K42GVxG634ogM_P2s5E,2361
 airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnTQNDKyKGI,3216
-connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
-connector_builder/connector_builder_handler.py,sha256=QF0Rus1_NhKNfma8EtL6sVWSu8kySLDj2m5IcZ-9OsU,4167
-connector_builder/main.py,sha256=6jnHUX5pVG3dVPC6D4oMGneoxzmbpBqZNB-rfj7_Mjc,3009
-connector_builder/message_grouper.py,sha256=NkiDNvwXkP4EB-501hGpmZZCJjaJo7xYdGD2xmiGoH8,9121
-connector_builder/models.py,sha256=mf9cAZaxFW7uFp0gyHiJpuGlAuGuq2zFvs76VMYqUkA,1765
 source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
 unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
-unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
-unit_tests/connector_builder/test_message_grouper.py,sha256=
+unit_tests/connector_builder/test_connector_builder_handler.py,sha256=fwOlQPdc9quPUoZYXg_GVXqdkw5TLWs0CuBjYEMAcfM,19745
+unit_tests/connector_builder/test_message_grouper.py,sha256=1_DXyuqHmr8B7b1t8PSpmYWaHvYYAB2mMwpp_YAbYwc,22959
 unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
 unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
@@ -257,8 +257,8 @@ unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
 unit_tests/utils/test_schema_inferrer.py,sha256=ckl17GlNOZInqgxni7Z2A0bg_p6JDy0GVFAG8ph67pw,3288
 unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
 unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
-airbyte_cdk-0.
-airbyte_cdk-0.
-airbyte_cdk-0.
-airbyte_cdk-0.
-airbyte_cdk-0.
+airbyte_cdk-0.31.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
+airbyte_cdk-0.31.1.dist-info/METADATA,sha256=nSfqOPf-MueD0MHg9bTQU8PWrhP43lmxd6JK1IDNj-M,8902
+airbyte_cdk-0.31.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
+airbyte_cdk-0.31.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
+airbyte_cdk-0.31.1.dist-info/RECORD,,
unit_tests/connector_builder/test_connector_builder_handler.py

@@ -3,21 +3,22 @@
 #

 import copy
+import dataclasses
 import json
 from unittest import mock
 from unittest.mock import patch

-import connector_builder
 import pytest
+from airbyte_cdk import connector_builder
+from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, resolve_manifest
+from airbyte_cdk.connector_builder.main import handle_connector_builder_request, handle_request, read_stream
+from airbyte_cdk.connector_builder.models import LogMessage, StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
 from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
 from airbyte_cdk.models import Type as MessageType
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
 from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
 from airbyte_cdk.sources.streams.core import Stream
 from airbyte_cdk.sources.streams.http import HttpStream
-from connector_builder.connector_builder_handler import list_streams, resolve_manifest
-from connector_builder.main import handle_connector_builder_request, handle_request, read_stream
-from connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
 from unit_tests.connector_builder.utils import create_configured_catalog

 _stream_name = "stream_with_custom_requester"
@@ -215,7 +216,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file):
                 "primary_key": _stream_primary_key,
                 "url_base": _stream_url_base,
                 "$parameters": _stream_options,
-                "page_size": 10
+                "page_size": 10,
             },
             "name": _stream_name,
             "primary_key": _stream_primary_key,
@@ -315,7 +316,7 @@ def test_read():
             emitted_at=1,
         ),
     )
-    with patch("connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read):
+    with patch("airbyte_cdk.connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read):
         output_record = handle_connector_builder_request(
             source, "test_read", config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)
         )
@@ -323,14 +324,31 @@ def test_read():
     assert output_record == expected_airbyte_message


-
+@patch("traceback.TracebackException.from_exception")
+def test_read_returns_error_response(mock_from_exception):
     class MockManifestDeclarativeSource:
         def read(self, logger, config, catalog, state):
-            raise ValueError
+            raise ValueError("error_message")
+
+    stack_trace = "a stack trace"
+    mock_from_exception.return_value = stack_trace

     source = MockManifestDeclarativeSource()
     response = read_stream(source, TEST_READ_CONFIG, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG))
-
+
+    expected_stream_read = StreamRead(logs=[LogMessage("error_message - a stack trace", "ERROR")],
+                                      slices=[StreamReadSlicesInner(
+                                          pages=[StreamReadSlicesInnerPagesInner(records=[], request=None, response=None)],
+                                          slice_descriptor=None, state=None)],
+                                      test_read_limit_reached=False,
+                                      inferred_schema=None)
+
+    expected_message = AirbyteMessage(
+        type=MessageType.RECORD,
+        record=AirbyteRecordMessage(stream=_stream_name, data=dataclasses.asdict(expected_stream_read), emitted_at=1),
+    )
+    response.record.emitted_at = 1
+    assert response == expected_message


 @pytest.mark.parametrize(
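The rewritten test pins the stack trace by patching traceback.TracebackException.from_exception, the stdlib helper that ultimately formats the trace embedded in the error log. The same trick on a toy formatter (`format_failure` is hypothetical):

```python
import traceback
from unittest.mock import patch


def format_failure(exc: Exception) -> str:
    # Hypothetical formatter that, like the traced exception in the diff,
    # renders the stack trace via the stdlib's TracebackException helper.
    return f"{exc.args[0]} - {traceback.TracebackException.from_exception(exc)}"


with patch("traceback.TracebackException.from_exception", return_value="a stack trace"):
    # With the helper patched, the otherwise nondeterministic trace is fixed,
    # so the assertion can compare against an exact string.
    assert format_failure(ValueError("error_message")) == "error_message - a stack trace"
```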
@@ -385,10 +403,12 @@ def test_list_streams(manifest_declarative_source):

     assert result.type == MessageType.RECORD
     assert result.record.stream == "list_streams"
-    assert result.record.data == {
-
-
-
+    assert result.record.data == {
+        "streams": [
+            {"name": "a name", "url": "https://a-url-base.com/a-path"},
+            {"name": "another name", "url": "https://another-url-base.com/another-path"},
+        ]
+    }


 def test_given_stream_is_not_declarative_stream_when_list_streams_then_return_exception_message(manifest_declarative_source):
unit_tests/connector_builder/test_message_grouper.py

@@ -7,17 +7,17 @@ from typing import Iterator
 from unittest.mock import MagicMock

 import pytest
+from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
+from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages
 from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level
 from airbyte_cdk.models import Type as MessageType
-from connector_builder.message_grouper import MessageGrouper
-from connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages
 from unit_tests.connector_builder.utils import create_configured_catalog

 MAX_PAGES_PER_SLICE = 4
 MAX_SLICES = 3

 MANIFEST = {
-    "version": "0.
+    "version": "0.30.0",
     "type": "DeclarativeSource",
     "definitions": {
         "selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"},
@@ -124,8 +124,9 @@ def test_get_grouped_messages():
     )

     connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
-    actual_response: StreamRead = connector_builder_handler.get_message_groups(
-
+    actual_response: StreamRead = connector_builder_handler.get_message_groups(
+        source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
+    )
     assert actual_response.inferred_schema == expected_schema

     single_slice = actual_response.slices[0]
@@ -166,9 +167,9 @@ def test_get_grouped_messages_with_logs():
         ),
     ]
     expected_logs = [
-        {"message": "log message before the request"},
-        {"message": "log message during the page"},
-        {"message": "log message after the response"},
+        LogMessage(**{"message": "log message before the request", "level": "INFO"}),
+        LogMessage(**{"message": "log message during the page", "level": "INFO"}),
+        LogMessage(**{"message": "log message after the response", "level": "INFO"}),
     ]

     mock_source = make_mock_source(
@@ -187,8 +188,9 @@ def test_get_grouped_messages_with_logs():

     connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)

-    actual_response: StreamRead = connector_builder_handler.get_message_groups(
-
+    actual_response: StreamRead = connector_builder_handler.get_message_groups(
+        source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
+    )
     single_slice = actual_response.slices[0]
     for i, actual_page in enumerate(single_slice.pages):
         assert actual_page == expected_pages[i]
@@ -229,9 +231,9 @@ def test_get_grouped_messages_record_limit(request_record_limit, max_record_limi
     record_limit = min(request_record_limit, max_record_limit)

     api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit)
-    actual_response: StreamRead = api.get_message_groups(
-
-
+    actual_response: StreamRead = api.get_message_groups(
+        mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras"), record_limit=request_record_limit
+    )
     single_slice = actual_response.slices[0]
     total_records = 0
     for i, actual_page in enumerate(single_slice.pages):
@@ -270,8 +272,9 @@ def test_get_grouped_messages_default_record_limit(max_record_limit):
     n_records = 2

     api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit)
-    actual_response: StreamRead = api.get_message_groups(
-
+    actual_response: StreamRead = api.get_message_groups(
+        source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
+    )
     single_slice = actual_response.slices[0]
     total_records = 0
     for i, actual_page in enumerate(single_slice.pages):
@@ -352,8 +355,9 @@ def test_get_grouped_messages_no_records():

     message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)

-    actual_response: StreamRead = message_grouper.get_message_groups(
-
+    actual_response: StreamRead = message_grouper.get_message_groups(
+        source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
+    )

     single_slice = actual_response.slices[0]
     for i, actual_page in enumerate(single_slice.pages):
@@ -459,8 +463,9 @@ def test_get_grouped_messages_with_many_slices():

     connecto_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)

-    stream_read: StreamRead = connecto_builder_handler.get_message_groups(
-
+    stream_read: StreamRead = connecto_builder_handler.get_message_groups(
+        source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
+    )

     assert not stream_read.test_read_limit_reached
     assert len(stream_read.slices) == 2
@@ -484,8 +489,9 @@ def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limi

     api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)

-    stream_read: StreamRead = api.get_message_groups(
-
+    stream_read: StreamRead = api.get_message_groups(
+        source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
+    )

     assert stream_read.test_read_limit_reached

@@ -500,12 +506,28 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit

     api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)

-    stream_read: StreamRead = api.get_message_groups(
-
+    stream_read: StreamRead = api.get_message_groups(
+        source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
+    )

     assert stream_read.test_read_limit_reached


+def test_read_stream_returns_error_if_stream_does_not_exist():
+    mock_source = MagicMock()
+    mock_source.read.side_effect = ValueError("error")
+
+    full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
+
+    message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
+    actual_response = message_grouper.get_message_groups(source=mock_source, config=full_config,
+                                                         configured_catalog=create_configured_catalog("not_in_manifest"))
+
+    assert 1 == len(actual_response.logs)
+    assert "Traceback" in actual_response.logs[0].message
+    assert "ERROR" in actual_response.logs[0].level
+
+
 def make_mock_source(return_value: Iterator) -> MagicMock:
     mock_source = MagicMock()
     mock_source.read.return_value = return_value
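The new test drives the error path purely with a mock: assigning an exception to `side_effect` makes the stubbed `read()` raise as soon as it is called. In isolation:

```python
from unittest.mock import MagicMock

mock_source = MagicMock()
# An exception instance as side_effect is raised the moment read() is invoked.
mock_source.read.side_effect = ValueError("error")

try:
    mock_source.read(logger=None, config={}, catalog=None, state={})
except ValueError as exc:
    assert exc.args == ("error",)
```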
The remaining files ({connector_builder → airbyte_cdk/connector_builder}/__init__.py, plus LICENSE.txt and WHEEL under {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}) have no changes.