airbyte-cdk 0.30.4__py3-none-any.whl → 0.31.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- {connector_builder → airbyte_cdk/connector_builder}/connector_builder_handler.py +9 -10
- {connector_builder → airbyte_cdk/connector_builder}/main.py +1 -1
- {connector_builder → airbyte_cdk/connector_builder}/message_grouper.py +63 -27
- {connector_builder → airbyte_cdk/connector_builder}/models.py +7 -1
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/RECORD +12 -12
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/top_level.txt +0 -1
- unit_tests/connector_builder/test_connector_builder_handler.py +33 -13
- unit_tests/connector_builder/test_message_grouper.py +45 -23
- {connector_builder → airbyte_cdk/connector_builder}/__init__.py +0 -0
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.30.4.dist-info → airbyte_cdk-0.31.1.dist-info}/WHEEL +0 -0
@@ -7,6 +7,7 @@ from datetime import datetime
|
|
7
7
|
from typing import Any, Dict, List, Mapping
|
8
8
|
from urllib.parse import urljoin
|
9
9
|
|
10
|
+
from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
|
10
11
|
from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
|
11
12
|
from airbyte_cdk.models import Type
|
12
13
|
from airbyte_cdk.models import Type as MessageType
|
@@ -15,7 +16,6 @@ from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
|
15
16
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
16
17
|
from airbyte_cdk.sources.streams.http import HttpStream
|
17
18
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
18
|
-
from connector_builder.message_grouper import MessageGrouper
|
19
19
|
|
20
20
|
DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
|
21
21
|
DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5
|
@@ -31,13 +31,14 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured
|
|
31
31
|
handler = MessageGrouper(max_pages_per_slice, max_slices)
|
32
32
|
stream_name = configured_catalog.streams[0].stream.name # The connector builder only supports a single stream
|
33
33
|
stream_read = handler.get_message_groups(source, config, configured_catalog, max_records)
|
34
|
-
return AirbyteMessage(
|
35
|
-
|
36
|
-
stream=stream_name,
|
37
|
-
|
38
|
-
))
|
34
|
+
return AirbyteMessage(
|
35
|
+
type=MessageType.RECORD,
|
36
|
+
record=AirbyteRecordMessage(data=dataclasses.asdict(stream_read), stream=stream_name, emitted_at=_emitted_at()),
|
37
|
+
)
|
39
38
|
except Exception as exc:
|
40
|
-
error = AirbyteTracedException.from_exception(
|
39
|
+
error = AirbyteTracedException.from_exception(
|
40
|
+
exc, message=f"Error reading stream with config={config} and catalog={configured_catalog}"
|
41
|
+
)
|
41
42
|
return error.as_airbyte_message()
|
42
43
|
|
43
44
|
|
@@ -85,9 +86,7 @@ def _get_http_streams(source: ManifestDeclarativeSource, config: Dict[str, Any])
|
|
85
86
|
f"A declarative stream should only have a retriever of type HttpStream, but received: {stream.retriever.__class__}"
|
86
87
|
)
|
87
88
|
else:
|
88
|
-
raise TypeError(
|
89
|
-
f"A declarative source should only contain streams of type DeclarativeStream, but received: {stream.__class__}"
|
90
|
-
)
|
89
|
+
raise TypeError(f"A declarative source should only contain streams of type DeclarativeStream, but received: {stream.__class__}")
|
91
90
|
return http_streams
|
92
91
|
|
93
92
|
|
@@ -7,11 +7,11 @@ import sys
|
|
7
7
|
from typing import Any, List, Mapping, Optional, Tuple
|
8
8
|
|
9
9
|
from airbyte_cdk.connector import BaseConnector
|
10
|
+
from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, read_stream, resolve_manifest
|
10
11
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
11
12
|
from airbyte_cdk.models import ConfiguredAirbyteCatalog
|
12
13
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
13
14
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
14
|
-
from connector_builder.connector_builder_handler import list_streams, read_stream, resolve_manifest
|
15
15
|
|
16
16
|
|
17
17
|
def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource:
|
@@ -9,11 +9,19 @@ from json import JSONDecodeError
|
|
9
9
|
from typing import Any, Iterable, Iterator, Mapping, Optional, Union
|
10
10
|
from urllib.parse import parse_qs, urlparse
|
11
11
|
|
12
|
-
from airbyte_cdk.models import
|
12
|
+
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages, StreamReadSlices
|
13
13
|
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
14
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
14
15
|
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
|
15
|
-
from airbyte_protocol.models.airbyte_protocol import
|
16
|
-
|
16
|
+
from airbyte_protocol.models.airbyte_protocol import (
|
17
|
+
AirbyteLogMessage,
|
18
|
+
AirbyteMessage,
|
19
|
+
AirbyteTraceMessage,
|
20
|
+
ConfiguredAirbyteCatalog,
|
21
|
+
Level,
|
22
|
+
TraceType,
|
23
|
+
)
|
24
|
+
from airbyte_protocol.models.airbyte_protocol import Type as MessageType
|
17
25
|
|
18
26
|
|
19
27
|
class MessageGrouper:
|
@@ -24,12 +32,13 @@ class MessageGrouper:
|
|
24
32
|
self._max_slices = max_slices
|
25
33
|
self._max_record_limit = max_record_limit
|
26
34
|
|
27
|
-
def get_message_groups(
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
35
|
+
def get_message_groups(
|
36
|
+
self,
|
37
|
+
source: DeclarativeSource,
|
38
|
+
config: Mapping[str, Any],
|
39
|
+
configured_catalog: ConfiguredAirbyteCatalog,
|
40
|
+
record_limit: Optional[int] = None,
|
41
|
+
) -> StreamRead:
|
33
42
|
if record_limit is not None and not (1 <= record_limit <= 1000):
|
34
43
|
raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")
|
35
44
|
schema_inferrer = SchemaInferrer()
|
@@ -41,14 +50,18 @@ class MessageGrouper:
|
|
41
50
|
|
42
51
|
slices = []
|
43
52
|
log_messages = []
|
44
|
-
state = {} # No support for incremental sync
|
45
53
|
for message_group in self._get_message_groups(
|
46
|
-
|
54
|
+
self._read_stream(source, config, configured_catalog),
|
47
55
|
schema_inferrer,
|
48
56
|
record_limit,
|
49
57
|
):
|
50
58
|
if isinstance(message_group, AirbyteLogMessage):
|
51
|
-
log_messages.append({"message": message_group.message})
|
59
|
+
log_messages.append(LogMessage(**{"message": message_group.message, "level": message_group.level.value}))
|
60
|
+
elif isinstance(message_group, AirbyteTraceMessage):
|
61
|
+
if message_group.type == TraceType.ERROR:
|
62
|
+
error_message = f"{message_group.error.message} - {message_group.error.stack_trace}"
|
63
|
+
log_messages.append(LogMessage(**{"message": error_message, "level": "ERROR"}))
|
64
|
+
|
52
65
|
else:
|
53
66
|
slices.append(message_group)
|
54
67
|
|
@@ -56,12 +69,14 @@ class MessageGrouper:
|
|
56
69
|
logs=log_messages,
|
57
70
|
slices=slices,
|
58
71
|
test_read_limit_reached=self._has_reached_limit(slices),
|
59
|
-
inferred_schema=schema_inferrer.get_stream_schema(
|
72
|
+
inferred_schema=schema_inferrer.get_stream_schema(
|
73
|
+
configured_catalog.streams[0].stream.name
|
74
|
+
), # The connector builder currently only supports reading from a single stream at a time
|
60
75
|
)
|
61
76
|
|
62
77
|
def _get_message_groups(
|
63
78
|
self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
|
64
|
-
) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]:
|
79
|
+
) -> Iterable[Union[StreamReadPages, AirbyteLogMessage, AirbyteTraceMessage]]:
|
65
80
|
"""
|
66
81
|
Message groups are partitioned according to when request log messages are received. Subsequent response log messages
|
67
82
|
and record messages belong to the prior request log message and when we encounter another request, append the latest
|
@@ -83,44 +98,56 @@ class MessageGrouper:
|
|
83
98
|
current_slice_pages = []
|
84
99
|
current_page_request: Optional[HttpRequest] = None
|
85
100
|
current_page_response: Optional[HttpResponse] = None
|
101
|
+
had_error = False
|
86
102
|
|
87
103
|
while records_count < limit and (message := next(messages, None)):
|
88
104
|
if self._need_to_close_page(at_least_one_page_in_group, message):
|
89
|
-
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
|
105
|
+
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, True)
|
90
106
|
current_page_request = None
|
91
107
|
current_page_response = None
|
92
108
|
|
93
|
-
if at_least_one_page_in_group and message.type ==
|
109
|
+
if at_least_one_page_in_group and message.type == MessageType.LOG and message.log.message.startswith("slice:"):
|
94
110
|
yield StreamReadSlices(pages=current_slice_pages)
|
95
111
|
current_slice_pages = []
|
96
112
|
at_least_one_page_in_group = False
|
97
|
-
elif message.type ==
|
113
|
+
elif message.type == MessageType.LOG and message.log.message.startswith("request:"):
|
98
114
|
if not at_least_one_page_in_group:
|
99
115
|
at_least_one_page_in_group = True
|
100
116
|
current_page_request = self._create_request_from_log_message(message.log)
|
101
|
-
elif message.type ==
|
117
|
+
elif message.type == MessageType.LOG and message.log.message.startswith("response:"):
|
102
118
|
current_page_response = self._create_response_from_log_message(message.log)
|
103
|
-
elif message.type ==
|
119
|
+
elif message.type == MessageType.LOG:
|
120
|
+
if message.log.level == Level.ERROR:
|
121
|
+
had_error = True
|
104
122
|
yield message.log
|
105
|
-
elif message.type ==
|
123
|
+
elif message.type == MessageType.TRACE:
|
124
|
+
if message.trace.type == TraceType.ERROR:
|
125
|
+
had_error = True
|
126
|
+
yield message.trace
|
127
|
+
elif message.type == MessageType.RECORD:
|
106
128
|
current_page_records.append(message.record.data)
|
107
129
|
records_count += 1
|
108
130
|
schema_inferrer.accumulate(message.record)
|
109
131
|
else:
|
110
|
-
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
|
132
|
+
self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete=not had_error)
|
111
133
|
yield StreamReadSlices(pages=current_slice_pages)
|
112
134
|
|
113
135
|
@staticmethod
|
114
|
-
def _need_to_close_page(at_least_one_page_in_group, message):
|
136
|
+
def _need_to_close_page(at_least_one_page_in_group, message) -> bool:
|
115
137
|
return (
|
116
|
-
|
117
|
-
|
118
|
-
|
138
|
+
at_least_one_page_in_group
|
139
|
+
and message.type == MessageType.LOG
|
140
|
+
and (message.log.message.startswith("request:") or message.log.message.startswith("slice:"))
|
119
141
|
)
|
120
142
|
|
121
143
|
@staticmethod
|
122
|
-
def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records):
|
123
|
-
|
144
|
+
def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete: bool):
|
145
|
+
"""
|
146
|
+
Close a page when parsing message groups
|
147
|
+
@param validate_page_complete: in some cases, we expect the CDK to not return a response. As of today, this will only happen before
|
148
|
+
an uncaught exception and therefore, the assumption is that `validate_page_complete=False` only on the last page that is being closed
|
149
|
+
"""
|
150
|
+
if validate_page_complete and (not current_page_request or not current_page_response):
|
124
151
|
raise ValueError("Every message grouping should have at least one request and response")
|
125
152
|
|
126
153
|
current_slice_pages.append(
|
@@ -128,6 +155,15 @@ class MessageGrouper:
|
|
128
155
|
)
|
129
156
|
current_page_records.clear()
|
130
157
|
|
158
|
+
def _read_stream(self, source, config, configured_catalog) -> Iterator[AirbyteMessage]:
|
159
|
+
# the generator can raise an exception
|
160
|
+
# iterate over the generated messages. If next() raises an exception, catch it and yield it as the AirbyteMessage built by AirbyteTracedException.as_airbyte_message()
|
161
|
+
try:
|
162
|
+
yield from source.read(logger=self.logger, config=config, catalog=configured_catalog, state={})
|
163
|
+
except Exception as e:
|
164
|
+
error_message = f"{e.args[0] if len(e.args) > 0 else str(e)}"
|
165
|
+
yield AirbyteTracedException.from_exception(e, message=error_message).as_airbyte_message()
|
166
|
+
|
131
167
|
def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]:
|
132
168
|
# TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the
|
133
169
|
# form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
|
@@ -50,9 +50,15 @@ class StreamReadSlicesInner:
|
|
50
50
|
state: Optional[Dict[str, Any]]
|
51
51
|
|
52
52
|
|
53
|
+
@dataclass
|
54
|
+
class LogMessage:
|
55
|
+
message: str
|
56
|
+
level: str
|
57
|
+
|
58
|
+
|
53
59
|
@dataclass
|
54
60
|
class StreamRead(object):
|
55
|
-
logs: List[
|
61
|
+
logs: List[LogMessage]
|
56
62
|
slices: List[StreamReadSlicesInner]
|
57
63
|
test_read_limit_reached: bool
|
58
64
|
inferred_schema: Optional[Dict[str, Any]]
|
@@ -5,6 +5,11 @@ airbyte_cdk/entrypoint.py,sha256=9WkwB7Giqsz-VCaRkqCwrVIH9OfvULQs3-kz8IBfOh4,690
|
|
5
5
|
airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
|
6
6
|
airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
|
7
7
|
airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
8
|
+
airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
9
|
+
airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=9iG2hmaJyBtLDtdwtQp8M19pG2HQjR7Yy6jnBDggXMk,4160
|
10
|
+
airbyte_cdk/connector_builder/main.py,sha256=IOijgSQ4A9KhqJplHSVPTrxH_cyGWW_9uNmSIwBo0l0,3021
|
11
|
+
airbyte_cdk/connector_builder/message_grouper.py,sha256=-0AYh21eZeNJyyFPvSB84NNKMZznSX7bf4Ygi3deWEU,10972
|
12
|
+
airbyte_cdk/connector_builder/models.py,sha256=y0PJ-LwJk3e1RzRmMfjQSBP9ENx_a0wBcWNCjlW72Ks,1832
|
8
13
|
airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
|
9
14
|
airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
|
10
15
|
airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
|
@@ -152,16 +157,11 @@ airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2
|
|
152
157
|
airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
|
153
158
|
airbyte_cdk/utils/schema_inferrer.py,sha256=LQLOlraFksg7_sqpJNhy9pS_K42GVxG634ogM_P2s5E,2361
|
154
159
|
airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnTQNDKyKGI,3216
|
155
|
-
connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
156
|
-
connector_builder/connector_builder_handler.py,sha256=QF0Rus1_NhKNfma8EtL6sVWSu8kySLDj2m5IcZ-9OsU,4167
|
157
|
-
connector_builder/main.py,sha256=6jnHUX5pVG3dVPC6D4oMGneoxzmbpBqZNB-rfj7_Mjc,3009
|
158
|
-
connector_builder/message_grouper.py,sha256=NkiDNvwXkP4EB-501hGpmZZCJjaJo7xYdGD2xmiGoH8,9121
|
159
|
-
connector_builder/models.py,sha256=mf9cAZaxFW7uFp0gyHiJpuGlAuGuq2zFvs76VMYqUkA,1765
|
160
160
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
161
161
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
162
162
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
163
|
-
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=
|
164
|
-
unit_tests/connector_builder/test_message_grouper.py,sha256=
|
163
|
+
unit_tests/connector_builder/test_connector_builder_handler.py,sha256=fwOlQPdc9quPUoZYXg_GVXqdkw5TLWs0CuBjYEMAcfM,19745
|
164
|
+
unit_tests/connector_builder/test_message_grouper.py,sha256=1_DXyuqHmr8B7b1t8PSpmYWaHvYYAB2mMwpp_YAbYwc,22959
|
165
165
|
unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
|
166
166
|
unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
167
167
|
unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
|
@@ -257,8 +257,8 @@ unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
|
|
257
257
|
unit_tests/utils/test_schema_inferrer.py,sha256=ckl17GlNOZInqgxni7Z2A0bg_p6JDy0GVFAG8ph67pw,3288
|
258
258
|
unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
|
259
259
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
260
|
-
airbyte_cdk-0.
|
261
|
-
airbyte_cdk-0.
|
262
|
-
airbyte_cdk-0.
|
263
|
-
airbyte_cdk-0.
|
264
|
-
airbyte_cdk-0.
|
260
|
+
airbyte_cdk-0.31.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
261
|
+
airbyte_cdk-0.31.1.dist-info/METADATA,sha256=nSfqOPf-MueD0MHg9bTQU8PWrhP43lmxd6JK1IDNj-M,8902
|
262
|
+
airbyte_cdk-0.31.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
263
|
+
airbyte_cdk-0.31.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
264
|
+
airbyte_cdk-0.31.1.dist-info/RECORD,,
|
@@ -3,21 +3,22 @@
|
|
3
3
|
#
|
4
4
|
|
5
5
|
import copy
|
6
|
+
import dataclasses
|
6
7
|
import json
|
7
8
|
from unittest import mock
|
8
9
|
from unittest.mock import patch
|
9
10
|
|
10
|
-
import connector_builder
|
11
11
|
import pytest
|
12
|
+
from airbyte_cdk import connector_builder
|
13
|
+
from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, resolve_manifest
|
14
|
+
from airbyte_cdk.connector_builder.main import handle_connector_builder_request, handle_request, read_stream
|
15
|
+
from airbyte_cdk.connector_builder.models import LogMessage, StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
|
12
16
|
from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
|
13
17
|
from airbyte_cdk.models import Type as MessageType
|
14
18
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
15
19
|
from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
16
20
|
from airbyte_cdk.sources.streams.core import Stream
|
17
21
|
from airbyte_cdk.sources.streams.http import HttpStream
|
18
|
-
from connector_builder.connector_builder_handler import list_streams, resolve_manifest
|
19
|
-
from connector_builder.main import handle_connector_builder_request, handle_request, read_stream
|
20
|
-
from connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
|
21
22
|
from unit_tests.connector_builder.utils import create_configured_catalog
|
22
23
|
|
23
24
|
_stream_name = "stream_with_custom_requester"
|
@@ -215,7 +216,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file):
|
|
215
216
|
"primary_key": _stream_primary_key,
|
216
217
|
"url_base": _stream_url_base,
|
217
218
|
"$parameters": _stream_options,
|
218
|
-
"page_size": 10
|
219
|
+
"page_size": 10,
|
219
220
|
},
|
220
221
|
"name": _stream_name,
|
221
222
|
"primary_key": _stream_primary_key,
|
@@ -315,7 +316,7 @@ def test_read():
|
|
315
316
|
emitted_at=1,
|
316
317
|
),
|
317
318
|
)
|
318
|
-
with patch("connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read):
|
319
|
+
with patch("airbyte_cdk.connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read):
|
319
320
|
output_record = handle_connector_builder_request(
|
320
321
|
source, "test_read", config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)
|
321
322
|
)
|
@@ -323,14 +324,31 @@ def test_read():
|
|
323
324
|
assert output_record == expected_airbyte_message
|
324
325
|
|
325
326
|
|
326
|
-
|
327
|
+
@patch("traceback.TracebackException.from_exception")
|
328
|
+
def test_read_returns_error_response(mock_from_exception):
|
327
329
|
class MockManifestDeclarativeSource:
|
328
330
|
def read(self, logger, config, catalog, state):
|
329
|
-
raise ValueError
|
331
|
+
raise ValueError("error_message")
|
332
|
+
|
333
|
+
stack_trace = "a stack trace"
|
334
|
+
mock_from_exception.return_value = stack_trace
|
330
335
|
|
331
336
|
source = MockManifestDeclarativeSource()
|
332
337
|
response = read_stream(source, TEST_READ_CONFIG, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG))
|
333
|
-
|
338
|
+
|
339
|
+
expected_stream_read = StreamRead(logs=[LogMessage("error_message - a stack trace", "ERROR")],
|
340
|
+
slices=[StreamReadSlicesInner(
|
341
|
+
pages=[StreamReadSlicesInnerPagesInner(records=[], request=None, response=None)],
|
342
|
+
slice_descriptor=None, state=None)],
|
343
|
+
test_read_limit_reached=False,
|
344
|
+
inferred_schema=None)
|
345
|
+
|
346
|
+
expected_message = AirbyteMessage(
|
347
|
+
type=MessageType.RECORD,
|
348
|
+
record=AirbyteRecordMessage(stream=_stream_name, data=dataclasses.asdict(expected_stream_read), emitted_at=1),
|
349
|
+
)
|
350
|
+
response.record.emitted_at = 1
|
351
|
+
assert response == expected_message
|
334
352
|
|
335
353
|
|
336
354
|
@pytest.mark.parametrize(
|
@@ -385,10 +403,12 @@ def test_list_streams(manifest_declarative_source):
|
|
385
403
|
|
386
404
|
assert result.type == MessageType.RECORD
|
387
405
|
assert result.record.stream == "list_streams"
|
388
|
-
assert result.record.data == {
|
389
|
-
|
390
|
-
|
391
|
-
|
406
|
+
assert result.record.data == {
|
407
|
+
"streams": [
|
408
|
+
{"name": "a name", "url": "https://a-url-base.com/a-path"},
|
409
|
+
{"name": "another name", "url": "https://another-url-base.com/another-path"},
|
410
|
+
]
|
411
|
+
}
|
392
412
|
|
393
413
|
|
394
414
|
def test_given_stream_is_not_declarative_stream_when_list_streams_then_return_exception_message(manifest_declarative_source):
|
@@ -7,17 +7,17 @@ from typing import Iterator
|
|
7
7
|
from unittest.mock import MagicMock
|
8
8
|
|
9
9
|
import pytest
|
10
|
+
from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
|
11
|
+
from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages
|
10
12
|
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level
|
11
13
|
from airbyte_cdk.models import Type as MessageType
|
12
|
-
from connector_builder.message_grouper import MessageGrouper
|
13
|
-
from connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages
|
14
14
|
from unit_tests.connector_builder.utils import create_configured_catalog
|
15
15
|
|
16
16
|
MAX_PAGES_PER_SLICE = 4
|
17
17
|
MAX_SLICES = 3
|
18
18
|
|
19
19
|
MANIFEST = {
|
20
|
-
"version": "0.
|
20
|
+
"version": "0.30.0",
|
21
21
|
"type": "DeclarativeSource",
|
22
22
|
"definitions": {
|
23
23
|
"selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"},
|
@@ -124,8 +124,9 @@ def test_get_grouped_messages():
|
|
124
124
|
)
|
125
125
|
|
126
126
|
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
127
|
-
actual_response: StreamRead = connector_builder_handler.get_message_groups(
|
128
|
-
|
127
|
+
actual_response: StreamRead = connector_builder_handler.get_message_groups(
|
128
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
129
|
+
)
|
129
130
|
assert actual_response.inferred_schema == expected_schema
|
130
131
|
|
131
132
|
single_slice = actual_response.slices[0]
|
@@ -166,9 +167,9 @@ def test_get_grouped_messages_with_logs():
|
|
166
167
|
),
|
167
168
|
]
|
168
169
|
expected_logs = [
|
169
|
-
{"message": "log message before the request"},
|
170
|
-
{"message": "log message during the page"},
|
171
|
-
{"message": "log message after the response"},
|
170
|
+
LogMessage(**{"message": "log message before the request", "level": "INFO"}),
|
171
|
+
LogMessage(**{"message": "log message during the page", "level": "INFO"}),
|
172
|
+
LogMessage(**{"message": "log message after the response", "level": "INFO"}),
|
172
173
|
]
|
173
174
|
|
174
175
|
mock_source = make_mock_source(
|
@@ -187,8 +188,9 @@ def test_get_grouped_messages_with_logs():
|
|
187
188
|
|
188
189
|
connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
189
190
|
|
190
|
-
actual_response: StreamRead = connector_builder_handler.get_message_groups(
|
191
|
-
|
191
|
+
actual_response: StreamRead = connector_builder_handler.get_message_groups(
|
192
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
193
|
+
)
|
192
194
|
single_slice = actual_response.slices[0]
|
193
195
|
for i, actual_page in enumerate(single_slice.pages):
|
194
196
|
assert actual_page == expected_pages[i]
|
@@ -229,9 +231,9 @@ def test_get_grouped_messages_record_limit(request_record_limit, max_record_limi
|
|
229
231
|
record_limit = min(request_record_limit, max_record_limit)
|
230
232
|
|
231
233
|
api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit)
|
232
|
-
actual_response: StreamRead = api.get_message_groups(
|
233
|
-
|
234
|
-
|
234
|
+
actual_response: StreamRead = api.get_message_groups(
|
235
|
+
mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras"), record_limit=request_record_limit
|
236
|
+
)
|
235
237
|
single_slice = actual_response.slices[0]
|
236
238
|
total_records = 0
|
237
239
|
for i, actual_page in enumerate(single_slice.pages):
|
@@ -270,8 +272,9 @@ def test_get_grouped_messages_default_record_limit(max_record_limit):
|
|
270
272
|
n_records = 2
|
271
273
|
|
272
274
|
api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit)
|
273
|
-
actual_response: StreamRead = api.get_message_groups(
|
274
|
-
|
275
|
+
actual_response: StreamRead = api.get_message_groups(
|
276
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
277
|
+
)
|
275
278
|
single_slice = actual_response.slices[0]
|
276
279
|
total_records = 0
|
277
280
|
for i, actual_page in enumerate(single_slice.pages):
|
@@ -352,8 +355,9 @@ def test_get_grouped_messages_no_records():
|
|
352
355
|
|
353
356
|
message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
354
357
|
|
355
|
-
actual_response: StreamRead = message_grouper.get_message_groups(
|
356
|
-
|
358
|
+
actual_response: StreamRead = message_grouper.get_message_groups(
|
359
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
360
|
+
)
|
357
361
|
|
358
362
|
single_slice = actual_response.slices[0]
|
359
363
|
for i, actual_page in enumerate(single_slice.pages):
|
@@ -459,8 +463,9 @@ def test_get_grouped_messages_with_many_slices():
|
|
459
463
|
|
460
464
|
connecto_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
461
465
|
|
462
|
-
stream_read: StreamRead = connecto_builder_handler.get_message_groups(
|
463
|
-
|
466
|
+
stream_read: StreamRead = connecto_builder_handler.get_message_groups(
|
467
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
468
|
+
)
|
464
469
|
|
465
470
|
assert not stream_read.test_read_limit_reached
|
466
471
|
assert len(stream_read.slices) == 2
|
@@ -484,8 +489,9 @@ def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limi
|
|
484
489
|
|
485
490
|
api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
486
491
|
|
487
|
-
stream_read: StreamRead = api.get_message_groups(
|
488
|
-
|
492
|
+
stream_read: StreamRead = api.get_message_groups(
|
493
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
494
|
+
)
|
489
495
|
|
490
496
|
assert stream_read.test_read_limit_reached
|
491
497
|
|
@@ -500,12 +506,28 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit
|
|
500
506
|
|
501
507
|
api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
502
508
|
|
503
|
-
stream_read: StreamRead = api.get_message_groups(
|
504
|
-
|
509
|
+
stream_read: StreamRead = api.get_message_groups(
|
510
|
+
source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
|
511
|
+
)
|
505
512
|
|
506
513
|
assert stream_read.test_read_limit_reached
|
507
514
|
|
508
515
|
|
516
|
+
def test_read_stream_returns_error_if_stream_does_not_exist():
|
517
|
+
mock_source = MagicMock()
|
518
|
+
mock_source.read.side_effect = ValueError("error")
|
519
|
+
|
520
|
+
full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
|
521
|
+
|
522
|
+
message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
|
523
|
+
actual_response = message_grouper.get_message_groups(source=mock_source, config=full_config,
|
524
|
+
configured_catalog=create_configured_catalog("not_in_manifest"))
|
525
|
+
|
526
|
+
assert 1 == len(actual_response.logs)
|
527
|
+
assert "Traceback" in actual_response.logs[0].message
|
528
|
+
assert "ERROR" in actual_response.logs[0].level
|
529
|
+
|
530
|
+
|
509
531
|
def make_mock_source(return_value: Iterator) -> MagicMock:
|
510
532
|
mock_source = MagicMock()
|
511
533
|
mock_source.read.return_value = return_value
|
File without changes
|
File without changes
|
File without changes
|