airbyte-cdk 0.30.4__py3-none-any.whl → 0.31.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -7,6 +7,7 @@ from datetime import datetime
7
7
  from typing import Any, Dict, List, Mapping
8
8
  from urllib.parse import urljoin
9
9
 
10
+ from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
10
11
  from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
11
12
  from airbyte_cdk.models import Type
12
13
  from airbyte_cdk.models import Type as MessageType
@@ -15,7 +16,6 @@ from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
15
16
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
16
17
  from airbyte_cdk.sources.streams.http import HttpStream
17
18
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
18
- from connector_builder.message_grouper import MessageGrouper
19
19
 
20
20
  DEFAULT_MAXIMUM_NUMBER_OF_PAGES_PER_SLICE = 5
21
21
  DEFAULT_MAXIMUM_NUMBER_OF_SLICES = 5
@@ -31,13 +31,14 @@ def read_stream(source: DeclarativeSource, config: Mapping[str, Any], configured
31
31
  handler = MessageGrouper(max_pages_per_slice, max_slices)
32
32
  stream_name = configured_catalog.streams[0].stream.name # The connector builder only supports a single stream
33
33
  stream_read = handler.get_message_groups(source, config, configured_catalog, max_records)
34
- return AirbyteMessage(type=MessageType.RECORD, record=AirbyteRecordMessage(
35
- data=dataclasses.asdict(stream_read),
36
- stream=stream_name,
37
- emitted_at=_emitted_at()
38
- ))
34
+ return AirbyteMessage(
35
+ type=MessageType.RECORD,
36
+ record=AirbyteRecordMessage(data=dataclasses.asdict(stream_read), stream=stream_name, emitted_at=_emitted_at()),
37
+ )
39
38
  except Exception as exc:
40
- error = AirbyteTracedException.from_exception(exc, message=f"Error reading stream with config={config} and catalog={configured_catalog}")
39
+ error = AirbyteTracedException.from_exception(
40
+ exc, message=f"Error reading stream with config={config} and catalog={configured_catalog}"
41
+ )
41
42
  return error.as_airbyte_message()
42
43
 
43
44
 
@@ -85,9 +86,7 @@ def _get_http_streams(source: ManifestDeclarativeSource, config: Dict[str, Any])
85
86
  f"A declarative stream should only have a retriever of type HttpStream, but received: {stream.retriever.__class__}"
86
87
  )
87
88
  else:
88
- raise TypeError(
89
- f"A declarative source should only contain streams of type DeclarativeStream, but received: {stream.__class__}"
90
- )
89
+ raise TypeError(f"A declarative source should only contain streams of type DeclarativeStream, but received: {stream.__class__}")
91
90
  return http_streams
92
91
 
93
92
 
@@ -7,11 +7,11 @@ import sys
7
7
  from typing import Any, List, Mapping, Optional, Tuple
8
8
 
9
9
  from airbyte_cdk.connector import BaseConnector
10
+ from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, read_stream, resolve_manifest
10
11
  from airbyte_cdk.entrypoint import AirbyteEntrypoint
11
12
  from airbyte_cdk.models import ConfiguredAirbyteCatalog
12
13
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
13
14
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
14
- from connector_builder.connector_builder_handler import list_streams, read_stream, resolve_manifest
15
15
 
16
16
 
17
17
  def create_source(config: Mapping[str, Any]) -> ManifestDeclarativeSource:
@@ -9,11 +9,19 @@ from json import JSONDecodeError
9
9
  from typing import Any, Iterable, Iterator, Mapping, Optional, Union
10
10
  from urllib.parse import parse_qs, urlparse
11
11
 
12
- from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Type
12
+ from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages, StreamReadSlices
13
13
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
14
+ from airbyte_cdk.utils import AirbyteTracedException
14
15
  from airbyte_cdk.utils.schema_inferrer import SchemaInferrer
15
- from airbyte_protocol.models.airbyte_protocol import ConfiguredAirbyteCatalog
16
- from connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages, StreamReadSlices
16
+ from airbyte_protocol.models.airbyte_protocol import (
17
+ AirbyteLogMessage,
18
+ AirbyteMessage,
19
+ AirbyteTraceMessage,
20
+ ConfiguredAirbyteCatalog,
21
+ Level,
22
+ TraceType,
23
+ )
24
+ from airbyte_protocol.models.airbyte_protocol import Type as MessageType
17
25
 
18
26
 
19
27
  class MessageGrouper:
@@ -24,12 +32,13 @@ class MessageGrouper:
24
32
  self._max_slices = max_slices
25
33
  self._max_record_limit = max_record_limit
26
34
 
27
- def get_message_groups(self,
28
- source: DeclarativeSource,
29
- config: Mapping[str, Any],
30
- configured_catalog: ConfiguredAirbyteCatalog,
31
- record_limit: Optional[int] = None,
32
- ) -> StreamRead:
35
+ def get_message_groups(
36
+ self,
37
+ source: DeclarativeSource,
38
+ config: Mapping[str, Any],
39
+ configured_catalog: ConfiguredAirbyteCatalog,
40
+ record_limit: Optional[int] = None,
41
+ ) -> StreamRead:
33
42
  if record_limit is not None and not (1 <= record_limit <= 1000):
34
43
  raise ValueError(f"Record limit must be between 1 and 1000. Got {record_limit}")
35
44
  schema_inferrer = SchemaInferrer()
@@ -41,14 +50,18 @@ class MessageGrouper:
41
50
 
42
51
  slices = []
43
52
  log_messages = []
44
- state = {} # No support for incremental sync
45
53
  for message_group in self._get_message_groups(
46
- source.read(self.logger, config, configured_catalog, state),
54
+ self._read_stream(source, config, configured_catalog),
47
55
  schema_inferrer,
48
56
  record_limit,
49
57
  ):
50
58
  if isinstance(message_group, AirbyteLogMessage):
51
- log_messages.append({"message": message_group.message})
59
+ log_messages.append(LogMessage(**{"message": message_group.message, "level": message_group.level.value}))
60
+ elif isinstance(message_group, AirbyteTraceMessage):
61
+ if message_group.type == TraceType.ERROR:
62
+ error_message = f"{message_group.error.message} - {message_group.error.stack_trace}"
63
+ log_messages.append(LogMessage(**{"message": error_message, "level": "ERROR"}))
64
+
52
65
  else:
53
66
  slices.append(message_group)
54
67
 
@@ -56,12 +69,14 @@ class MessageGrouper:
56
69
  logs=log_messages,
57
70
  slices=slices,
58
71
  test_read_limit_reached=self._has_reached_limit(slices),
59
- inferred_schema=schema_inferrer.get_stream_schema(configured_catalog.streams[0].stream.name) # The connector builder currently only supports reading from a single stream at a time
72
+ inferred_schema=schema_inferrer.get_stream_schema(
73
+ configured_catalog.streams[0].stream.name
74
+ ), # The connector builder currently only supports reading from a single stream at a time
60
75
  )
61
76
 
62
77
  def _get_message_groups(
63
78
  self, messages: Iterator[AirbyteMessage], schema_inferrer: SchemaInferrer, limit: int
64
- ) -> Iterable[Union[StreamReadPages, AirbyteLogMessage]]:
79
+ ) -> Iterable[Union[StreamReadPages, AirbyteLogMessage, AirbyteTraceMessage]]:
65
80
  """
66
81
  Message groups are partitioned according to when request log messages are received. Subsequent response log messages
67
82
  and record messages belong to the prior request log message and when we encounter another request, append the latest
@@ -83,44 +98,56 @@ class MessageGrouper:
83
98
  current_slice_pages = []
84
99
  current_page_request: Optional[HttpRequest] = None
85
100
  current_page_response: Optional[HttpResponse] = None
101
+ had_error = False
86
102
 
87
103
  while records_count < limit and (message := next(messages, None)):
88
104
  if self._need_to_close_page(at_least_one_page_in_group, message):
89
- self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
105
+ self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, True)
90
106
  current_page_request = None
91
107
  current_page_response = None
92
108
 
93
- if at_least_one_page_in_group and message.type == Type.LOG and message.log.message.startswith("slice:"):
109
+ if at_least_one_page_in_group and message.type == MessageType.LOG and message.log.message.startswith("slice:"):
94
110
  yield StreamReadSlices(pages=current_slice_pages)
95
111
  current_slice_pages = []
96
112
  at_least_one_page_in_group = False
97
- elif message.type == Type.LOG and message.log.message.startswith("request:"):
113
+ elif message.type == MessageType.LOG and message.log.message.startswith("request:"):
98
114
  if not at_least_one_page_in_group:
99
115
  at_least_one_page_in_group = True
100
116
  current_page_request = self._create_request_from_log_message(message.log)
101
- elif message.type == Type.LOG and message.log.message.startswith("response:"):
117
+ elif message.type == MessageType.LOG and message.log.message.startswith("response:"):
102
118
  current_page_response = self._create_response_from_log_message(message.log)
103
- elif message.type == Type.LOG:
119
+ elif message.type == MessageType.LOG:
120
+ if message.log.level == Level.ERROR:
121
+ had_error = True
104
122
  yield message.log
105
- elif message.type == Type.RECORD:
123
+ elif message.type == MessageType.TRACE:
124
+ if message.trace.type == TraceType.ERROR:
125
+ had_error = True
126
+ yield message.trace
127
+ elif message.type == MessageType.RECORD:
106
128
  current_page_records.append(message.record.data)
107
129
  records_count += 1
108
130
  schema_inferrer.accumulate(message.record)
109
131
  else:
110
- self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records)
132
+ self._close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete=not had_error)
111
133
  yield StreamReadSlices(pages=current_slice_pages)
112
134
 
113
135
  @staticmethod
114
- def _need_to_close_page(at_least_one_page_in_group, message):
136
+ def _need_to_close_page(at_least_one_page_in_group, message) -> bool:
115
137
  return (
116
- at_least_one_page_in_group
117
- and message.type == Type.LOG
118
- and (message.log.message.startswith("request:") or message.log.message.startswith("slice:"))
138
+ at_least_one_page_in_group
139
+ and message.type == MessageType.LOG
140
+ and (message.log.message.startswith("request:") or message.log.message.startswith("slice:"))
119
141
  )
120
142
 
121
143
  @staticmethod
122
- def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records):
123
- if not current_page_request or not current_page_response:
144
+ def _close_page(current_page_request, current_page_response, current_slice_pages, current_page_records, validate_page_complete: bool):
145
+ """
146
+ Close a page when parsing message groups
147
+ @param validate_page_complete: in some cases, we expect the CDK to not return a response. As of today, this will only happen before
148
+ an uncaught exception and therefore, the assumption is that `validate_page_complete=False` only on the last page that is being closed, and only when an error was seen
149
+ """
150
+ if validate_page_complete and (not current_page_request or not current_page_response):
124
151
  raise ValueError("Every message grouping should have at least one request and response")
125
152
 
126
153
  current_slice_pages.append(
@@ -128,6 +155,15 @@ class MessageGrouper:
128
155
  )
129
156
  current_page_records.clear()
130
157
 
158
+ def _read_stream(self, source, config, configured_catalog) -> Iterator[AirbyteMessage]:
159
+ # the generator can raise an exception
160
+ # iterate over the generated messages. if next raises an exception, catch it and yield it as an AirbyteMessage carrying an error trace
161
+ try:
162
+ yield from source.read(logger=self.logger, config=config, catalog=configured_catalog, state={})
163
+ except Exception as e:
164
+ error_message = f"{e.args[0] if len(e.args) > 0 else str(e)}"
165
+ yield AirbyteTracedException.from_exception(e, message=error_message).as_airbyte_message()
166
+
131
167
  def _create_request_from_log_message(self, log_message: AirbyteLogMessage) -> Optional[HttpRequest]:
132
168
  # TODO: As a temporary stopgap, the CDK emits request data as a log message string. Ideally this should come in the
133
169
  # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
@@ -50,9 +50,15 @@ class StreamReadSlicesInner:
50
50
  state: Optional[Dict[str, Any]]
51
51
 
52
52
 
53
+ @dataclass
54
+ class LogMessage:
55
+ message: str
56
+ level: str
57
+
58
+
53
59
  @dataclass
54
60
  class StreamRead(object):
55
- logs: List[object]
61
+ logs: List[LogMessage]
56
62
  slices: List[StreamReadSlicesInner]
57
63
  test_read_limit_reached: bool
58
64
  inferred_schema: Optional[Dict[str, Any]]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.30.4
3
+ Version: 0.31.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -5,6 +5,11 @@ airbyte_cdk/entrypoint.py,sha256=9WkwB7Giqsz-VCaRkqCwrVIH9OfvULQs3-kz8IBfOh4,690
5
5
  airbyte_cdk/exception_handler.py,sha256=CwkiPdZ1WMOr3CBkvKFyHiyLerXGRqBrVlB4p0OImGI,1125
6
6
  airbyte_cdk/logger.py,sha256=4Mi2MEQi1uh59BP9Dxw_UEbZuxaJewqK_jvEU2b10nk,3985
7
7
  airbyte_cdk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
+ airbyte_cdk/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
9
+ airbyte_cdk/connector_builder/connector_builder_handler.py,sha256=9iG2hmaJyBtLDtdwtQp8M19pG2HQjR7Yy6jnBDggXMk,4160
10
+ airbyte_cdk/connector_builder/main.py,sha256=IOijgSQ4A9KhqJplHSVPTrxH_cyGWW_9uNmSIwBo0l0,3021
11
+ airbyte_cdk/connector_builder/message_grouper.py,sha256=-0AYh21eZeNJyyFPvSB84NNKMZznSX7bf4Ygi3deWEU,10972
12
+ airbyte_cdk/connector_builder/models.py,sha256=y0PJ-LwJk3e1RzRmMfjQSBP9ENx_a0wBcWNCjlW72Ks,1832
8
13
  airbyte_cdk/destinations/__init__.py,sha256=0Uxmz3iBAyZJdk_bqUVt2pb0UwRTpFjTnFE6fQFbWKY,126
9
14
  airbyte_cdk/destinations/destination.py,sha256=_tIMnKcRQbtIsjVvNOVjfbIxgCNLuBXQwQj8MyVm3BI,5420
10
15
  airbyte_cdk/models/__init__.py,sha256=LPQcYdDPwrCXiBPe_jexO4UAcbovIb1V9tHB6I7Un30,633
@@ -152,16 +157,11 @@ airbyte_cdk/utils/airbyte_secrets_utils.py,sha256=q3aDl8T10ufGbeqnUPqbZLxQcHdkf2
152
157
  airbyte_cdk/utils/event_timing.py,sha256=Hn5kCc9xGKLcV5EYpJCZwNiz9neKKu2WG8FJF_hy278,2377
153
158
  airbyte_cdk/utils/schema_inferrer.py,sha256=LQLOlraFksg7_sqpJNhy9pS_K42GVxG634ogM_P2s5E,2361
154
159
  airbyte_cdk/utils/traced_exception.py,sha256=9G2sG9eYkvn6Aa7rMuUW_KIRszRaTc_xdnTQNDKyKGI,3216
155
- connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
156
- connector_builder/connector_builder_handler.py,sha256=QF0Rus1_NhKNfma8EtL6sVWSu8kySLDj2m5IcZ-9OsU,4167
157
- connector_builder/main.py,sha256=6jnHUX5pVG3dVPC6D4oMGneoxzmbpBqZNB-rfj7_Mjc,3009
158
- connector_builder/message_grouper.py,sha256=NkiDNvwXkP4EB-501hGpmZZCJjaJo7xYdGD2xmiGoH8,9121
159
- connector_builder/models.py,sha256=mf9cAZaxFW7uFp0gyHiJpuGlAuGuq2zFvs76VMYqUkA,1765
160
160
  source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
161
161
  source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
162
162
  unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
163
- unit_tests/connector_builder/test_connector_builder_handler.py,sha256=kuw1QAIsGt8-R24Q7JkjsE6ErOy6HaDpPzIbK0d5KRk,18730
164
- unit_tests/connector_builder/test_message_grouper.py,sha256=1yOFy9Umr7Y1rIQN0Rvt6nHdDzdkmmncxoBD5PkRBr4,22623
163
+ unit_tests/connector_builder/test_connector_builder_handler.py,sha256=fwOlQPdc9quPUoZYXg_GVXqdkw5TLWs0CuBjYEMAcfM,19745
164
+ unit_tests/connector_builder/test_message_grouper.py,sha256=1_DXyuqHmr8B7b1t8PSpmYWaHvYYAB2mMwpp_YAbYwc,22959
165
165
  unit_tests/connector_builder/utils.py,sha256=AAggdGWP-mNuWOZUHLAVIbjTeIcdPo-3pbMm5zdYpS0,796
166
166
  unit_tests/destinations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
167
  unit_tests/destinations/test_destination.py,sha256=koG_j812KMkcIxoUH6XlAL3zsephZJmlHvyzJXm0dCs,10269
@@ -257,8 +257,8 @@ unit_tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,
257
257
  unit_tests/utils/test_schema_inferrer.py,sha256=ckl17GlNOZInqgxni7Z2A0bg_p6JDy0GVFAG8ph67pw,3288
258
258
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
259
259
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
260
- airbyte_cdk-0.30.4.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
261
- airbyte_cdk-0.30.4.dist-info/METADATA,sha256=RIkRi-qItoUoeznoJcGPfwmobM39pO2okwNIp-OHadA,8902
262
- airbyte_cdk-0.30.4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
263
- airbyte_cdk-0.30.4.dist-info/top_level.txt,sha256=1QIVQ1LzAH7ad20nfhHPxy0-7QJU22jfCrLJSLuaFog,69
264
- airbyte_cdk-0.30.4.dist-info/RECORD,,
260
+ airbyte_cdk-0.31.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
261
+ airbyte_cdk-0.31.1.dist-info/METADATA,sha256=nSfqOPf-MueD0MHg9bTQU8PWrhP43lmxd6JK1IDNj-M,8902
262
+ airbyte_cdk-0.31.1.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
263
+ airbyte_cdk-0.31.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
264
+ airbyte_cdk-0.31.1.dist-info/RECORD,,
@@ -1,4 +1,3 @@
1
1
  airbyte_cdk
2
- connector_builder
3
2
  source_declarative_manifest
4
3
  unit_tests
@@ -3,21 +3,22 @@
3
3
  #
4
4
 
5
5
  import copy
6
+ import dataclasses
6
7
  import json
7
8
  from unittest import mock
8
9
  from unittest.mock import patch
9
10
 
10
- import connector_builder
11
11
  import pytest
12
+ from airbyte_cdk import connector_builder
13
+ from airbyte_cdk.connector_builder.connector_builder_handler import list_streams, resolve_manifest
14
+ from airbyte_cdk.connector_builder.main import handle_connector_builder_request, handle_request, read_stream
15
+ from airbyte_cdk.connector_builder.models import LogMessage, StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
12
16
  from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, ConfiguredAirbyteCatalog
13
17
  from airbyte_cdk.models import Type as MessageType
14
18
  from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
15
19
  from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
16
20
  from airbyte_cdk.sources.streams.core import Stream
17
21
  from airbyte_cdk.sources.streams.http import HttpStream
18
- from connector_builder.connector_builder_handler import list_streams, resolve_manifest
19
- from connector_builder.main import handle_connector_builder_request, handle_request, read_stream
20
- from connector_builder.models import StreamRead, StreamReadSlicesInner, StreamReadSlicesInnerPagesInner
21
22
  from unit_tests.connector_builder.utils import create_configured_catalog
22
23
 
23
24
  _stream_name = "stream_with_custom_requester"
@@ -215,7 +216,7 @@ def test_resolve_manifest(valid_resolve_manifest_config_file):
215
216
  "primary_key": _stream_primary_key,
216
217
  "url_base": _stream_url_base,
217
218
  "$parameters": _stream_options,
218
- "page_size": 10
219
+ "page_size": 10,
219
220
  },
220
221
  "name": _stream_name,
221
222
  "primary_key": _stream_primary_key,
@@ -315,7 +316,7 @@ def test_read():
315
316
  emitted_at=1,
316
317
  ),
317
318
  )
318
- with patch("connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read):
319
+ with patch("airbyte_cdk.connector_builder.message_grouper.MessageGrouper.get_message_groups", return_value=stream_read):
319
320
  output_record = handle_connector_builder_request(
320
321
  source, "test_read", config, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG)
321
322
  )
@@ -323,14 +324,31 @@ def test_read():
323
324
  assert output_record == expected_airbyte_message
324
325
 
325
326
 
326
- def test_read_returns_error_response():
327
+ @patch("traceback.TracebackException.from_exception")
328
+ def test_read_returns_error_response(mock_from_exception):
327
329
  class MockManifestDeclarativeSource:
328
330
  def read(self, logger, config, catalog, state):
329
- raise ValueError
331
+ raise ValueError("error_message")
332
+
333
+ stack_trace = "a stack trace"
334
+ mock_from_exception.return_value = stack_trace
330
335
 
331
336
  source = MockManifestDeclarativeSource()
332
337
  response = read_stream(source, TEST_READ_CONFIG, ConfiguredAirbyteCatalog.parse_obj(CONFIGURED_CATALOG))
333
- assert "Error reading" in response.trace.error.message
338
+
339
+ expected_stream_read = StreamRead(logs=[LogMessage("error_message - a stack trace", "ERROR")],
340
+ slices=[StreamReadSlicesInner(
341
+ pages=[StreamReadSlicesInnerPagesInner(records=[], request=None, response=None)],
342
+ slice_descriptor=None, state=None)],
343
+ test_read_limit_reached=False,
344
+ inferred_schema=None)
345
+
346
+ expected_message = AirbyteMessage(
347
+ type=MessageType.RECORD,
348
+ record=AirbyteRecordMessage(stream=_stream_name, data=dataclasses.asdict(expected_stream_read), emitted_at=1),
349
+ )
350
+ response.record.emitted_at = 1
351
+ assert response == expected_message
334
352
 
335
353
 
336
354
  @pytest.mark.parametrize(
@@ -385,10 +403,12 @@ def test_list_streams(manifest_declarative_source):
385
403
 
386
404
  assert result.type == MessageType.RECORD
387
405
  assert result.record.stream == "list_streams"
388
- assert result.record.data == {"streams": [
389
- {"name": "a name", "url": "https://a-url-base.com/a-path"},
390
- {"name": "another name", "url": "https://another-url-base.com/another-path"}
391
- ]}
406
+ assert result.record.data == {
407
+ "streams": [
408
+ {"name": "a name", "url": "https://a-url-base.com/a-path"},
409
+ {"name": "another name", "url": "https://another-url-base.com/another-path"},
410
+ ]
411
+ }
392
412
 
393
413
 
394
414
  def test_given_stream_is_not_declarative_stream_when_list_streams_then_return_exception_message(manifest_declarative_source):
@@ -7,17 +7,17 @@ from typing import Iterator
7
7
  from unittest.mock import MagicMock
8
8
 
9
9
  import pytest
10
+ from airbyte_cdk.connector_builder.message_grouper import MessageGrouper
11
+ from airbyte_cdk.connector_builder.models import HttpRequest, HttpResponse, LogMessage, StreamRead, StreamReadPages
10
12
  from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, AirbyteRecordMessage, Level
11
13
  from airbyte_cdk.models import Type as MessageType
12
- from connector_builder.message_grouper import MessageGrouper
13
- from connector_builder.models import HttpRequest, HttpResponse, StreamRead, StreamReadPages
14
14
  from unit_tests.connector_builder.utils import create_configured_catalog
15
15
 
16
16
  MAX_PAGES_PER_SLICE = 4
17
17
  MAX_SLICES = 3
18
18
 
19
19
  MANIFEST = {
20
- "version": "0.1.0",
20
+ "version": "0.30.0",
21
21
  "type": "DeclarativeSource",
22
22
  "definitions": {
23
23
  "selector": {"extractor": {"field_path": ["items"], "type": "DpathExtractor"}, "type": "RecordSelector"},
@@ -124,8 +124,9 @@ def test_get_grouped_messages():
124
124
  )
125
125
 
126
126
  connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
127
- actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG,
128
- configured_catalog=create_configured_catalog("hashiras"))
127
+ actual_response: StreamRead = connector_builder_handler.get_message_groups(
128
+ source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
129
+ )
129
130
  assert actual_response.inferred_schema == expected_schema
130
131
 
131
132
  single_slice = actual_response.slices[0]
@@ -166,9 +167,9 @@ def test_get_grouped_messages_with_logs():
166
167
  ),
167
168
  ]
168
169
  expected_logs = [
169
- {"message": "log message before the request"},
170
- {"message": "log message during the page"},
171
- {"message": "log message after the response"},
170
+ LogMessage(**{"message": "log message before the request", "level": "INFO"}),
171
+ LogMessage(**{"message": "log message during the page", "level": "INFO"}),
172
+ LogMessage(**{"message": "log message after the response", "level": "INFO"}),
172
173
  ]
173
174
 
174
175
  mock_source = make_mock_source(
@@ -187,8 +188,9 @@ def test_get_grouped_messages_with_logs():
187
188
 
188
189
  connector_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
189
190
 
190
- actual_response: StreamRead = connector_builder_handler.get_message_groups(source=mock_source, config=CONFIG,
191
- configured_catalog=create_configured_catalog("hashiras"))
191
+ actual_response: StreamRead = connector_builder_handler.get_message_groups(
192
+ source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
193
+ )
192
194
  single_slice = actual_response.slices[0]
193
195
  for i, actual_page in enumerate(single_slice.pages):
194
196
  assert actual_page == expected_pages[i]
@@ -229,9 +231,9 @@ def test_get_grouped_messages_record_limit(request_record_limit, max_record_limi
229
231
  record_limit = min(request_record_limit, max_record_limit)
230
232
 
231
233
  api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit)
232
- actual_response: StreamRead = api.get_message_groups(mock_source, config=CONFIG,
233
- configured_catalog=create_configured_catalog("hashiras"),
234
- record_limit=request_record_limit)
234
+ actual_response: StreamRead = api.get_message_groups(
235
+ mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras"), record_limit=request_record_limit
236
+ )
235
237
  single_slice = actual_response.slices[0]
236
238
  total_records = 0
237
239
  for i, actual_page in enumerate(single_slice.pages):
@@ -270,8 +272,9 @@ def test_get_grouped_messages_default_record_limit(max_record_limit):
270
272
  n_records = 2
271
273
 
272
274
  api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES, max_record_limit=max_record_limit)
273
- actual_response: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG,
274
- configured_catalog=create_configured_catalog("hashiras"))
275
+ actual_response: StreamRead = api.get_message_groups(
276
+ source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
277
+ )
275
278
  single_slice = actual_response.slices[0]
276
279
  total_records = 0
277
280
  for i, actual_page in enumerate(single_slice.pages):
@@ -352,8 +355,9 @@ def test_get_grouped_messages_no_records():
352
355
 
353
356
  message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
354
357
 
355
- actual_response: StreamRead = message_grouper.get_message_groups(source=mock_source, config=CONFIG,
356
- configured_catalog=create_configured_catalog("hashiras"))
358
+ actual_response: StreamRead = message_grouper.get_message_groups(
359
+ source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
360
+ )
357
361
 
358
362
  single_slice = actual_response.slices[0]
359
363
  for i, actual_page in enumerate(single_slice.pages):
@@ -459,8 +463,9 @@ def test_get_grouped_messages_with_many_slices():
459
463
 
460
464
  connecto_builder_handler = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
461
465
 
462
- stream_read: StreamRead = connecto_builder_handler.get_message_groups(source=mock_source, config=CONFIG,
463
- configured_catalog=create_configured_catalog("hashiras"))
466
+ stream_read: StreamRead = connecto_builder_handler.get_message_groups(
467
+ source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
468
+ )
464
469
 
465
470
  assert not stream_read.test_read_limit_reached
466
471
  assert len(stream_read.slices) == 2
@@ -484,8 +489,9 @@ def test_get_grouped_messages_given_maximum_number_of_slices_then_test_read_limi
484
489
 
485
490
  api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
486
491
 
487
- stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG,
488
- configured_catalog=create_configured_catalog("hashiras"))
492
+ stream_read: StreamRead = api.get_message_groups(
493
+ source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
494
+ )
489
495
 
490
496
  assert stream_read.test_read_limit_reached
491
497
 
@@ -500,12 +506,28 @@ def test_get_grouped_messages_given_maximum_number_of_pages_then_test_read_limit
500
506
 
501
507
  api = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
502
508
 
503
- stream_read: StreamRead = api.get_message_groups(source=mock_source, config=CONFIG,
504
- configured_catalog=create_configured_catalog("hashiras"))
509
+ stream_read: StreamRead = api.get_message_groups(
510
+ source=mock_source, config=CONFIG, configured_catalog=create_configured_catalog("hashiras")
511
+ )
505
512
 
506
513
  assert stream_read.test_read_limit_reached
507
514
 
508
515
 
516
+ def test_read_stream_returns_error_if_stream_does_not_exist():
517
+ mock_source = MagicMock()
518
+ mock_source.read.side_effect = ValueError("error")
519
+
520
+ full_config = {**CONFIG, **{"__injected_declarative_manifest": MANIFEST}}
521
+
522
+ message_grouper = MessageGrouper(MAX_PAGES_PER_SLICE, MAX_SLICES)
523
+ actual_response = message_grouper.get_message_groups(source=mock_source, config=full_config,
524
+ configured_catalog=create_configured_catalog("not_in_manifest"))
525
+
526
+ assert 1 == len(actual_response.logs)
527
+ assert "Traceback" in actual_response.logs[0].message
528
+ assert "ERROR" in actual_response.logs[0].level
529
+
530
+
509
531
  def make_mock_source(return_value: Iterator) -> MagicMock:
510
532
  mock_source = MagicMock()
511
533
  mock_source.read.return_value = return_value