airbyte-cdk 0.62.2__py3-none-any.whl → 0.63.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/abstract_source.py +34 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -0
- airbyte_cdk/utils/traced_exception.py +16 -1
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/RECORD +11 -11
- unit_tests/sources/test_abstract_source.py +173 -29
- unit_tests/sources/test_integration_source.py +25 -8
- unit_tests/sources/test_source_read.py +1 -1
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,9 @@ from airbyte_cdk.models import (
|
|
14
14
|
AirbyteStreamStatus,
|
15
15
|
ConfiguredAirbyteCatalog,
|
16
16
|
ConfiguredAirbyteStream,
|
17
|
+
FailureType,
|
17
18
|
Status,
|
19
|
+
StreamDescriptor,
|
18
20
|
SyncMode,
|
19
21
|
)
|
20
22
|
from airbyte_cdk.models import Type as MessageType
|
@@ -27,6 +29,7 @@ from airbyte_cdk.sources.streams.http.http import HttpStream
|
|
27
29
|
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
28
30
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
|
29
31
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
32
|
+
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
30
33
|
from airbyte_cdk.utils.event_timing import create_timer
|
31
34
|
from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
|
32
35
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
@@ -133,11 +136,16 @@ class AbstractSource(Source, ABC):
|
|
133
136
|
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
|
134
137
|
yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
|
135
138
|
except AirbyteTracedException as e:
|
139
|
+
logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
|
140
|
+
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
|
136
141
|
yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
142
|
+
yield e.as_sanitized_airbyte_message(stream_descriptor=StreamDescriptor(name=configured_stream.stream.name))
|
143
|
+
stream_name_to_exception[stream_instance.name] = e
|
144
|
+
if self.stop_sync_on_stream_failure:
|
145
|
+
logger.info(
|
146
|
+
f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
|
147
|
+
)
|
148
|
+
break
|
141
149
|
except Exception as e:
|
142
150
|
yield from self._emit_queued_messages()
|
143
151
|
logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
|
@@ -145,15 +153,28 @@ class AbstractSource(Source, ABC):
|
|
145
153
|
yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
|
146
154
|
display_message = stream_instance.get_error_display_message(e)
|
147
155
|
if display_message:
|
148
|
-
|
149
|
-
|
156
|
+
traced_exception = AirbyteTracedException.from_exception(e, message=display_message)
|
157
|
+
else:
|
158
|
+
traced_exception = AirbyteTracedException.from_exception(e)
|
159
|
+
yield traced_exception.as_sanitized_airbyte_message(
|
160
|
+
stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
|
161
|
+
)
|
162
|
+
stream_name_to_exception[stream_instance.name] = traced_exception
|
163
|
+
if self.stop_sync_on_stream_failure:
|
164
|
+
logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
|
165
|
+
break
|
150
166
|
finally:
|
151
167
|
timer.finish_event()
|
152
168
|
logger.info(f"Finished syncing {configured_stream.stream.name}")
|
153
169
|
logger.info(timer.report())
|
154
170
|
|
155
|
-
if
|
156
|
-
|
171
|
+
if len(stream_name_to_exception) > 0:
|
172
|
+
error_message = self._generate_failed_streams_error_message(stream_name_to_exception)
|
173
|
+
logger.info(error_message)
|
174
|
+
# We still raise at least one exception when a stream raises an exception because the platform currently relies
|
175
|
+
# on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
|
176
|
+
# type because this combined error isn't actionable, but rather the previously emitted individual errors.
|
177
|
+
raise AirbyteTracedException(message=error_message, failure_type=FailureType.config_error)
|
157
178
|
logger.info(f"Finished syncing {self.name}")
|
158
179
|
|
159
180
|
@property
|
@@ -282,17 +303,17 @@ class AbstractSource(Source, ABC):
|
|
282
303
|
return _default_message_repository
|
283
304
|
|
284
305
|
@property
|
285
|
-
def
|
306
|
+
def stop_sync_on_stream_failure(self) -> bool:
|
286
307
|
"""
|
287
308
|
WARNING: This function is in-development which means it is subject to change. Use at your own risk.
|
288
309
|
|
289
|
-
By default, a source
|
290
|
-
|
291
|
-
|
310
|
+
By default, when a source encounters an exception while syncing a stream, it will emit an error trace message and then
|
311
|
+
continue syncing the next stream. This can be overwritten on a per-source basis so that the source will stop the sync
|
312
|
+
on the first error seen and emit a single error trace message for that stream.
|
292
313
|
"""
|
293
314
|
return False
|
294
315
|
|
295
316
|
@staticmethod
|
296
317
|
def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
|
297
|
-
failures = ", ".join([f"{stream}: {exception.__repr__()}" for stream, exception in stream_failures.items()])
|
318
|
+
failures = ", ".join([f"{stream}: {filter_secrets(exception.__repr__())}" for stream, exception in stream_failures.items()])
|
298
319
|
return f"During the sync, the following streams did not sync successfully: {failures}"
|
@@ -243,6 +243,9 @@ class FileBasedStreamPartition(Partition):
|
|
243
243
|
data_to_return = dict(record_data)
|
244
244
|
self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
|
245
245
|
yield Record(data_to_return, self.stream_name())
|
246
|
+
elif isinstance(record_data, AirbyteMessage) and record_data.type == Type.RECORD:
|
247
|
+
# `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
|
248
|
+
yield Record(record_data.record.data, self.stream_name())
|
246
249
|
else:
|
247
250
|
self._message_repository.emit_message(record_data)
|
248
251
|
except Exception as e:
|
@@ -13,6 +13,7 @@ from airbyte_cdk.models import (
|
|
13
13
|
AirbyteTraceMessage,
|
14
14
|
FailureType,
|
15
15
|
Status,
|
16
|
+
StreamDescriptor,
|
16
17
|
TraceType,
|
17
18
|
)
|
18
19
|
from airbyte_cdk.models import Type as MessageType
|
@@ -43,7 +44,7 @@ class AirbyteTracedException(Exception):
|
|
43
44
|
self._exception = exception
|
44
45
|
super().__init__(internal_message)
|
45
46
|
|
46
|
-
def as_airbyte_message(self) -> AirbyteMessage:
|
47
|
+
def as_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
|
47
48
|
"""
|
48
49
|
Builds an AirbyteTraceMessage from the exception
|
49
50
|
"""
|
@@ -60,6 +61,7 @@ class AirbyteTracedException(Exception):
|
|
60
61
|
internal_message=self.internal_message,
|
61
62
|
failure_type=self.failure_type,
|
62
63
|
stack_trace=stack_trace_str,
|
64
|
+
stream_descriptor=stream_descriptor,
|
63
65
|
),
|
64
66
|
)
|
65
67
|
|
@@ -88,3 +90,16 @@ class AirbyteTracedException(Exception):
|
|
88
90
|
:param exc: the exception that caused the error
|
89
91
|
"""
|
90
92
|
return cls(internal_message=str(exc), exception=exc, *args, **kwargs) # type: ignore # ignoring because of args and kwargs
|
93
|
+
|
94
|
+
def as_sanitized_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
|
95
|
+
"""
|
96
|
+
Builds an AirbyteTraceMessage from the exception and sanitizes any secrets from the message body
|
97
|
+
"""
|
98
|
+
error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor)
|
99
|
+
if error_message.trace.error.message:
|
100
|
+
error_message.trace.error.message = filter_secrets(error_message.trace.error.message)
|
101
|
+
if error_message.trace.error.internal_message:
|
102
|
+
error_message.trace.error.internal_message = filter_secrets(error_message.trace.error.internal_message)
|
103
|
+
if error_message.trace.error.stack_trace:
|
104
|
+
error_message.trace.error.stack_trace = filter_secrets(error_message.trace.error.stack_trace)
|
105
|
+
return error_message
|
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
|
|
24
24
|
airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
|
25
25
|
airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
|
26
26
|
airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
|
27
|
-
airbyte_cdk/sources/abstract_source.py,sha256=
|
27
|
+
airbyte_cdk/sources/abstract_source.py,sha256=Gie6CY-WztnUtOahoyMRlV8ON48eDIzjVG6fUKwCqvw,16127
|
28
28
|
airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
|
29
29
|
airbyte_cdk/sources/connector_state_manager.py,sha256=p9iwWbb5uqRbsrHsdZBMXKmyHgLVbsOcV3QQexBFnPE,11052
|
30
30
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
@@ -186,7 +186,7 @@ airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaF
|
|
186
186
|
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=cmO1SQt5PIQRNNoh2KBv6aeY8NEY9x2dlmiRwGwU1vg,6557
|
187
187
|
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=qS0DJzXlVew6armFDJ0eNcSxRCmkA7JWQYFl6gcv3dU,13113
|
188
188
|
airbyte_cdk/sources/file_based/stream/concurrent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
189
|
-
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=
|
189
|
+
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=rjf8htUotdAXWSGcFA0jFHJfaai_EnmQxncnxMWTN2A,13320
|
190
190
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py,sha256=WKEYXZwSla6xwp7k1mnyG3kl9xCzEZ9B3eE-cxIuzIM,310
|
191
191
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py,sha256=UYLE2A2RdV-5FaQ70naZZWY34l5AEJkIRlTH05-e_-k,1961
|
192
192
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py,sha256=jHiej28aKQJ3UmWXQxHRCK8xkzY5H0-zxQiVqFs5rAI,14389
|
@@ -269,7 +269,7 @@ airbyte_cdk/utils/oneof_option_config.py,sha256=N8EmWdYdwt0FM7fuShh6H8nj_r4KEL9t
|
|
269
269
|
airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
|
270
270
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=LGjSSk8lmBiC0GiHqxDwu_iMN6bCe05UMpz9e7nCw5E,741
|
271
271
|
airbyte_cdk/utils/stream_status_utils.py,sha256=k7OY6AkJW8ifyh7ZYetC5Yy1nxM6Mx3apOAviCjJh80,971
|
272
|
-
airbyte_cdk/utils/traced_exception.py,sha256=
|
272
|
+
airbyte_cdk/utils/traced_exception.py,sha256=IDYvUkbgkOMjusiuP0xU65mHzl5nLDkhA3o-FvNDfjI,4336
|
273
273
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
274
274
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
275
275
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
@@ -282,14 +282,14 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
282
282
|
unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
|
283
283
|
unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
|
284
284
|
unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
285
|
-
unit_tests/sources/test_abstract_source.py,sha256=
|
285
|
+
unit_tests/sources/test_abstract_source.py,sha256=wIcMNIB66bQnYnC8LY_OPLUibBL7fSg9Lm6jDqOWb4g,59544
|
286
286
|
unit_tests/sources/test_concurrent_source.py,sha256=3i7pSRetKSoP6LBpXyuXpWi2_VOwta_aTm_kgnDaLqk,3704
|
287
287
|
unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
|
288
288
|
unit_tests/sources/test_connector_state_manager.py,sha256=KAvYmuaWwg2kSnPNKri6Ne8TmLpsSimotsnDLLKkDD0,24369
|
289
289
|
unit_tests/sources/test_http_logger.py,sha256=VT6DqgspI3DcRnoBQkkQX0z4dF_AOiYZ5P_zxmMW8oU,9004
|
290
|
-
unit_tests/sources/test_integration_source.py,sha256=
|
290
|
+
unit_tests/sources/test_integration_source.py,sha256=qcWld9evB1rAjALWX8SDshGz7seYkN3HCamQ6KQ2Idw,4269
|
291
291
|
unit_tests/sources/test_source.py,sha256=W0I4umL_d_OToLYYiRkjkJR6e-cCYjdV8zKc3uLvF0k,27999
|
292
|
-
unit_tests/sources/test_source_read.py,sha256=
|
292
|
+
unit_tests/sources/test_source_read.py,sha256=n9XpVQLfsQH8eh6D99MDiNVBBKcf6UtouThDJcGH6SU,17186
|
293
293
|
unit_tests/sources/concurrent_source/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
294
294
|
unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py,sha256=zsGnMcEsBedjW8wahil6LNqniil-3NXhyZd5W-80Km0,3665
|
295
295
|
unit_tests/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
@@ -456,8 +456,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
456
456
|
unit_tests/utils/test_secret_utils.py,sha256=CdKK8A2-5XVxbXVtX22FK9dwwMeP5KNqDH6luWRXSNw,5256
|
457
457
|
unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
|
458
458
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
459
|
-
airbyte_cdk-0.
|
460
|
-
airbyte_cdk-0.
|
461
|
-
airbyte_cdk-0.
|
462
|
-
airbyte_cdk-0.
|
463
|
-
airbyte_cdk-0.
|
459
|
+
airbyte_cdk-0.63.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
460
|
+
airbyte_cdk-0.63.1.dist-info/METADATA,sha256=-pgcWwY9lrlm3-6C9BxburyjPpElSbKPAep02SEB-hk,11073
|
461
|
+
airbyte_cdk-0.63.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
462
|
+
airbyte_cdk-0.63.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
463
|
+
airbyte_cdk-0.63.1.dist-info/RECORD,,
|
@@ -13,6 +13,7 @@ import pytest
|
|
13
13
|
from airbyte_cdk.models import (
|
14
14
|
AirbyteCatalog,
|
15
15
|
AirbyteConnectionStatus,
|
16
|
+
AirbyteErrorTraceMessage,
|
16
17
|
AirbyteLogMessage,
|
17
18
|
AirbyteMessage,
|
18
19
|
AirbyteRecordMessage,
|
@@ -27,6 +28,7 @@ from airbyte_cdk.models import (
|
|
27
28
|
ConfiguredAirbyteCatalog,
|
28
29
|
ConfiguredAirbyteStream,
|
29
30
|
DestinationSyncMode,
|
31
|
+
FailureType,
|
30
32
|
Level,
|
31
33
|
Status,
|
32
34
|
StreamDescriptor,
|
@@ -40,6 +42,7 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
|
40
42
|
from airbyte_cdk.sources.message import MessageRepository
|
41
43
|
from airbyte_cdk.sources.streams import IncrementalMixin, Stream
|
42
44
|
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
45
|
+
from airbyte_cdk.utils.airbyte_secrets_utils import update_secrets
|
43
46
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
44
47
|
from pytest import fixture
|
45
48
|
|
@@ -54,12 +57,14 @@ class MockSource(AbstractSource):
|
|
54
57
|
per_stream: bool = True,
|
55
58
|
message_repository: MessageRepository = None,
|
56
59
|
exception_on_missing_stream: bool = True,
|
60
|
+
stop_sync_on_stream_failure: bool = False,
|
57
61
|
):
|
58
62
|
self._streams = streams
|
59
63
|
self.check_lambda = check_lambda
|
60
64
|
self.per_stream = per_stream
|
61
65
|
self.exception_on_missing_stream = exception_on_missing_stream
|
62
66
|
self._message_repository = message_repository
|
67
|
+
self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
|
63
68
|
|
64
69
|
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
|
65
70
|
if self.check_lambda:
|
@@ -84,6 +89,12 @@ class MockSource(AbstractSource):
|
|
84
89
|
return self._message_repository
|
85
90
|
|
86
91
|
|
92
|
+
class MockSourceWithStopSyncFalseOverride(MockSource):
|
93
|
+
@property
|
94
|
+
def stop_sync_on_stream_failure(self) -> bool:
|
95
|
+
return False
|
96
|
+
|
97
|
+
|
87
98
|
class StreamNoStateMethod(Stream):
|
88
99
|
name = "managers"
|
89
100
|
primary_key = None
|
@@ -115,8 +126,11 @@ class StreamRaisesException(Stream):
|
|
115
126
|
name = "lamentations"
|
116
127
|
primary_key = None
|
117
128
|
|
129
|
+
def __init__(self, exception_to_raise):
|
130
|
+
self._exception_to_raise = exception_to_raise
|
131
|
+
|
118
132
|
def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
|
119
|
-
raise
|
133
|
+
raise self._exception_to_raise
|
120
134
|
|
121
135
|
|
122
136
|
MESSAGE_FROM_REPOSITORY = Mock()
|
@@ -291,7 +305,7 @@ def test_read_stream_emits_repository_message_on_error(mocker, message_repositor
|
|
291
305
|
|
292
306
|
source = MockSource(streams=[stream], message_repository=message_repository)
|
293
307
|
|
294
|
-
with pytest.raises(
|
308
|
+
with pytest.raises(AirbyteTracedException):
|
295
309
|
messages = list(source.read(logger, {}, ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])))
|
296
310
|
assert MESSAGE_FROM_REPOSITORY in messages
|
297
311
|
|
@@ -306,14 +320,14 @@ def test_read_stream_with_error_gets_display_message(mocker):
|
|
306
320
|
catalog = ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])
|
307
321
|
|
308
322
|
# without get_error_display_message
|
309
|
-
with pytest.raises(
|
323
|
+
with pytest.raises(AirbyteTracedException):
|
310
324
|
list(source.read(logger, {}, catalog))
|
311
325
|
|
312
326
|
mocker.patch.object(MockStream, "get_error_display_message", return_value="my message")
|
313
327
|
|
314
|
-
with pytest.raises(AirbyteTracedException
|
328
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
315
329
|
list(source.read(logger, {}, catalog))
|
316
|
-
assert exc.value.message
|
330
|
+
assert "oh no!" in exc.value.message
|
317
331
|
|
318
332
|
|
319
333
|
GLOBAL_EMITTED_AT = 1
|
@@ -358,6 +372,22 @@ def _as_state(state_data: Dict[str, Any], stream_name: str = "", per_stream_stat
|
|
358
372
|
return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data))
|
359
373
|
|
360
374
|
|
375
|
+
def _as_error_trace(stream: str, error_message: str, internal_message: Optional[str], failure_type: Optional[FailureType], stack_trace: Optional[str]) -> AirbyteMessage:
|
376
|
+
trace_message = AirbyteTraceMessage(
|
377
|
+
emitted_at=datetime.datetime.now().timestamp() * 1000.0,
|
378
|
+
type=TraceType.ERROR,
|
379
|
+
error=AirbyteErrorTraceMessage(
|
380
|
+
stream_descriptor=StreamDescriptor(name=stream),
|
381
|
+
message=error_message,
|
382
|
+
internal_message=internal_message,
|
383
|
+
failure_type=failure_type,
|
384
|
+
stack_trace=stack_trace,
|
385
|
+
),
|
386
|
+
)
|
387
|
+
|
388
|
+
return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
|
389
|
+
|
390
|
+
|
361
391
|
def _configured_stream(stream: Stream, sync_mode: SyncMode):
|
362
392
|
return ConfiguredAirbyteStream(
|
363
393
|
stream=stream.as_airbyte_stream(),
|
@@ -1174,21 +1204,27 @@ def test_checkpoint_state_from_stream_instance():
|
|
1174
1204
|
)
|
1175
1205
|
|
1176
1206
|
|
1177
|
-
|
1207
|
+
@pytest.mark.parametrize(
|
1208
|
+
"exception_to_raise,expected_error_message,expected_internal_message",
|
1209
|
+
[
|
1210
|
+
pytest.param(AirbyteTracedException(message="I was born only to crash like Icarus"), "I was born only to crash like Icarus", None, id="test_raises_traced_exception"),
|
1211
|
+
pytest.param(Exception("Generic connector error message"), "Something went wrong in the connector. See the logs for more details.", "Generic connector error message", id="test_raises_generic_exception"),
|
1212
|
+
]
|
1213
|
+
)
|
1214
|
+
def test_continue_sync_with_failed_streams(mocker, exception_to_raise, expected_error_message, expected_internal_message):
|
1178
1215
|
"""
|
1179
|
-
Tests that running a sync for a connector with multiple streams
|
1180
|
-
|
1216
|
+
Tests that running a sync for a connector with multiple streams will continue syncing when one stream fails
|
1217
|
+
with an error. This source does not override the default behavior defined in the AbstractSource class.
|
1181
1218
|
"""
|
1182
1219
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1183
1220
|
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1184
|
-
s2 = StreamRaisesException()
|
1221
|
+
s2 = StreamRaisesException(exception_to_raise=exception_to_raise)
|
1185
1222
|
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1186
1223
|
|
1187
1224
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1188
1225
|
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|
1189
1226
|
|
1190
1227
|
src = MockSource(streams=[s1, s2, s3])
|
1191
|
-
mocker.patch.object(MockSource, "continue_sync_on_stream_failure", return_value=True)
|
1192
1228
|
catalog = ConfiguredAirbyteCatalog(
|
1193
1229
|
streams=[
|
1194
1230
|
_configured_stream(s1, SyncMode.full_refresh),
|
@@ -1205,6 +1241,7 @@ def test_continue_sync_with_failed_streams(mocker):
|
|
1205
1241
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1206
1242
|
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1207
1243
|
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1244
|
+
_as_error_trace("lamentations", expected_error_message, expected_internal_message, FailureType.system_error, None),
|
1208
1245
|
_as_stream_status("s3", AirbyteStreamStatus.STARTED),
|
1209
1246
|
_as_stream_status("s3", AirbyteStreamStatus.RUNNING),
|
1210
1247
|
*_as_records("s3", stream_output),
|
@@ -1212,26 +1249,75 @@ def test_continue_sync_with_failed_streams(mocker):
|
|
1212
1249
|
]
|
1213
1250
|
)
|
1214
1251
|
|
1215
|
-
messages = []
|
1216
1252
|
with pytest.raises(AirbyteTracedException) as exc:
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1253
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1254
|
+
messages = _fix_emitted_at(messages)
|
1255
|
+
|
1256
|
+
assert expected == messages
|
1221
1257
|
|
1222
|
-
messages = _fix_emitted_at(messages)
|
1223
|
-
assert expected == messages
|
1224
1258
|
assert "lamentations" in exc.value.message
|
1259
|
+
assert exc.value.failure_type == FailureType.config_error
|
1225
1260
|
|
1226
1261
|
|
1227
|
-
def
|
1262
|
+
def test_continue_sync_source_override_false(mocker):
|
1228
1263
|
"""
|
1229
|
-
Tests that running a sync for a connector
|
1230
|
-
|
1264
|
+
Tests that running a sync for a connector explicitly overriding the default AbstractSource.stop_sync_on_stream_failure
|
1265
|
+
property to be False which will continue syncing stream even if one encountered an exception.
|
1231
1266
|
"""
|
1267
|
+
update_secrets(["API_KEY_VALUE"])
|
1268
|
+
|
1232
1269
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1233
1270
|
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1234
|
-
s2 = StreamRaisesException()
|
1271
|
+
s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="I was born only to crash like Icarus"))
|
1272
|
+
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1273
|
+
|
1274
|
+
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1275
|
+
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|
1276
|
+
|
1277
|
+
src = MockSourceWithStopSyncFalseOverride(streams=[s1, s2, s3])
|
1278
|
+
catalog = ConfiguredAirbyteCatalog(
|
1279
|
+
streams=[
|
1280
|
+
_configured_stream(s1, SyncMode.full_refresh),
|
1281
|
+
_configured_stream(s2, SyncMode.full_refresh),
|
1282
|
+
_configured_stream(s3, SyncMode.full_refresh),
|
1283
|
+
]
|
1284
|
+
)
|
1285
|
+
|
1286
|
+
expected = _fix_emitted_at(
|
1287
|
+
[
|
1288
|
+
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
1289
|
+
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
1290
|
+
*_as_records("s1", stream_output),
|
1291
|
+
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1292
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1293
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1294
|
+
_as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
|
1295
|
+
_as_stream_status("s3", AirbyteStreamStatus.STARTED),
|
1296
|
+
_as_stream_status("s3", AirbyteStreamStatus.RUNNING),
|
1297
|
+
*_as_records("s3", stream_output),
|
1298
|
+
_as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
|
1299
|
+
]
|
1300
|
+
)
|
1301
|
+
|
1302
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
1303
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1304
|
+
messages = _fix_emitted_at(messages)
|
1305
|
+
|
1306
|
+
assert expected == messages
|
1307
|
+
|
1308
|
+
assert "lamentations" in exc.value.message
|
1309
|
+
assert exc.value.failure_type == FailureType.config_error
|
1310
|
+
|
1311
|
+
|
1312
|
+
def test_sync_error_trace_messages_obfuscate_secrets(mocker):
|
1313
|
+
"""
|
1314
|
+
Tests that exceptions emitted as trace messages by a source have secrets properly sanitized
|
1315
|
+
"""
|
1316
|
+
update_secrets(["API_KEY_VALUE"])
|
1317
|
+
|
1318
|
+
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1319
|
+
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1320
|
+
s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="My api_key value API_KEY_VALUE flew too close to the sun."))
|
1235
1321
|
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1236
1322
|
|
1237
1323
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
@@ -1254,15 +1340,73 @@ def test_stop_sync_with_failed_streams(mocker):
|
|
1254
1340
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1255
1341
|
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1256
1342
|
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1343
|
+
_as_error_trace("lamentations", "My api_key value **** flew too close to the sun.", None, FailureType.system_error, None),
|
1344
|
+
_as_stream_status("s3", AirbyteStreamStatus.STARTED),
|
1345
|
+
_as_stream_status("s3", AirbyteStreamStatus.RUNNING),
|
1346
|
+
*_as_records("s3", stream_output),
|
1347
|
+
_as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
|
1257
1348
|
]
|
1258
1349
|
)
|
1259
1350
|
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
# failed streams and that disrupts parsing the generator into the messages emitted before
|
1264
|
-
for message in src.read(logger, {}, catalog):
|
1265
|
-
messages.append(message)
|
1351
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
1352
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1353
|
+
messages = _fix_emitted_at(messages)
|
1266
1354
|
|
1267
|
-
|
1268
|
-
|
1355
|
+
assert expected == messages
|
1356
|
+
|
1357
|
+
assert "lamentations" in exc.value.message
|
1358
|
+
assert exc.value.failure_type == FailureType.config_error
|
1359
|
+
|
1360
|
+
|
1361
|
+
def test_continue_sync_with_failed_streams_with_override_false(mocker):
|
1362
|
+
"""
|
1363
|
+
Tests that running a sync for a connector with multiple streams and stop_sync_on_stream_failure enabled stops
|
1364
|
+
the sync when one stream fails with an error.
|
1365
|
+
"""
|
1366
|
+
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1367
|
+
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1368
|
+
s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
|
1369
|
+
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1370
|
+
|
1371
|
+
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1372
|
+
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|
1373
|
+
|
1374
|
+
src = MockSource(streams=[s1, s2, s3])
|
1375
|
+
mocker.patch.object(MockSource, "stop_sync_on_stream_failure", return_value=True)
|
1376
|
+
catalog = ConfiguredAirbyteCatalog(
|
1377
|
+
streams=[
|
1378
|
+
_configured_stream(s1, SyncMode.full_refresh),
|
1379
|
+
_configured_stream(s2, SyncMode.full_refresh),
|
1380
|
+
_configured_stream(s3, SyncMode.full_refresh),
|
1381
|
+
]
|
1382
|
+
)
|
1383
|
+
|
1384
|
+
expected = _fix_emitted_at(
|
1385
|
+
[
|
1386
|
+
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
1387
|
+
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
1388
|
+
*_as_records("s1", stream_output),
|
1389
|
+
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1390
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1391
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1392
|
+
_as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
|
1393
|
+
]
|
1394
|
+
)
|
1395
|
+
|
1396
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
1397
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1398
|
+
messages = _fix_emitted_at(messages)
|
1399
|
+
|
1400
|
+
assert expected == messages
|
1401
|
+
|
1402
|
+
assert "lamentations" in exc.value.message
|
1403
|
+
assert exc.value.failure_type == FailureType.config_error
|
1404
|
+
|
1405
|
+
|
1406
|
+
def _remove_stack_trace(message: AirbyteMessage) -> AirbyteMessage:
|
1407
|
+
"""
|
1408
|
+
Helper method that removes the stack trace from Airbyte trace messages to make asserting against expected records easier
|
1409
|
+
"""
|
1410
|
+
if message.trace and message.trace.error and message.trace.error.stack_trace:
|
1411
|
+
message.trace.error.stack_trace = None
|
1412
|
+
return message
|
@@ -2,7 +2,9 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
import json
|
5
6
|
import os
|
7
|
+
from typing import Any, List, Mapping
|
6
8
|
from unittest import mock
|
7
9
|
from unittest.mock import patch
|
8
10
|
|
@@ -22,9 +24,9 @@ from unit_tests.sources.fixtures.source_test_fixture import (
|
|
22
24
|
"deployment_mode, url_base, expected_records, expected_error",
|
23
25
|
[
|
24
26
|
pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
|
25
|
-
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [],
|
26
|
-
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [],
|
27
|
-
pytest.param("CLOUD", "https://localhost:80/api/v1/", [],
|
27
|
+
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
|
28
|
+
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
|
29
|
+
pytest.param("CLOUD", "https://localhost:80/api/v1/", [], "config_error", id="test_cloud_read_with_localhost"),
|
28
30
|
pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
|
29
31
|
pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
|
30
32
|
],
|
@@ -37,8 +39,10 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
|
|
37
39
|
with mock.patch.object(HttpTestStream, "url_base", url_base):
|
38
40
|
args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
|
39
41
|
if expected_error:
|
40
|
-
with pytest.raises(
|
42
|
+
with pytest.raises(AirbyteTracedException):
|
41
43
|
launch(source, args)
|
44
|
+
messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
|
45
|
+
assert contains_error_trace_message(messages, expected_error)
|
42
46
|
else:
|
43
47
|
launch(source, args)
|
44
48
|
|
@@ -47,14 +51,14 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
|
|
47
51
|
"deployment_mode, token_refresh_url, expected_records, expected_error",
|
48
52
|
[
|
49
53
|
pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
|
50
|
-
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [],
|
51
|
-
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [],
|
54
|
+
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
|
55
|
+
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
|
52
56
|
pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
|
53
57
|
pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
|
54
58
|
],
|
55
59
|
)
|
56
60
|
@patch.object(requests.Session, "send", fixture_mock_send)
|
57
|
-
def test_external_oauth_request_source(deployment_mode, token_refresh_url, expected_records, expected_error):
|
61
|
+
def test_external_oauth_request_source(capsys, deployment_mode, token_refresh_url, expected_records, expected_error):
|
58
62
|
oauth_authenticator = SourceFixtureOauthAuthenticator(
|
59
63
|
client_id="nora", client_secret="hae_sung", refresh_token="arthur", token_refresh_endpoint=token_refresh_url
|
60
64
|
)
|
@@ -63,7 +67,20 @@ def test_external_oauth_request_source(deployment_mode, token_refresh_url, expec
|
|
63
67
|
with mock.patch.dict(os.environ, {"DEPLOYMENT_MODE": deployment_mode}, clear=False): # clear=True clears the existing os.environ dict
|
64
68
|
args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
|
65
69
|
if expected_error:
|
66
|
-
with pytest.raises(
|
70
|
+
with pytest.raises(AirbyteTracedException):
|
67
71
|
launch(source, args)
|
72
|
+
messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
|
73
|
+
assert contains_error_trace_message(messages, expected_error)
|
68
74
|
else:
|
69
75
|
launch(source, args)
|
76
|
+
|
77
|
+
|
78
|
+
def contains_error_trace_message(messages: List[Mapping[str, Any]], expected_error: str) -> bool:
    """
    Report whether any of the given messages is an ERROR trace message with the expected failure type.

    :param messages: messages decoded from JSON, one mapping per emitted line
    :param expected_error: value compared against the message's trace.error.failure_type
    :return: True if at least one message has type "TRACE", trace type "ERROR" and a matching failure_type, False otherwise
    """
    for message in messages:
        if message.get("type") != "TRACE":
            continue

        # A TRACE message without a "trace" payload (missing or null) cannot match; skip it instead of raising AttributeError
        trace = message.get("trace")
        if trace is None or trace.get("type") != "ERROR":
            continue

        # Likewise, an ERROR trace without an "error" payload is treated as a non-match
        error = trace.get("error")
        if error is not None and error.get("failure_type") == expected_error:
            return True
    return False
|
@@ -343,7 +343,7 @@ def test_concurrent_source_yields_the_same_messages_as_abstract_source_when_an_e
|
|
343
343
|
source, concurrent_source = _init_sources([stream_slice_to_partition], state, logger)
|
344
344
|
config = {}
|
345
345
|
catalog = _create_configured_catalog(source._streams)
|
346
|
-
messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state,
|
346
|
+
messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state, AirbyteTracedException)
|
347
347
|
messages_from_concurrent_source = _read_from_source(concurrent_source, logger, config, catalog, state, RuntimeError)
|
348
348
|
|
349
349
|
expected_messages = [
|
File without changes
|
File without changes
|
File without changes
|