airbyte-cdk 0.62.2__py3-none-any.whl → 0.63.1__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- airbyte_cdk/sources/abstract_source.py +34 -13
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +3 -0
- airbyte_cdk/utils/traced_exception.py +16 -1
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/METADATA +1 -1
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/RECORD +11 -11
- unit_tests/sources/test_abstract_source.py +173 -29
- unit_tests/sources/test_integration_source.py +25 -8
- unit_tests/sources/test_source_read.py +1 -1
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/WHEEL +0 -0
- {airbyte_cdk-0.62.2.dist-info → airbyte_cdk-0.63.1.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,9 @@ from airbyte_cdk.models import (
|
|
14
14
|
AirbyteStreamStatus,
|
15
15
|
ConfiguredAirbyteCatalog,
|
16
16
|
ConfiguredAirbyteStream,
|
17
|
+
FailureType,
|
17
18
|
Status,
|
19
|
+
StreamDescriptor,
|
18
20
|
SyncMode,
|
19
21
|
)
|
20
22
|
from airbyte_cdk.models import Type as MessageType
|
@@ -27,6 +29,7 @@ from airbyte_cdk.sources.streams.http.http import HttpStream
|
|
27
29
|
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
28
30
|
from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
|
29
31
|
from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
|
32
|
+
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
30
33
|
from airbyte_cdk.utils.event_timing import create_timer
|
31
34
|
from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
|
32
35
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
@@ -133,11 +136,16 @@ class AbstractSource(Source, ABC):
|
|
133
136
|
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
|
134
137
|
yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
|
135
138
|
except AirbyteTracedException as e:
|
139
|
+
logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
|
140
|
+
logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
|
136
141
|
yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
142
|
+
yield e.as_sanitized_airbyte_message(stream_descriptor=StreamDescriptor(name=configured_stream.stream.name))
|
143
|
+
stream_name_to_exception[stream_instance.name] = e
|
144
|
+
if self.stop_sync_on_stream_failure:
|
145
|
+
logger.info(
|
146
|
+
f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
|
147
|
+
)
|
148
|
+
break
|
141
149
|
except Exception as e:
|
142
150
|
yield from self._emit_queued_messages()
|
143
151
|
logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
|
@@ -145,15 +153,28 @@ class AbstractSource(Source, ABC):
|
|
145
153
|
yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
|
146
154
|
display_message = stream_instance.get_error_display_message(e)
|
147
155
|
if display_message:
|
148
|
-
|
149
|
-
|
156
|
+
traced_exception = AirbyteTracedException.from_exception(e, message=display_message)
|
157
|
+
else:
|
158
|
+
traced_exception = AirbyteTracedException.from_exception(e)
|
159
|
+
yield traced_exception.as_sanitized_airbyte_message(
|
160
|
+
stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
|
161
|
+
)
|
162
|
+
stream_name_to_exception[stream_instance.name] = traced_exception
|
163
|
+
if self.stop_sync_on_stream_failure:
|
164
|
+
logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
|
165
|
+
break
|
150
166
|
finally:
|
151
167
|
timer.finish_event()
|
152
168
|
logger.info(f"Finished syncing {configured_stream.stream.name}")
|
153
169
|
logger.info(timer.report())
|
154
170
|
|
155
|
-
if
|
156
|
-
|
171
|
+
if len(stream_name_to_exception) > 0:
|
172
|
+
error_message = self._generate_failed_streams_error_message(stream_name_to_exception)
|
173
|
+
logger.info(error_message)
|
174
|
+
# We still raise at least one exception when a stream raises an exception because the platform currently relies
|
175
|
+
# on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
|
176
|
+
# type because this combined error isn't actionable; the previously emitted individual errors are.
|
177
|
+
raise AirbyteTracedException(message=error_message, failure_type=FailureType.config_error)
|
157
178
|
logger.info(f"Finished syncing {self.name}")
|
158
179
|
|
159
180
|
@property
|
@@ -282,17 +303,17 @@ class AbstractSource(Source, ABC):
|
|
282
303
|
return _default_message_repository
|
283
304
|
|
284
305
|
@property
|
285
|
-
def
|
306
|
+
def stop_sync_on_stream_failure(self) -> bool:
|
286
307
|
"""
|
287
308
|
WARNING: This function is in-development which means it is subject to change. Use at your own risk.
|
288
309
|
|
289
|
-
By default, a source
|
290
|
-
|
291
|
-
|
310
|
+
By default, when a source encounters an exception while syncing a stream, it will emit an error trace message and then
|
311
|
+
continue syncing the next stream. This can be overridden on a per-source basis so that the source will stop the sync
|
312
|
+
on the first error seen and emit a single error trace message for that stream.
|
292
313
|
"""
|
293
314
|
return False
|
294
315
|
|
295
316
|
@staticmethod
|
296
317
|
def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
|
297
|
-
failures = ", ".join([f"{stream}: {exception.__repr__()}" for stream, exception in stream_failures.items()])
|
318
|
+
failures = ", ".join([f"{stream}: {filter_secrets(exception.__repr__())}" for stream, exception in stream_failures.items()])
|
298
319
|
return f"During the sync, the following streams did not sync successfully: {failures}"
|
@@ -243,6 +243,9 @@ class FileBasedStreamPartition(Partition):
|
|
243
243
|
data_to_return = dict(record_data)
|
244
244
|
self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
|
245
245
|
yield Record(data_to_return, self.stream_name())
|
246
|
+
elif isinstance(record_data, AirbyteMessage) and record_data.type == Type.RECORD:
|
247
|
+
# `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
|
248
|
+
yield Record(record_data.record.data, self.stream_name())
|
246
249
|
else:
|
247
250
|
self._message_repository.emit_message(record_data)
|
248
251
|
except Exception as e:
|
@@ -13,6 +13,7 @@ from airbyte_cdk.models import (
|
|
13
13
|
AirbyteTraceMessage,
|
14
14
|
FailureType,
|
15
15
|
Status,
|
16
|
+
StreamDescriptor,
|
16
17
|
TraceType,
|
17
18
|
)
|
18
19
|
from airbyte_cdk.models import Type as MessageType
|
@@ -43,7 +44,7 @@ class AirbyteTracedException(Exception):
|
|
43
44
|
self._exception = exception
|
44
45
|
super().__init__(internal_message)
|
45
46
|
|
46
|
-
def as_airbyte_message(self) -> AirbyteMessage:
|
47
|
+
def as_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
|
47
48
|
"""
|
48
49
|
Builds an AirbyteTraceMessage from the exception
|
49
50
|
"""
|
@@ -60,6 +61,7 @@ class AirbyteTracedException(Exception):
|
|
60
61
|
internal_message=self.internal_message,
|
61
62
|
failure_type=self.failure_type,
|
62
63
|
stack_trace=stack_trace_str,
|
64
|
+
stream_descriptor=stream_descriptor,
|
63
65
|
),
|
64
66
|
)
|
65
67
|
|
@@ -88,3 +90,16 @@ class AirbyteTracedException(Exception):
|
|
88
90
|
:param exc: the exception that caused the error
|
89
91
|
"""
|
90
92
|
return cls(internal_message=str(exc), exception=exc, *args, **kwargs) # type: ignore # ignoring because of args and kwargs
|
93
|
+
|
94
|
+
def as_sanitized_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
|
95
|
+
"""
|
96
|
+
Builds an AirbyteTraceMessage from the exception and sanitizes any secrets from the message body
|
97
|
+
"""
|
98
|
+
error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor)
|
99
|
+
if error_message.trace.error.message:
|
100
|
+
error_message.trace.error.message = filter_secrets(error_message.trace.error.message)
|
101
|
+
if error_message.trace.error.internal_message:
|
102
|
+
error_message.trace.error.internal_message = filter_secrets(error_message.trace.error.internal_message)
|
103
|
+
if error_message.trace.error.stack_trace:
|
104
|
+
error_message.trace.error.stack_trace = filter_secrets(error_message.trace.error.stack_trace)
|
105
|
+
return error_message
|
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
|
|
24
24
|
airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
|
25
25
|
airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
|
26
26
|
airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
|
27
|
-
airbyte_cdk/sources/abstract_source.py,sha256=
|
27
|
+
airbyte_cdk/sources/abstract_source.py,sha256=Gie6CY-WztnUtOahoyMRlV8ON48eDIzjVG6fUKwCqvw,16127
|
28
28
|
airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
|
29
29
|
airbyte_cdk/sources/connector_state_manager.py,sha256=p9iwWbb5uqRbsrHsdZBMXKmyHgLVbsOcV3QQexBFnPE,11052
|
30
30
|
airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
|
@@ -186,7 +186,7 @@ airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaF
|
|
186
186
|
airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=cmO1SQt5PIQRNNoh2KBv6aeY8NEY9x2dlmiRwGwU1vg,6557
|
187
187
|
airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=qS0DJzXlVew6armFDJ0eNcSxRCmkA7JWQYFl6gcv3dU,13113
|
188
188
|
airbyte_cdk/sources/file_based/stream/concurrent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
189
|
-
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=
|
189
|
+
airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=rjf8htUotdAXWSGcFA0jFHJfaai_EnmQxncnxMWTN2A,13320
|
190
190
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py,sha256=WKEYXZwSla6xwp7k1mnyG3kl9xCzEZ9B3eE-cxIuzIM,310
|
191
191
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py,sha256=UYLE2A2RdV-5FaQ70naZZWY34l5AEJkIRlTH05-e_-k,1961
|
192
192
|
airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py,sha256=jHiej28aKQJ3UmWXQxHRCK8xkzY5H0-zxQiVqFs5rAI,14389
|
@@ -269,7 +269,7 @@ airbyte_cdk/utils/oneof_option_config.py,sha256=N8EmWdYdwt0FM7fuShh6H8nj_r4KEL9t
|
|
269
269
|
airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
|
270
270
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=LGjSSk8lmBiC0GiHqxDwu_iMN6bCe05UMpz9e7nCw5E,741
|
271
271
|
airbyte_cdk/utils/stream_status_utils.py,sha256=k7OY6AkJW8ifyh7ZYetC5Yy1nxM6Mx3apOAviCjJh80,971
|
272
|
-
airbyte_cdk/utils/traced_exception.py,sha256=
|
272
|
+
airbyte_cdk/utils/traced_exception.py,sha256=IDYvUkbgkOMjusiuP0xU65mHzl5nLDkhA3o-FvNDfjI,4336
|
273
273
|
source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
274
274
|
source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
|
275
275
|
unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
@@ -282,14 +282,14 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
282
282
|
unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
|
283
283
|
unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
|
284
284
|
unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
285
|
-
unit_tests/sources/test_abstract_source.py,sha256=
|
285
|
+
unit_tests/sources/test_abstract_source.py,sha256=wIcMNIB66bQnYnC8LY_OPLUibBL7fSg9Lm6jDqOWb4g,59544
|
286
286
|
unit_tests/sources/test_concurrent_source.py,sha256=3i7pSRetKSoP6LBpXyuXpWi2_VOwta_aTm_kgnDaLqk,3704
|
287
287
|
unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
|
288
288
|
unit_tests/sources/test_connector_state_manager.py,sha256=KAvYmuaWwg2kSnPNKri6Ne8TmLpsSimotsnDLLKkDD0,24369
|
289
289
|
unit_tests/sources/test_http_logger.py,sha256=VT6DqgspI3DcRnoBQkkQX0z4dF_AOiYZ5P_zxmMW8oU,9004
|
290
|
-
unit_tests/sources/test_integration_source.py,sha256=
|
290
|
+
unit_tests/sources/test_integration_source.py,sha256=qcWld9evB1rAjALWX8SDshGz7seYkN3HCamQ6KQ2Idw,4269
|
291
291
|
unit_tests/sources/test_source.py,sha256=W0I4umL_d_OToLYYiRkjkJR6e-cCYjdV8zKc3uLvF0k,27999
|
292
|
-
unit_tests/sources/test_source_read.py,sha256=
|
292
|
+
unit_tests/sources/test_source_read.py,sha256=n9XpVQLfsQH8eh6D99MDiNVBBKcf6UtouThDJcGH6SU,17186
|
293
293
|
unit_tests/sources/concurrent_source/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
294
294
|
unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py,sha256=zsGnMcEsBedjW8wahil6LNqniil-3NXhyZd5W-80Km0,3665
|
295
295
|
unit_tests/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
|
@@ -456,8 +456,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
|
|
456
456
|
unit_tests/utils/test_secret_utils.py,sha256=CdKK8A2-5XVxbXVtX22FK9dwwMeP5KNqDH6luWRXSNw,5256
|
457
457
|
unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
|
458
458
|
unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
|
459
|
-
airbyte_cdk-0.
|
460
|
-
airbyte_cdk-0.
|
461
|
-
airbyte_cdk-0.
|
462
|
-
airbyte_cdk-0.
|
463
|
-
airbyte_cdk-0.
|
459
|
+
airbyte_cdk-0.63.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
460
|
+
airbyte_cdk-0.63.1.dist-info/METADATA,sha256=-pgcWwY9lrlm3-6C9BxburyjPpElSbKPAep02SEB-hk,11073
|
461
|
+
airbyte_cdk-0.63.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
462
|
+
airbyte_cdk-0.63.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
|
463
|
+
airbyte_cdk-0.63.1.dist-info/RECORD,,
|
@@ -13,6 +13,7 @@ import pytest
|
|
13
13
|
from airbyte_cdk.models import (
|
14
14
|
AirbyteCatalog,
|
15
15
|
AirbyteConnectionStatus,
|
16
|
+
AirbyteErrorTraceMessage,
|
16
17
|
AirbyteLogMessage,
|
17
18
|
AirbyteMessage,
|
18
19
|
AirbyteRecordMessage,
|
@@ -27,6 +28,7 @@ from airbyte_cdk.models import (
|
|
27
28
|
ConfiguredAirbyteCatalog,
|
28
29
|
ConfiguredAirbyteStream,
|
29
30
|
DestinationSyncMode,
|
31
|
+
FailureType,
|
30
32
|
Level,
|
31
33
|
Status,
|
32
34
|
StreamDescriptor,
|
@@ -40,6 +42,7 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
|
|
40
42
|
from airbyte_cdk.sources.message import MessageRepository
|
41
43
|
from airbyte_cdk.sources.streams import IncrementalMixin, Stream
|
42
44
|
from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
|
45
|
+
from airbyte_cdk.utils.airbyte_secrets_utils import update_secrets
|
43
46
|
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
|
44
47
|
from pytest import fixture
|
45
48
|
|
@@ -54,12 +57,14 @@ class MockSource(AbstractSource):
|
|
54
57
|
per_stream: bool = True,
|
55
58
|
message_repository: MessageRepository = None,
|
56
59
|
exception_on_missing_stream: bool = True,
|
60
|
+
stop_sync_on_stream_failure: bool = False,
|
57
61
|
):
|
58
62
|
self._streams = streams
|
59
63
|
self.check_lambda = check_lambda
|
60
64
|
self.per_stream = per_stream
|
61
65
|
self.exception_on_missing_stream = exception_on_missing_stream
|
62
66
|
self._message_repository = message_repository
|
67
|
+
self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
|
63
68
|
|
64
69
|
def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
|
65
70
|
if self.check_lambda:
|
@@ -84,6 +89,12 @@ class MockSource(AbstractSource):
|
|
84
89
|
return self._message_repository
|
85
90
|
|
86
91
|
|
92
|
+
class MockSourceWithStopSyncFalseOverride(MockSource):
|
93
|
+
@property
|
94
|
+
def stop_sync_on_stream_failure(self) -> bool:
|
95
|
+
return False
|
96
|
+
|
97
|
+
|
87
98
|
class StreamNoStateMethod(Stream):
|
88
99
|
name = "managers"
|
89
100
|
primary_key = None
|
@@ -115,8 +126,11 @@ class StreamRaisesException(Stream):
|
|
115
126
|
name = "lamentations"
|
116
127
|
primary_key = None
|
117
128
|
|
129
|
+
def __init__(self, exception_to_raise):
|
130
|
+
self._exception_to_raise = exception_to_raise
|
131
|
+
|
118
132
|
def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
|
119
|
-
raise
|
133
|
+
raise self._exception_to_raise
|
120
134
|
|
121
135
|
|
122
136
|
MESSAGE_FROM_REPOSITORY = Mock()
|
@@ -291,7 +305,7 @@ def test_read_stream_emits_repository_message_on_error(mocker, message_repositor
|
|
291
305
|
|
292
306
|
source = MockSource(streams=[stream], message_repository=message_repository)
|
293
307
|
|
294
|
-
with pytest.raises(
|
308
|
+
with pytest.raises(AirbyteTracedException):
|
295
309
|
messages = list(source.read(logger, {}, ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])))
|
296
310
|
assert MESSAGE_FROM_REPOSITORY in messages
|
297
311
|
|
@@ -306,14 +320,14 @@ def test_read_stream_with_error_gets_display_message(mocker):
|
|
306
320
|
catalog = ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])
|
307
321
|
|
308
322
|
# without get_error_display_message
|
309
|
-
with pytest.raises(
|
323
|
+
with pytest.raises(AirbyteTracedException):
|
310
324
|
list(source.read(logger, {}, catalog))
|
311
325
|
|
312
326
|
mocker.patch.object(MockStream, "get_error_display_message", return_value="my message")
|
313
327
|
|
314
|
-
with pytest.raises(AirbyteTracedException
|
328
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
315
329
|
list(source.read(logger, {}, catalog))
|
316
|
-
assert exc.value.message
|
330
|
+
assert "oh no!" in exc.value.message
|
317
331
|
|
318
332
|
|
319
333
|
GLOBAL_EMITTED_AT = 1
|
@@ -358,6 +372,22 @@ def _as_state(state_data: Dict[str, Any], stream_name: str = "", per_stream_stat
|
|
358
372
|
return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data))
|
359
373
|
|
360
374
|
|
375
|
+
def _as_error_trace(stream: str, error_message: str, internal_message: Optional[str], failure_type: Optional[FailureType], stack_trace: Optional[str]) -> AirbyteMessage:
|
376
|
+
trace_message = AirbyteTraceMessage(
|
377
|
+
emitted_at=datetime.datetime.now().timestamp() * 1000.0,
|
378
|
+
type=TraceType.ERROR,
|
379
|
+
error=AirbyteErrorTraceMessage(
|
380
|
+
stream_descriptor=StreamDescriptor(name=stream),
|
381
|
+
message=error_message,
|
382
|
+
internal_message=internal_message,
|
383
|
+
failure_type=failure_type,
|
384
|
+
stack_trace=stack_trace,
|
385
|
+
),
|
386
|
+
)
|
387
|
+
|
388
|
+
return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
|
389
|
+
|
390
|
+
|
361
391
|
def _configured_stream(stream: Stream, sync_mode: SyncMode):
|
362
392
|
return ConfiguredAirbyteStream(
|
363
393
|
stream=stream.as_airbyte_stream(),
|
@@ -1174,21 +1204,27 @@ def test_checkpoint_state_from_stream_instance():
|
|
1174
1204
|
)
|
1175
1205
|
|
1176
1206
|
|
1177
|
-
|
1207
|
+
@pytest.mark.parametrize(
|
1208
|
+
"exception_to_raise,expected_error_message,expected_internal_message",
|
1209
|
+
[
|
1210
|
+
pytest.param(AirbyteTracedException(message="I was born only to crash like Icarus"), "I was born only to crash like Icarus", None, id="test_raises_traced_exception"),
|
1211
|
+
pytest.param(Exception("Generic connector error message"), "Something went wrong in the connector. See the logs for more details.", "Generic connector error message", id="test_raises_generic_exception"),
|
1212
|
+
]
|
1213
|
+
)
|
1214
|
+
def test_continue_sync_with_failed_streams(mocker, exception_to_raise, expected_error_message, expected_internal_message):
|
1178
1215
|
"""
|
1179
|
-
Tests that running a sync for a connector with multiple streams
|
1180
|
-
|
1216
|
+
Tests that running a sync for a connector with multiple streams will continue syncing when one stream fails
|
1217
|
+
with an error. This source does not override the default behavior defined in the AbstractSource class.
|
1181
1218
|
"""
|
1182
1219
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1183
1220
|
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1184
|
-
s2 = StreamRaisesException()
|
1221
|
+
s2 = StreamRaisesException(exception_to_raise=exception_to_raise)
|
1185
1222
|
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1186
1223
|
|
1187
1224
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1188
1225
|
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|
1189
1226
|
|
1190
1227
|
src = MockSource(streams=[s1, s2, s3])
|
1191
|
-
mocker.patch.object(MockSource, "continue_sync_on_stream_failure", return_value=True)
|
1192
1228
|
catalog = ConfiguredAirbyteCatalog(
|
1193
1229
|
streams=[
|
1194
1230
|
_configured_stream(s1, SyncMode.full_refresh),
|
@@ -1205,6 +1241,7 @@ def test_continue_sync_with_failed_streams(mocker):
|
|
1205
1241
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1206
1242
|
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1207
1243
|
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1244
|
+
_as_error_trace("lamentations", expected_error_message, expected_internal_message, FailureType.system_error, None),
|
1208
1245
|
_as_stream_status("s3", AirbyteStreamStatus.STARTED),
|
1209
1246
|
_as_stream_status("s3", AirbyteStreamStatus.RUNNING),
|
1210
1247
|
*_as_records("s3", stream_output),
|
@@ -1212,26 +1249,75 @@ def test_continue_sync_with_failed_streams(mocker):
|
|
1212
1249
|
]
|
1213
1250
|
)
|
1214
1251
|
|
1215
|
-
messages = []
|
1216
1252
|
with pytest.raises(AirbyteTracedException) as exc:
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1253
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1254
|
+
messages = _fix_emitted_at(messages)
|
1255
|
+
|
1256
|
+
assert expected == messages
|
1221
1257
|
|
1222
|
-
messages = _fix_emitted_at(messages)
|
1223
|
-
assert expected == messages
|
1224
1258
|
assert "lamentations" in exc.value.message
|
1259
|
+
assert exc.value.failure_type == FailureType.config_error
|
1225
1260
|
|
1226
1261
|
|
1227
|
-
def
|
1262
|
+
def test_continue_sync_source_override_false(mocker):
|
1228
1263
|
"""
|
1229
|
-
Tests that running a sync for a connector
|
1230
|
-
|
1264
|
+
Tests that running a sync for a connector explicitly overriding the default AbstractSource.stop_sync_on_stream_failure
|
1265
|
+
property to be False continues syncing the remaining streams even if one encounters an exception.
|
1231
1266
|
"""
|
1267
|
+
update_secrets(["API_KEY_VALUE"])
|
1268
|
+
|
1232
1269
|
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1233
1270
|
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1234
|
-
s2 = StreamRaisesException()
|
1271
|
+
s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="I was born only to crash like Icarus"))
|
1272
|
+
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1273
|
+
|
1274
|
+
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1275
|
+
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|
1276
|
+
|
1277
|
+
src = MockSourceWithStopSyncFalseOverride(streams=[s1, s2, s3])
|
1278
|
+
catalog = ConfiguredAirbyteCatalog(
|
1279
|
+
streams=[
|
1280
|
+
_configured_stream(s1, SyncMode.full_refresh),
|
1281
|
+
_configured_stream(s2, SyncMode.full_refresh),
|
1282
|
+
_configured_stream(s3, SyncMode.full_refresh),
|
1283
|
+
]
|
1284
|
+
)
|
1285
|
+
|
1286
|
+
expected = _fix_emitted_at(
|
1287
|
+
[
|
1288
|
+
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
1289
|
+
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
1290
|
+
*_as_records("s1", stream_output),
|
1291
|
+
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1292
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1293
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1294
|
+
_as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
|
1295
|
+
_as_stream_status("s3", AirbyteStreamStatus.STARTED),
|
1296
|
+
_as_stream_status("s3", AirbyteStreamStatus.RUNNING),
|
1297
|
+
*_as_records("s3", stream_output),
|
1298
|
+
_as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
|
1299
|
+
]
|
1300
|
+
)
|
1301
|
+
|
1302
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
1303
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1304
|
+
messages = _fix_emitted_at(messages)
|
1305
|
+
|
1306
|
+
assert expected == messages
|
1307
|
+
|
1308
|
+
assert "lamentations" in exc.value.message
|
1309
|
+
assert exc.value.failure_type == FailureType.config_error
|
1310
|
+
|
1311
|
+
|
1312
|
+
def test_sync_error_trace_messages_obfuscate_secrets(mocker):
|
1313
|
+
"""
|
1314
|
+
Tests that exceptions emitted as trace messages by a source have secrets properly sanitized
|
1315
|
+
"""
|
1316
|
+
update_secrets(["API_KEY_VALUE"])
|
1317
|
+
|
1318
|
+
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1319
|
+
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1320
|
+
s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="My api_key value API_KEY_VALUE flew too close to the sun."))
|
1235
1321
|
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1236
1322
|
|
1237
1323
|
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
@@ -1254,15 +1340,73 @@ def test_stop_sync_with_failed_streams(mocker):
|
|
1254
1340
|
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1255
1341
|
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1256
1342
|
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1343
|
+
_as_error_trace("lamentations", "My api_key value **** flew too close to the sun.", None, FailureType.system_error, None),
|
1344
|
+
_as_stream_status("s3", AirbyteStreamStatus.STARTED),
|
1345
|
+
_as_stream_status("s3", AirbyteStreamStatus.RUNNING),
|
1346
|
+
*_as_records("s3", stream_output),
|
1347
|
+
_as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
|
1257
1348
|
]
|
1258
1349
|
)
|
1259
1350
|
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
# failed streams and that disrupts parsing the generator into the messages emitted before
|
1264
|
-
for message in src.read(logger, {}, catalog):
|
1265
|
-
messages.append(message)
|
1351
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
1352
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1353
|
+
messages = _fix_emitted_at(messages)
|
1266
1354
|
|
1267
|
-
|
1268
|
-
|
1355
|
+
assert expected == messages
|
1356
|
+
|
1357
|
+
assert "lamentations" in exc.value.message
|
1358
|
+
assert exc.value.failure_type == FailureType.config_error
|
1359
|
+
|
1360
|
+
|
1361
|
+
def test_continue_sync_with_failed_streams_with_override_false(mocker):
|
1362
|
+
"""
|
1363
|
+
Tests that running a sync for a connector with multiple streams and stop_sync_on_stream_failure enabled stops
|
1364
|
+
the sync when one stream fails with an error.
|
1365
|
+
"""
|
1366
|
+
stream_output = [{"k1": "v1"}, {"k2": "v2"}]
|
1367
|
+
s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
|
1368
|
+
s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
|
1369
|
+
s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
|
1370
|
+
|
1371
|
+
mocker.patch.object(MockStream, "get_json_schema", return_value={})
|
1372
|
+
mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
|
1373
|
+
|
1374
|
+
src = MockSource(streams=[s1, s2, s3])
|
1375
|
+
mocker.patch.object(MockSource, "stop_sync_on_stream_failure", return_value=True)
|
1376
|
+
catalog = ConfiguredAirbyteCatalog(
|
1377
|
+
streams=[
|
1378
|
+
_configured_stream(s1, SyncMode.full_refresh),
|
1379
|
+
_configured_stream(s2, SyncMode.full_refresh),
|
1380
|
+
_configured_stream(s3, SyncMode.full_refresh),
|
1381
|
+
]
|
1382
|
+
)
|
1383
|
+
|
1384
|
+
expected = _fix_emitted_at(
|
1385
|
+
[
|
1386
|
+
_as_stream_status("s1", AirbyteStreamStatus.STARTED),
|
1387
|
+
_as_stream_status("s1", AirbyteStreamStatus.RUNNING),
|
1388
|
+
*_as_records("s1", stream_output),
|
1389
|
+
_as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
|
1390
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
|
1391
|
+
_as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
|
1392
|
+
_as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
|
1393
|
+
]
|
1394
|
+
)
|
1395
|
+
|
1396
|
+
with pytest.raises(AirbyteTracedException) as exc:
|
1397
|
+
messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
|
1398
|
+
messages = _fix_emitted_at(messages)
|
1399
|
+
|
1400
|
+
assert expected == messages
|
1401
|
+
|
1402
|
+
assert "lamentations" in exc.value.message
|
1403
|
+
assert exc.value.failure_type == FailureType.config_error
|
1404
|
+
|
1405
|
+
|
1406
|
+
def _remove_stack_trace(message: AirbyteMessage) -> AirbyteMessage:
|
1407
|
+
"""
|
1408
|
+
Helper method that removes the stack trace from Airbyte trace messages to make asserting against expected records easier
|
1409
|
+
"""
|
1410
|
+
if message.trace and message.trace.error and message.trace.error.stack_trace:
|
1411
|
+
message.trace.error.stack_trace = None
|
1412
|
+
return message
|
@@ -2,7 +2,9 @@
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
3
|
#
|
4
4
|
|
5
|
+
import json
|
5
6
|
import os
|
7
|
+
from typing import Any, List, Mapping
|
6
8
|
from unittest import mock
|
7
9
|
from unittest.mock import patch
|
8
10
|
|
@@ -22,9 +24,9 @@ from unit_tests.sources.fixtures.source_test_fixture import (
|
|
22
24
|
"deployment_mode, url_base, expected_records, expected_error",
|
23
25
|
[
|
24
26
|
pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
|
25
|
-
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [],
|
26
|
-
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [],
|
27
|
-
pytest.param("CLOUD", "https://localhost:80/api/v1/", [],
|
27
|
+
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
|
28
|
+
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
|
29
|
+
pytest.param("CLOUD", "https://localhost:80/api/v1/", [], "config_error", id="test_cloud_read_with_localhost"),
|
28
30
|
pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
|
29
31
|
pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
|
30
32
|
],
|
@@ -37,8 +39,10 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
|
|
37
39
|
with mock.patch.object(HttpTestStream, "url_base", url_base):
|
38
40
|
args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
|
39
41
|
if expected_error:
|
40
|
-
with pytest.raises(
|
42
|
+
with pytest.raises(AirbyteTracedException):
|
41
43
|
launch(source, args)
|
44
|
+
messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
|
45
|
+
assert contains_error_trace_message(messages, expected_error)
|
42
46
|
else:
|
43
47
|
launch(source, args)
|
44
48
|
|
@@ -47,14 +51,14 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
|
|
47
51
|
"deployment_mode, token_refresh_url, expected_records, expected_error",
|
48
52
|
[
|
49
53
|
pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
|
50
|
-
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [],
|
51
|
-
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [],
|
54
|
+
pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
|
55
|
+
pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
|
52
56
|
pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
|
53
57
|
pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
|
54
58
|
],
|
55
59
|
)
|
56
60
|
@patch.object(requests.Session, "send", fixture_mock_send)
|
57
|
-
def test_external_oauth_request_source(deployment_mode, token_refresh_url, expected_records, expected_error):
|
61
|
+
def test_external_oauth_request_source(capsys, deployment_mode, token_refresh_url, expected_records, expected_error):
|
58
62
|
oauth_authenticator = SourceFixtureOauthAuthenticator(
|
59
63
|
client_id="nora", client_secret="hae_sung", refresh_token="arthur", token_refresh_endpoint=token_refresh_url
|
60
64
|
)
|
@@ -63,7 +67,20 @@ def test_external_oauth_request_source(deployment_mode, token_refresh_url, expec
|
|
63
67
|
with mock.patch.dict(os.environ, {"DEPLOYMENT_MODE": deployment_mode}, clear=False): # clear=True clears the existing os.environ dict
|
64
68
|
args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
|
65
69
|
if expected_error:
|
66
|
-
with pytest.raises(
|
70
|
+
with pytest.raises(AirbyteTracedException):
|
67
71
|
launch(source, args)
|
72
|
+
messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
|
73
|
+
assert contains_error_trace_message(messages, expected_error)
|
68
74
|
else:
|
69
75
|
launch(source, args)
|
76
|
+
|
77
|
+
|
78
|
+
def contains_error_trace_message(messages: List[Mapping[str, Any]], expected_error: str) -> bool:
    """
    Report whether any of the given messages is an ERROR trace message with the expected failure type.

    :param messages: Airbyte messages already decoded from JSON into mappings
    :param expected_error: the ``failure_type`` value to look for (e.g. "config_error")
    :return: True if at least one message has type "TRACE", trace type "ERROR" and a matching
        ``failure_type``; False otherwise, including when ``messages`` is empty
    """
    for message in messages:
        if message.get("type") != "TRACE":
            continue
        # A TRACE message without a "trace" payload cannot match; skip it instead of
        # failing with AttributeError on None.get(...).
        trace = message.get("trace")
        if not trace or trace.get("type") != "ERROR":
            continue
        # Likewise tolerate an ERROR trace that is missing its "error" payload.
        error = trace.get("error")
        if error is not None and error.get("failure_type") == expected_error:
            return True
    return False
|
@@ -343,7 +343,7 @@ def test_concurrent_source_yields_the_same_messages_as_abstract_source_when_an_e
|
|
343
343
|
source, concurrent_source = _init_sources([stream_slice_to_partition], state, logger)
|
344
344
|
config = {}
|
345
345
|
catalog = _create_configured_catalog(source._streams)
|
346
|
-
messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state,
|
346
|
+
messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state, AirbyteTracedException)
|
347
347
|
messages_from_concurrent_source = _read_from_source(concurrent_source, logger, config, catalog, state, RuntimeError)
|
348
348
|
|
349
349
|
expected_messages = [
|
File without changes
|
File without changes
|
File without changes
|