airbyte-cdk 0.62.2__py3-none-any.whl → 0.63.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -14,7 +14,9 @@ from airbyte_cdk.models import (
14
14
  AirbyteStreamStatus,
15
15
  ConfiguredAirbyteCatalog,
16
16
  ConfiguredAirbyteStream,
17
+ FailureType,
17
18
  Status,
19
+ StreamDescriptor,
18
20
  SyncMode,
19
21
  )
20
22
  from airbyte_cdk.models import Type as MessageType
@@ -27,6 +29,7 @@ from airbyte_cdk.sources.streams.http.http import HttpStream
27
29
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
28
30
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
29
31
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
32
+ from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
30
33
  from airbyte_cdk.utils.event_timing import create_timer
31
34
  from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
32
35
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
@@ -133,11 +136,16 @@ class AbstractSource(Source, ABC):
133
136
  logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
134
137
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
135
138
  except AirbyteTracedException as e:
139
+ logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
140
+ logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
136
141
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
137
- if self.continue_sync_on_stream_failure:
138
- stream_name_to_exception[stream_instance.name] = e
139
- else:
140
- raise e
142
+ yield e.as_sanitized_airbyte_message(stream_descriptor=StreamDescriptor(name=configured_stream.stream.name))
143
+ stream_name_to_exception[stream_instance.name] = e
144
+ if self.stop_sync_on_stream_failure:
145
+ logger.info(
146
+ f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
147
+ )
148
+ break
141
149
  except Exception as e:
142
150
  yield from self._emit_queued_messages()
143
151
  logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
@@ -145,15 +153,28 @@ class AbstractSource(Source, ABC):
145
153
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
146
154
  display_message = stream_instance.get_error_display_message(e)
147
155
  if display_message:
148
- raise AirbyteTracedException.from_exception(e, message=display_message) from e
149
- raise e
156
+ traced_exception = AirbyteTracedException.from_exception(e, message=display_message)
157
+ else:
158
+ traced_exception = AirbyteTracedException.from_exception(e)
159
+ yield traced_exception.as_sanitized_airbyte_message(
160
+ stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
161
+ )
162
+ stream_name_to_exception[stream_instance.name] = traced_exception
163
+ if self.stop_sync_on_stream_failure:
164
+ logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
165
+ break
150
166
  finally:
151
167
  timer.finish_event()
152
168
  logger.info(f"Finished syncing {configured_stream.stream.name}")
153
169
  logger.info(timer.report())
154
170
 
155
- if self.continue_sync_on_stream_failure and len(stream_name_to_exception) > 0:
156
- raise AirbyteTracedException(message=self._generate_failed_streams_error_message(stream_name_to_exception))
171
+ if len(stream_name_to_exception) > 0:
172
+ error_message = self._generate_failed_streams_error_message(stream_name_to_exception)
173
+ logger.info(error_message)
174
+ # We still raise at least one exception when a stream raises an exception because the platform currently relies
175
+ # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
176
+ # type because this combined error isn't actionable, but rather the previously emitted individual errors.
177
+ raise AirbyteTracedException(message=error_message, failure_type=FailureType.config_error)
157
178
  logger.info(f"Finished syncing {self.name}")
158
179
 
159
180
  @property
@@ -282,17 +303,17 @@ class AbstractSource(Source, ABC):
282
303
  return _default_message_repository
283
304
 
284
305
  @property
285
- def continue_sync_on_stream_failure(self) -> bool:
306
+ def stop_sync_on_stream_failure(self) -> bool:
286
307
  """
287
308
  WARNING: This function is in-development which means it is subject to change. Use at your own risk.
288
309
 
289
- By default, a source should raise an exception and stop the sync when it encounters an error while syncing a stream. This
290
- method can be overridden on a per-source basis so that a source will continue syncing streams other streams even if an
291
- exception is raised for a stream.
310
+ By default, when a source encounters an exception while syncing a stream, it will emit an error trace message and then
311
+ continue syncing the next stream. This can be overwritten on a per-source basis so that the source will stop the sync
312
+ on the first error seen and emit a single error trace message for that stream.
292
313
  """
293
314
  return False
294
315
 
295
316
  @staticmethod
296
317
  def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
297
- failures = ", ".join([f"{stream}: {exception.__repr__()}" for stream, exception in stream_failures.items()])
318
+ failures = ", ".join([f"{stream}: {filter_secrets(exception.__repr__())}" for stream, exception in stream_failures.items()])
298
319
  return f"During the sync, the following streams did not sync successfully: {failures}"
@@ -243,6 +243,9 @@ class FileBasedStreamPartition(Partition):
243
243
  data_to_return = dict(record_data)
244
244
  self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
245
245
  yield Record(data_to_return, self.stream_name())
246
+ elif isinstance(record_data, AirbyteMessage) and record_data.type == Type.RECORD:
247
+ # `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
248
+ yield Record(record_data.record.data, self.stream_name())
246
249
  else:
247
250
  self._message_repository.emit_message(record_data)
248
251
  except Exception as e:
@@ -13,6 +13,7 @@ from airbyte_cdk.models import (
13
13
  AirbyteTraceMessage,
14
14
  FailureType,
15
15
  Status,
16
+ StreamDescriptor,
16
17
  TraceType,
17
18
  )
18
19
  from airbyte_cdk.models import Type as MessageType
@@ -43,7 +44,7 @@ class AirbyteTracedException(Exception):
43
44
  self._exception = exception
44
45
  super().__init__(internal_message)
45
46
 
46
- def as_airbyte_message(self) -> AirbyteMessage:
47
+ def as_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
47
48
  """
48
49
  Builds an AirbyteTraceMessage from the exception
49
50
  """
@@ -60,6 +61,7 @@ class AirbyteTracedException(Exception):
60
61
  internal_message=self.internal_message,
61
62
  failure_type=self.failure_type,
62
63
  stack_trace=stack_trace_str,
64
+ stream_descriptor=stream_descriptor,
63
65
  ),
64
66
  )
65
67
 
@@ -88,3 +90,16 @@ class AirbyteTracedException(Exception):
88
90
  :param exc: the exception that caused the error
89
91
  """
90
92
  return cls(internal_message=str(exc), exception=exc, *args, **kwargs) # type: ignore # ignoring because of args and kwargs
93
+
94
+ def as_sanitized_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
95
+ """
96
+ Builds an AirbyteTraceMessage from the exception and sanitizes any secrets from the message body
97
+ """
98
+ error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor)
99
+ if error_message.trace.error.message:
100
+ error_message.trace.error.message = filter_secrets(error_message.trace.error.message)
101
+ if error_message.trace.error.internal_message:
102
+ error_message.trace.error.internal_message = filter_secrets(error_message.trace.error.internal_message)
103
+ if error_message.trace.error.stack_trace:
104
+ error_message.trace.error.stack_trace = filter_secrets(error_message.trace.error.stack_trace)
105
+ return error_message
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.62.2
3
+ Version: 0.63.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
24
24
  airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
25
25
  airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
26
26
  airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
27
- airbyte_cdk/sources/abstract_source.py,sha256=GSpNwbwJ0v-KvxWa0u_nWeC0r6G2fZNkpKUhXzf6YlI,14399
27
+ airbyte_cdk/sources/abstract_source.py,sha256=Gie6CY-WztnUtOahoyMRlV8ON48eDIzjVG6fUKwCqvw,16127
28
28
  airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
29
29
  airbyte_cdk/sources/connector_state_manager.py,sha256=p9iwWbb5uqRbsrHsdZBMXKmyHgLVbsOcV3QQexBFnPE,11052
30
30
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
@@ -186,7 +186,7 @@ airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaF
186
186
  airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=cmO1SQt5PIQRNNoh2KBv6aeY8NEY9x2dlmiRwGwU1vg,6557
187
187
  airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=qS0DJzXlVew6armFDJ0eNcSxRCmkA7JWQYFl6gcv3dU,13113
188
188
  airbyte_cdk/sources/file_based/stream/concurrent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=AX_H5cIWkWOJkhXGuTSuZ56Jr5szoNfQ3NdabbWPTtI,13043
189
+ airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=rjf8htUotdAXWSGcFA0jFHJfaai_EnmQxncnxMWTN2A,13320
190
190
  airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py,sha256=WKEYXZwSla6xwp7k1mnyG3kl9xCzEZ9B3eE-cxIuzIM,310
191
191
  airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py,sha256=UYLE2A2RdV-5FaQ70naZZWY34l5AEJkIRlTH05-e_-k,1961
192
192
  airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py,sha256=jHiej28aKQJ3UmWXQxHRCK8xkzY5H0-zxQiVqFs5rAI,14389
@@ -269,7 +269,7 @@ airbyte_cdk/utils/oneof_option_config.py,sha256=N8EmWdYdwt0FM7fuShh6H8nj_r4KEL9t
269
269
  airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
270
270
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=LGjSSk8lmBiC0GiHqxDwu_iMN6bCe05UMpz9e7nCw5E,741
271
271
  airbyte_cdk/utils/stream_status_utils.py,sha256=k7OY6AkJW8ifyh7ZYetC5Yy1nxM6Mx3apOAviCjJh80,971
272
- airbyte_cdk/utils/traced_exception.py,sha256=ChtuhSV_fkmMv8QjPBR1dV1US8uxlmVt_Myt-C2OIqQ,3396
272
+ airbyte_cdk/utils/traced_exception.py,sha256=IDYvUkbgkOMjusiuP0xU65mHzl5nLDkhA3o-FvNDfjI,4336
273
273
  source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
274
274
  source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
275
275
  unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
@@ -282,14 +282,14 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
282
282
  unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
283
283
  unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
284
284
  unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
- unit_tests/sources/test_abstract_source.py,sha256=m-YcMK1DgIhJLKUHoANFPx_d6yh-zgLrU1wLNlNCuTg,52802
285
+ unit_tests/sources/test_abstract_source.py,sha256=wIcMNIB66bQnYnC8LY_OPLUibBL7fSg9Lm6jDqOWb4g,59544
286
286
  unit_tests/sources/test_concurrent_source.py,sha256=3i7pSRetKSoP6LBpXyuXpWi2_VOwta_aTm_kgnDaLqk,3704
287
287
  unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
288
288
  unit_tests/sources/test_connector_state_manager.py,sha256=KAvYmuaWwg2kSnPNKri6Ne8TmLpsSimotsnDLLKkDD0,24369
289
289
  unit_tests/sources/test_http_logger.py,sha256=VT6DqgspI3DcRnoBQkkQX0z4dF_AOiYZ5P_zxmMW8oU,9004
290
- unit_tests/sources/test_integration_source.py,sha256=u_w5NS9n8GkTsoTjJvBE3-g8x0NG2054hL3PtW7IfAM,3458
290
+ unit_tests/sources/test_integration_source.py,sha256=qcWld9evB1rAjALWX8SDshGz7seYkN3HCamQ6KQ2Idw,4269
291
291
  unit_tests/sources/test_source.py,sha256=W0I4umL_d_OToLYYiRkjkJR6e-cCYjdV8zKc3uLvF0k,27999
292
- unit_tests/sources/test_source_read.py,sha256=AEFoJfzM0_5QQIJyKwGLK_kq_Vz_CBivImnUnXJQJ0I,17176
292
+ unit_tests/sources/test_source_read.py,sha256=n9XpVQLfsQH8eh6D99MDiNVBBKcf6UtouThDJcGH6SU,17186
293
293
  unit_tests/sources/concurrent_source/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
294
294
  unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py,sha256=zsGnMcEsBedjW8wahil6LNqniil-3NXhyZd5W-80Km0,3665
295
295
  unit_tests/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
@@ -456,8 +456,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
456
456
  unit_tests/utils/test_secret_utils.py,sha256=CdKK8A2-5XVxbXVtX22FK9dwwMeP5KNqDH6luWRXSNw,5256
457
457
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
458
458
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
459
- airbyte_cdk-0.62.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
460
- airbyte_cdk-0.62.2.dist-info/METADATA,sha256=mg5FUvzFvSF_W3YQZY6V6fUcSoUy4C03oFt2hF6w0FI,11073
461
- airbyte_cdk-0.62.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
462
- airbyte_cdk-0.62.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
463
- airbyte_cdk-0.62.2.dist-info/RECORD,,
459
+ airbyte_cdk-0.63.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
460
+ airbyte_cdk-0.63.1.dist-info/METADATA,sha256=-pgcWwY9lrlm3-6C9BxburyjPpElSbKPAep02SEB-hk,11073
461
+ airbyte_cdk-0.63.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
462
+ airbyte_cdk-0.63.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
463
+ airbyte_cdk-0.63.1.dist-info/RECORD,,
@@ -13,6 +13,7 @@ import pytest
13
13
  from airbyte_cdk.models import (
14
14
  AirbyteCatalog,
15
15
  AirbyteConnectionStatus,
16
+ AirbyteErrorTraceMessage,
16
17
  AirbyteLogMessage,
17
18
  AirbyteMessage,
18
19
  AirbyteRecordMessage,
@@ -27,6 +28,7 @@ from airbyte_cdk.models import (
27
28
  ConfiguredAirbyteCatalog,
28
29
  ConfiguredAirbyteStream,
29
30
  DestinationSyncMode,
31
+ FailureType,
30
32
  Level,
31
33
  Status,
32
34
  StreamDescriptor,
@@ -40,6 +42,7 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
40
42
  from airbyte_cdk.sources.message import MessageRepository
41
43
  from airbyte_cdk.sources.streams import IncrementalMixin, Stream
42
44
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
45
+ from airbyte_cdk.utils.airbyte_secrets_utils import update_secrets
43
46
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
44
47
  from pytest import fixture
45
48
 
@@ -54,12 +57,14 @@ class MockSource(AbstractSource):
54
57
  per_stream: bool = True,
55
58
  message_repository: MessageRepository = None,
56
59
  exception_on_missing_stream: bool = True,
60
+ stop_sync_on_stream_failure: bool = False,
57
61
  ):
58
62
  self._streams = streams
59
63
  self.check_lambda = check_lambda
60
64
  self.per_stream = per_stream
61
65
  self.exception_on_missing_stream = exception_on_missing_stream
62
66
  self._message_repository = message_repository
67
+ self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
63
68
 
64
69
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
65
70
  if self.check_lambda:
@@ -84,6 +89,12 @@ class MockSource(AbstractSource):
84
89
  return self._message_repository
85
90
 
86
91
 
92
+ class MockSourceWithStopSyncFalseOverride(MockSource):
93
+ @property
94
+ def stop_sync_on_stream_failure(self) -> bool:
95
+ return False
96
+
97
+
87
98
  class StreamNoStateMethod(Stream):
88
99
  name = "managers"
89
100
  primary_key = None
@@ -115,8 +126,11 @@ class StreamRaisesException(Stream):
115
126
  name = "lamentations"
116
127
  primary_key = None
117
128
 
129
+ def __init__(self, exception_to_raise):
130
+ self._exception_to_raise = exception_to_raise
131
+
118
132
  def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
119
- raise AirbyteTracedException(message="I was born only to crash like Icarus")
133
+ raise self._exception_to_raise
120
134
 
121
135
 
122
136
  MESSAGE_FROM_REPOSITORY = Mock()
@@ -291,7 +305,7 @@ def test_read_stream_emits_repository_message_on_error(mocker, message_repositor
291
305
 
292
306
  source = MockSource(streams=[stream], message_repository=message_repository)
293
307
 
294
- with pytest.raises(RuntimeError):
308
+ with pytest.raises(AirbyteTracedException):
295
309
  messages = list(source.read(logger, {}, ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])))
296
310
  assert MESSAGE_FROM_REPOSITORY in messages
297
311
 
@@ -306,14 +320,14 @@ def test_read_stream_with_error_gets_display_message(mocker):
306
320
  catalog = ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])
307
321
 
308
322
  # without get_error_display_message
309
- with pytest.raises(RuntimeError, match="oh no!"):
323
+ with pytest.raises(AirbyteTracedException):
310
324
  list(source.read(logger, {}, catalog))
311
325
 
312
326
  mocker.patch.object(MockStream, "get_error_display_message", return_value="my message")
313
327
 
314
- with pytest.raises(AirbyteTracedException, match="oh no!") as exc:
328
+ with pytest.raises(AirbyteTracedException) as exc:
315
329
  list(source.read(logger, {}, catalog))
316
- assert exc.value.message == "my message"
330
+ assert "oh no!" in exc.value.message
317
331
 
318
332
 
319
333
  GLOBAL_EMITTED_AT = 1
@@ -358,6 +372,22 @@ def _as_state(state_data: Dict[str, Any], stream_name: str = "", per_stream_stat
358
372
  return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data))
359
373
 
360
374
 
375
+ def _as_error_trace(stream: str, error_message: str, internal_message: Optional[str], failure_type: Optional[FailureType], stack_trace: Optional[str]) -> AirbyteMessage:
376
+ trace_message = AirbyteTraceMessage(
377
+ emitted_at=datetime.datetime.now().timestamp() * 1000.0,
378
+ type=TraceType.ERROR,
379
+ error=AirbyteErrorTraceMessage(
380
+ stream_descriptor=StreamDescriptor(name=stream),
381
+ message=error_message,
382
+ internal_message=internal_message,
383
+ failure_type=failure_type,
384
+ stack_trace=stack_trace,
385
+ ),
386
+ )
387
+
388
+ return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
389
+
390
+
361
391
  def _configured_stream(stream: Stream, sync_mode: SyncMode):
362
392
  return ConfiguredAirbyteStream(
363
393
  stream=stream.as_airbyte_stream(),
@@ -1174,21 +1204,27 @@ def test_checkpoint_state_from_stream_instance():
1174
1204
  )
1175
1205
 
1176
1206
 
1177
- def test_continue_sync_with_failed_streams(mocker):
1207
+ @pytest.mark.parametrize(
1208
+ "exception_to_raise,expected_error_message,expected_internal_message",
1209
+ [
1210
+ pytest.param(AirbyteTracedException(message="I was born only to crash like Icarus"), "I was born only to crash like Icarus", None, id="test_raises_traced_exception"),
1211
+ pytest.param(Exception("Generic connector error message"), "Something went wrong in the connector. See the logs for more details.", "Generic connector error message", id="test_raises_generic_exception"),
1212
+ ]
1213
+ )
1214
+ def test_continue_sync_with_failed_streams(mocker, exception_to_raise, expected_error_message, expected_internal_message):
1178
1215
  """
1179
- Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure enabled continues
1180
- syncing even when one stream fails with an error.
1216
+ Tests that running a sync for a connector with multiple streams will continue syncing when one stream fails
1217
+ with an error. This source does not override the default behavior defined in the AbstractSource class.
1181
1218
  """
1182
1219
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1183
1220
  s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1184
- s2 = StreamRaisesException()
1221
+ s2 = StreamRaisesException(exception_to_raise=exception_to_raise)
1185
1222
  s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1186
1223
 
1187
1224
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
1188
1225
  mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1189
1226
 
1190
1227
  src = MockSource(streams=[s1, s2, s3])
1191
- mocker.patch.object(MockSource, "continue_sync_on_stream_failure", return_value=True)
1192
1228
  catalog = ConfiguredAirbyteCatalog(
1193
1229
  streams=[
1194
1230
  _configured_stream(s1, SyncMode.full_refresh),
@@ -1205,6 +1241,7 @@ def test_continue_sync_with_failed_streams(mocker):
1205
1241
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1206
1242
  _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1207
1243
  _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1244
+ _as_error_trace("lamentations", expected_error_message, expected_internal_message, FailureType.system_error, None),
1208
1245
  _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1209
1246
  _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1210
1247
  *_as_records("s3", stream_output),
@@ -1212,26 +1249,75 @@ def test_continue_sync_with_failed_streams(mocker):
1212
1249
  ]
1213
1250
  )
1214
1251
 
1215
- messages = []
1216
1252
  with pytest.raises(AirbyteTracedException) as exc:
1217
- # We can't use list comprehension or list() here because we are still raising a final exception for the
1218
- # failed streams and that disrupts parsing the generator into the messages emitted before
1219
- for message in src.read(logger, {}, catalog):
1220
- messages.append(message)
1253
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1254
+ messages = _fix_emitted_at(messages)
1255
+
1256
+ assert expected == messages
1221
1257
 
1222
- messages = _fix_emitted_at(messages)
1223
- assert expected == messages
1224
1258
  assert "lamentations" in exc.value.message
1259
+ assert exc.value.failure_type == FailureType.config_error
1225
1260
 
1226
1261
 
1227
- def test_stop_sync_with_failed_streams(mocker):
1262
+ def test_continue_sync_source_override_false(mocker):
1228
1263
  """
1229
- Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure disabled stops
1230
- syncing once a stream fails with an error.
1264
+ Tests that running a sync for a connector explicitly overriding the default AbstractSource.stop_sync_on_stream_failure
1265
+ property to be False which will continue syncing stream even if one encountered an exception.
1231
1266
  """
1267
+ update_secrets(["API_KEY_VALUE"])
1268
+
1232
1269
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1233
1270
  s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1234
- s2 = StreamRaisesException()
1271
+ s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="I was born only to crash like Icarus"))
1272
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1273
+
1274
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1275
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1276
+
1277
+ src = MockSourceWithStopSyncFalseOverride(streams=[s1, s2, s3])
1278
+ catalog = ConfiguredAirbyteCatalog(
1279
+ streams=[
1280
+ _configured_stream(s1, SyncMode.full_refresh),
1281
+ _configured_stream(s2, SyncMode.full_refresh),
1282
+ _configured_stream(s3, SyncMode.full_refresh),
1283
+ ]
1284
+ )
1285
+
1286
+ expected = _fix_emitted_at(
1287
+ [
1288
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1289
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1290
+ *_as_records("s1", stream_output),
1291
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1292
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1293
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1294
+ _as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
1295
+ _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1296
+ _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1297
+ *_as_records("s3", stream_output),
1298
+ _as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
1299
+ ]
1300
+ )
1301
+
1302
+ with pytest.raises(AirbyteTracedException) as exc:
1303
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1304
+ messages = _fix_emitted_at(messages)
1305
+
1306
+ assert expected == messages
1307
+
1308
+ assert "lamentations" in exc.value.message
1309
+ assert exc.value.failure_type == FailureType.config_error
1310
+
1311
+
1312
+ def test_sync_error_trace_messages_obfuscate_secrets(mocker):
1313
+ """
1314
+ Tests that exceptions emitted as trace messages by a source have secrets properly sanitized
1315
+ """
1316
+ update_secrets(["API_KEY_VALUE"])
1317
+
1318
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1319
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1320
+ s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="My api_key value API_KEY_VALUE flew too close to the sun."))
1235
1321
  s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1236
1322
 
1237
1323
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
@@ -1254,15 +1340,73 @@ def test_stop_sync_with_failed_streams(mocker):
1254
1340
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1255
1341
  _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1256
1342
  _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1343
+ _as_error_trace("lamentations", "My api_key value **** flew too close to the sun.", None, FailureType.system_error, None),
1344
+ _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1345
+ _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1346
+ *_as_records("s3", stream_output),
1347
+ _as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
1257
1348
  ]
1258
1349
  )
1259
1350
 
1260
- messages = []
1261
- with pytest.raises(AirbyteTracedException):
1262
- # We can't use list comprehension or list() here because we are still raising a final exception for the
1263
- # failed streams and that disrupts parsing the generator into the messages emitted before
1264
- for message in src.read(logger, {}, catalog):
1265
- messages.append(message)
1351
+ with pytest.raises(AirbyteTracedException) as exc:
1352
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1353
+ messages = _fix_emitted_at(messages)
1266
1354
 
1267
- messages = _fix_emitted_at(messages)
1268
- assert expected == messages
1355
+ assert expected == messages
1356
+
1357
+ assert "lamentations" in exc.value.message
1358
+ assert exc.value.failure_type == FailureType.config_error
1359
+
1360
+
1361
+ def test_continue_sync_with_failed_streams_with_override_false(mocker):
1362
+ """
1363
+ Tests that running a sync for a connector with multiple streams and stop_sync_on_stream_failure enabled stops
1364
+ the sync when one stream fails with an error.
1365
+ """
1366
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1367
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1368
+ s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
1369
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1370
+
1371
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1372
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1373
+
1374
+ src = MockSource(streams=[s1, s2, s3])
1375
+ mocker.patch.object(MockSource, "stop_sync_on_stream_failure", return_value=True)
1376
+ catalog = ConfiguredAirbyteCatalog(
1377
+ streams=[
1378
+ _configured_stream(s1, SyncMode.full_refresh),
1379
+ _configured_stream(s2, SyncMode.full_refresh),
1380
+ _configured_stream(s3, SyncMode.full_refresh),
1381
+ ]
1382
+ )
1383
+
1384
+ expected = _fix_emitted_at(
1385
+ [
1386
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1387
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1388
+ *_as_records("s1", stream_output),
1389
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1390
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1391
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1392
+ _as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
1393
+ ]
1394
+ )
1395
+
1396
+ with pytest.raises(AirbyteTracedException) as exc:
1397
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1398
+ messages = _fix_emitted_at(messages)
1399
+
1400
+ assert expected == messages
1401
+
1402
+ assert "lamentations" in exc.value.message
1403
+ assert exc.value.failure_type == FailureType.config_error
1404
+
1405
+
1406
+ def _remove_stack_trace(message: AirbyteMessage) -> AirbyteMessage:
1407
+ """
1408
+ Helper method that removes the stack trace from Airbyte trace messages to make asserting against expected records easier
1409
+ """
1410
+ if message.trace and message.trace.error and message.trace.error.stack_trace:
1411
+ message.trace.error.stack_trace = None
1412
+ return message
@@ -2,7 +2,9 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ import json
5
6
  import os
7
+ from typing import Any, List, Mapping
6
8
  from unittest import mock
7
9
  from unittest.mock import patch
8
10
 
@@ -22,9 +24,9 @@ from unit_tests.sources.fixtures.source_test_fixture import (
22
24
  "deployment_mode, url_base, expected_records, expected_error",
23
25
  [
24
26
  pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
25
- pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], ValueError, id="test_cloud_read_with_unsecured_url"),
26
- pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], AirbyteTracedException, id="test_cloud_read_with_private_endpoint"),
27
- pytest.param("CLOUD", "https://localhost:80/api/v1/", [], AirbyteTracedException, id="test_cloud_read_with_localhost"),
27
+ pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
28
+ pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
29
+ pytest.param("CLOUD", "https://localhost:80/api/v1/", [], "config_error", id="test_cloud_read_with_localhost"),
28
30
  pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
29
31
  pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
30
32
  ],
@@ -37,8 +39,10 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
37
39
  with mock.patch.object(HttpTestStream, "url_base", url_base):
38
40
  args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
39
41
  if expected_error:
40
- with pytest.raises(expected_error):
42
+ with pytest.raises(AirbyteTracedException):
41
43
  launch(source, args)
44
+ messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
45
+ assert contains_error_trace_message(messages, expected_error)
42
46
  else:
43
47
  launch(source, args)
44
48
 
@@ -47,14 +51,14 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
47
51
  "deployment_mode, token_refresh_url, expected_records, expected_error",
48
52
  [
49
53
  pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
50
- pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], ValueError, id="test_cloud_read_with_unsecured_url"),
51
- pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], AirbyteTracedException, id="test_cloud_read_with_private_endpoint"),
54
+ pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
55
+ pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
52
56
  pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
53
57
  pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
54
58
  ],
55
59
  )
56
60
  @patch.object(requests.Session, "send", fixture_mock_send)
57
- def test_external_oauth_request_source(deployment_mode, token_refresh_url, expected_records, expected_error):
61
+ def test_external_oauth_request_source(capsys, deployment_mode, token_refresh_url, expected_records, expected_error):
58
62
  oauth_authenticator = SourceFixtureOauthAuthenticator(
59
63
  client_id="nora", client_secret="hae_sung", refresh_token="arthur", token_refresh_endpoint=token_refresh_url
60
64
  )
@@ -63,7 +67,20 @@ def test_external_oauth_request_source(deployment_mode, token_refresh_url, expec
63
67
  with mock.patch.dict(os.environ, {"DEPLOYMENT_MODE": deployment_mode}, clear=False): # clear=True clears the existing os.environ dict
64
68
  args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
65
69
  if expected_error:
66
- with pytest.raises(expected_error):
70
+ with pytest.raises(AirbyteTracedException):
67
71
  launch(source, args)
72
+ messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
73
+ assert contains_error_trace_message(messages, expected_error)
68
74
  else:
69
75
  launch(source, args)
76
+
77
+
78
def contains_error_trace_message(messages: List[Mapping[str, Any]], expected_error: str) -> bool:
    """
    Report whether any decoded message is an ERROR trace with the given failure type.

    :param messages: decoded messages, each a mapping with a "type" key; TRACE messages
        are expected to carry their payload under the "trace" key
    :param expected_error: the "failure_type" value to look for (e.g. "config_error")
    :return: True as soon as a matching ERROR trace message is found, False otherwise
    """
    for message in messages:
        if message.get("type") != "TRACE":
            continue

        # A TRACE message without a payload cannot match; skip it instead of
        # calling .get() on None and raising AttributeError.
        trace = message.get("trace")
        if not trace or trace.get("type") != "ERROR":
            continue

        # Same guard for an ERROR trace that is missing its "error" body.
        error = trace.get("error")
        if error is not None and error.get("failure_type") == expected_error:
            return True
    return False
@@ -343,7 +343,7 @@ def test_concurrent_source_yields_the_same_messages_as_abstract_source_when_an_e
343
343
  source, concurrent_source = _init_sources([stream_slice_to_partition], state, logger)
344
344
  config = {}
345
345
  catalog = _create_configured_catalog(source._streams)
346
- messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state, RuntimeError)
346
+ messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state, AirbyteTracedException)
347
347
  messages_from_concurrent_source = _read_from_source(concurrent_source, logger, config, catalog, state, RuntimeError)
348
348
 
349
349
  expected_messages = [