airbyte-cdk 0.62.2__py3-none-any.whl → 0.63.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,7 +14,9 @@ from airbyte_cdk.models import (
14
14
  AirbyteStreamStatus,
15
15
  ConfiguredAirbyteCatalog,
16
16
  ConfiguredAirbyteStream,
17
+ FailureType,
17
18
  Status,
19
+ StreamDescriptor,
18
20
  SyncMode,
19
21
  )
20
22
  from airbyte_cdk.models import Type as MessageType
@@ -27,6 +29,7 @@ from airbyte_cdk.sources.streams.http.http import HttpStream
27
29
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
28
30
  from airbyte_cdk.sources.utils.schema_helpers import InternalConfig, split_config
29
31
  from airbyte_cdk.sources.utils.slice_logger import DebugSliceLogger, SliceLogger
32
+ from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
30
33
  from airbyte_cdk.utils.event_timing import create_timer
31
34
  from airbyte_cdk.utils.stream_status_utils import as_airbyte_message as stream_status_as_airbyte_message
32
35
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
@@ -133,11 +136,16 @@ class AbstractSource(Source, ABC):
133
136
  logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
134
137
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
135
138
  except AirbyteTracedException as e:
139
+ logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
140
+ logger.info(f"Marking stream {configured_stream.stream.name} as STOPPED")
136
141
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
137
- if self.continue_sync_on_stream_failure:
138
- stream_name_to_exception[stream_instance.name] = e
139
- else:
140
- raise e
142
+ yield e.as_sanitized_airbyte_message(stream_descriptor=StreamDescriptor(name=configured_stream.stream.name))
143
+ stream_name_to_exception[stream_instance.name] = e
144
+ if self.stop_sync_on_stream_failure:
145
+ logger.info(
146
+ f"Stopping sync on error from stream {configured_stream.stream.name} because {self.name} does not support continuing syncs on error."
147
+ )
148
+ break
141
149
  except Exception as e:
142
150
  yield from self._emit_queued_messages()
143
151
  logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
@@ -145,15 +153,28 @@ class AbstractSource(Source, ABC):
145
153
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
146
154
  display_message = stream_instance.get_error_display_message(e)
147
155
  if display_message:
148
- raise AirbyteTracedException.from_exception(e, message=display_message) from e
149
- raise e
156
+ traced_exception = AirbyteTracedException.from_exception(e, message=display_message)
157
+ else:
158
+ traced_exception = AirbyteTracedException.from_exception(e)
159
+ yield traced_exception.as_sanitized_airbyte_message(
160
+ stream_descriptor=StreamDescriptor(name=configured_stream.stream.name)
161
+ )
162
+ stream_name_to_exception[stream_instance.name] = traced_exception
163
+ if self.stop_sync_on_stream_failure:
164
+ logger.info(f"{self.name} does not support continuing syncs on error from stream {configured_stream.stream.name}")
165
+ break
150
166
  finally:
151
167
  timer.finish_event()
152
168
  logger.info(f"Finished syncing {configured_stream.stream.name}")
153
169
  logger.info(timer.report())
154
170
 
155
- if self.continue_sync_on_stream_failure and len(stream_name_to_exception) > 0:
156
- raise AirbyteTracedException(message=self._generate_failed_streams_error_message(stream_name_to_exception))
171
+ if len(stream_name_to_exception) > 0:
172
+ error_message = self._generate_failed_streams_error_message(stream_name_to_exception)
173
+ logger.info(error_message)
174
+ # We still raise at least one exception when a stream raises an exception because the platform currently relies
175
+ # on a non-zero exit code to determine if a sync attempt has failed. We also raise the exception as a config_error
176
+ # type because this combined error isn't actionable; the previously emitted individual errors are.
177
+ raise AirbyteTracedException(message=error_message, failure_type=FailureType.config_error)
157
178
  logger.info(f"Finished syncing {self.name}")
158
179
 
159
180
  @property
@@ -282,17 +303,17 @@ class AbstractSource(Source, ABC):
282
303
  return _default_message_repository
283
304
 
284
305
  @property
285
- def continue_sync_on_stream_failure(self) -> bool:
306
+ def stop_sync_on_stream_failure(self) -> bool:
286
307
  """
287
308
  WARNING: This function is in-development which means it is subject to change. Use at your own risk.
288
309
 
289
- By default, a source should raise an exception and stop the sync when it encounters an error while syncing a stream. This
290
- method can be overridden on a per-source basis so that a source will continue syncing streams other streams even if an
291
- exception is raised for a stream.
310
+ By default, when a source encounters an exception while syncing a stream, it will emit an error trace message and then
311
+ continue syncing the next stream. This can be overridden on a per-source basis so that the source will stop the sync
312
+ on the first error seen and emit a single error trace message for that stream.
292
313
  """
293
314
  return False
294
315
 
295
316
  @staticmethod
296
317
  def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
297
- failures = ", ".join([f"{stream}: {exception.__repr__()}" for stream, exception in stream_failures.items()])
318
+ failures = ", ".join([f"{stream}: {filter_secrets(exception.__repr__())}" for stream, exception in stream_failures.items()])
298
319
  return f"During the sync, the following streams did not sync successfully: {failures}"
@@ -243,6 +243,9 @@ class FileBasedStreamPartition(Partition):
243
243
  data_to_return = dict(record_data)
244
244
  self._stream.transformer.transform(data_to_return, self._stream.get_json_schema())
245
245
  yield Record(data_to_return, self.stream_name())
246
+ elif isinstance(record_data, AirbyteMessage) and record_data.type == Type.RECORD:
247
+ # `AirbyteMessage`s of type `Record` should also be yielded so they are enqueued
248
+ yield Record(record_data.record.data, self.stream_name())
246
249
  else:
247
250
  self._message_repository.emit_message(record_data)
248
251
  except Exception as e:
@@ -13,6 +13,7 @@ from airbyte_cdk.models import (
13
13
  AirbyteTraceMessage,
14
14
  FailureType,
15
15
  Status,
16
+ StreamDescriptor,
16
17
  TraceType,
17
18
  )
18
19
  from airbyte_cdk.models import Type as MessageType
@@ -43,7 +44,7 @@ class AirbyteTracedException(Exception):
43
44
  self._exception = exception
44
45
  super().__init__(internal_message)
45
46
 
46
- def as_airbyte_message(self) -> AirbyteMessage:
47
+ def as_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
47
48
  """
48
49
  Builds an AirbyteTraceMessage from the exception
49
50
  """
@@ -60,6 +61,7 @@ class AirbyteTracedException(Exception):
60
61
  internal_message=self.internal_message,
61
62
  failure_type=self.failure_type,
62
63
  stack_trace=stack_trace_str,
64
+ stream_descriptor=stream_descriptor,
63
65
  ),
64
66
  )
65
67
 
@@ -88,3 +90,16 @@ class AirbyteTracedException(Exception):
88
90
  :param exc: the exception that caused the error
89
91
  """
90
92
  return cls(internal_message=str(exc), exception=exc, *args, **kwargs) # type: ignore # ignoring because of args and kwargs
93
+
94
+ def as_sanitized_airbyte_message(self, stream_descriptor: StreamDescriptor = None) -> AirbyteMessage:
95
+ """
96
+ Builds an AirbyteTraceMessage from the exception and sanitizes any secrets from the message body
97
+ """
98
+ error_message = self.as_airbyte_message(stream_descriptor=stream_descriptor)
99
+ if error_message.trace.error.message:
100
+ error_message.trace.error.message = filter_secrets(error_message.trace.error.message)
101
+ if error_message.trace.error.internal_message:
102
+ error_message.trace.error.internal_message = filter_secrets(error_message.trace.error.internal_message)
103
+ if error_message.trace.error.stack_trace:
104
+ error_message.trace.error.stack_trace = filter_secrets(error_message.trace.error.stack_trace)
105
+ return error_message
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.62.2
3
+ Version: 0.63.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
24
24
  airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
25
25
  airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
26
26
  airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
27
- airbyte_cdk/sources/abstract_source.py,sha256=GSpNwbwJ0v-KvxWa0u_nWeC0r6G2fZNkpKUhXzf6YlI,14399
27
+ airbyte_cdk/sources/abstract_source.py,sha256=Gie6CY-WztnUtOahoyMRlV8ON48eDIzjVG6fUKwCqvw,16127
28
28
  airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
29
29
  airbyte_cdk/sources/connector_state_manager.py,sha256=p9iwWbb5uqRbsrHsdZBMXKmyHgLVbsOcV3QQexBFnPE,11052
30
30
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
@@ -186,7 +186,7 @@ airbyte_cdk/sources/file_based/stream/__init__.py,sha256=QPDqdgjsabOQD93dSFqHGaF
186
186
  airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py,sha256=cmO1SQt5PIQRNNoh2KBv6aeY8NEY9x2dlmiRwGwU1vg,6557
187
187
  airbyte_cdk/sources/file_based/stream/default_file_based_stream.py,sha256=qS0DJzXlVew6armFDJ0eNcSxRCmkA7JWQYFl6gcv3dU,13113
188
188
  airbyte_cdk/sources/file_based/stream/concurrent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=AX_H5cIWkWOJkhXGuTSuZ56Jr5szoNfQ3NdabbWPTtI,13043
189
+ airbyte_cdk/sources/file_based/stream/concurrent/adapters.py,sha256=rjf8htUotdAXWSGcFA0jFHJfaai_EnmQxncnxMWTN2A,13320
190
190
  airbyte_cdk/sources/file_based/stream/concurrent/cursor/__init__.py,sha256=WKEYXZwSla6xwp7k1mnyG3kl9xCzEZ9B3eE-cxIuzIM,310
191
191
  airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py,sha256=UYLE2A2RdV-5FaQ70naZZWY34l5AEJkIRlTH05-e_-k,1961
192
192
  airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py,sha256=jHiej28aKQJ3UmWXQxHRCK8xkzY5H0-zxQiVqFs5rAI,14389
@@ -269,7 +269,7 @@ airbyte_cdk/utils/oneof_option_config.py,sha256=N8EmWdYdwt0FM7fuShh6H8nj_r4KEL9t
269
269
  airbyte_cdk/utils/schema_inferrer.py,sha256=D8vFVgeK6VLcAug4YVAHfa3D29On0A_nMlwq9SPlfPI,3799
270
270
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=LGjSSk8lmBiC0GiHqxDwu_iMN6bCe05UMpz9e7nCw5E,741
271
271
  airbyte_cdk/utils/stream_status_utils.py,sha256=k7OY6AkJW8ifyh7ZYetC5Yy1nxM6Mx3apOAviCjJh80,971
272
- airbyte_cdk/utils/traced_exception.py,sha256=ChtuhSV_fkmMv8QjPBR1dV1US8uxlmVt_Myt-C2OIqQ,3396
272
+ airbyte_cdk/utils/traced_exception.py,sha256=IDYvUkbgkOMjusiuP0xU65mHzl5nLDkhA3o-FvNDfjI,4336
273
273
  source_declarative_manifest/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
274
274
  source_declarative_manifest/main.py,sha256=HXzuRsRyhHwPrGU-hc4S7RrgoOoHImqkdfbmO2geBeE,1027
275
275
  unit_tests/connector_builder/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
@@ -282,14 +282,14 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
282
282
  unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
283
283
  unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
284
284
  unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
285
- unit_tests/sources/test_abstract_source.py,sha256=m-YcMK1DgIhJLKUHoANFPx_d6yh-zgLrU1wLNlNCuTg,52802
285
+ unit_tests/sources/test_abstract_source.py,sha256=wIcMNIB66bQnYnC8LY_OPLUibBL7fSg9Lm6jDqOWb4g,59544
286
286
  unit_tests/sources/test_concurrent_source.py,sha256=3i7pSRetKSoP6LBpXyuXpWi2_VOwta_aTm_kgnDaLqk,3704
287
287
  unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
288
288
  unit_tests/sources/test_connector_state_manager.py,sha256=KAvYmuaWwg2kSnPNKri6Ne8TmLpsSimotsnDLLKkDD0,24369
289
289
  unit_tests/sources/test_http_logger.py,sha256=VT6DqgspI3DcRnoBQkkQX0z4dF_AOiYZ5P_zxmMW8oU,9004
290
- unit_tests/sources/test_integration_source.py,sha256=u_w5NS9n8GkTsoTjJvBE3-g8x0NG2054hL3PtW7IfAM,3458
290
+ unit_tests/sources/test_integration_source.py,sha256=qcWld9evB1rAjALWX8SDshGz7seYkN3HCamQ6KQ2Idw,4269
291
291
  unit_tests/sources/test_source.py,sha256=W0I4umL_d_OToLYYiRkjkJR6e-cCYjdV8zKc3uLvF0k,27999
292
- unit_tests/sources/test_source_read.py,sha256=AEFoJfzM0_5QQIJyKwGLK_kq_Vz_CBivImnUnXJQJ0I,17176
292
+ unit_tests/sources/test_source_read.py,sha256=n9XpVQLfsQH8eh6D99MDiNVBBKcf6UtouThDJcGH6SU,17186
293
293
  unit_tests/sources/concurrent_source/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
294
294
  unit_tests/sources/concurrent_source/test_concurrent_source_adapter.py,sha256=zsGnMcEsBedjW8wahil6LNqniil-3NXhyZd5W-80Km0,3665
295
295
  unit_tests/sources/declarative/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
@@ -456,8 +456,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
456
456
  unit_tests/utils/test_secret_utils.py,sha256=CdKK8A2-5XVxbXVtX22FK9dwwMeP5KNqDH6luWRXSNw,5256
457
457
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
458
458
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
459
- airbyte_cdk-0.62.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
460
- airbyte_cdk-0.62.2.dist-info/METADATA,sha256=mg5FUvzFvSF_W3YQZY6V6fUcSoUy4C03oFt2hF6w0FI,11073
461
- airbyte_cdk-0.62.2.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
462
- airbyte_cdk-0.62.2.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
463
- airbyte_cdk-0.62.2.dist-info/RECORD,,
459
+ airbyte_cdk-0.63.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
460
+ airbyte_cdk-0.63.1.dist-info/METADATA,sha256=-pgcWwY9lrlm3-6C9BxburyjPpElSbKPAep02SEB-hk,11073
461
+ airbyte_cdk-0.63.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
462
+ airbyte_cdk-0.63.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
463
+ airbyte_cdk-0.63.1.dist-info/RECORD,,
@@ -13,6 +13,7 @@ import pytest
13
13
  from airbyte_cdk.models import (
14
14
  AirbyteCatalog,
15
15
  AirbyteConnectionStatus,
16
+ AirbyteErrorTraceMessage,
16
17
  AirbyteLogMessage,
17
18
  AirbyteMessage,
18
19
  AirbyteRecordMessage,
@@ -27,6 +28,7 @@ from airbyte_cdk.models import (
27
28
  ConfiguredAirbyteCatalog,
28
29
  ConfiguredAirbyteStream,
29
30
  DestinationSyncMode,
31
+ FailureType,
30
32
  Level,
31
33
  Status,
32
34
  StreamDescriptor,
@@ -40,6 +42,7 @@ from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
40
42
  from airbyte_cdk.sources.message import MessageRepository
41
43
  from airbyte_cdk.sources.streams import IncrementalMixin, Stream
42
44
  from airbyte_cdk.sources.utils.record_helper import stream_data_to_airbyte_message
45
+ from airbyte_cdk.utils.airbyte_secrets_utils import update_secrets
43
46
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
44
47
  from pytest import fixture
45
48
 
@@ -54,12 +57,14 @@ class MockSource(AbstractSource):
54
57
  per_stream: bool = True,
55
58
  message_repository: MessageRepository = None,
56
59
  exception_on_missing_stream: bool = True,
60
+ stop_sync_on_stream_failure: bool = False,
57
61
  ):
58
62
  self._streams = streams
59
63
  self.check_lambda = check_lambda
60
64
  self.per_stream = per_stream
61
65
  self.exception_on_missing_stream = exception_on_missing_stream
62
66
  self._message_repository = message_repository
67
+ self._stop_sync_on_stream_failure = stop_sync_on_stream_failure
63
68
 
64
69
  def check_connection(self, logger: logging.Logger, config: Mapping[str, Any]) -> Tuple[bool, Optional[Any]]:
65
70
  if self.check_lambda:
@@ -84,6 +89,12 @@ class MockSource(AbstractSource):
84
89
  return self._message_repository
85
90
 
86
91
 
92
+ class MockSourceWithStopSyncFalseOverride(MockSource):
93
+ @property
94
+ def stop_sync_on_stream_failure(self) -> bool:
95
+ return False
96
+
97
+
87
98
  class StreamNoStateMethod(Stream):
88
99
  name = "managers"
89
100
  primary_key = None
@@ -115,8 +126,11 @@ class StreamRaisesException(Stream):
115
126
  name = "lamentations"
116
127
  primary_key = None
117
128
 
129
+ def __init__(self, exception_to_raise):
130
+ self._exception_to_raise = exception_to_raise
131
+
118
132
  def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
119
- raise AirbyteTracedException(message="I was born only to crash like Icarus")
133
+ raise self._exception_to_raise
120
134
 
121
135
 
122
136
  MESSAGE_FROM_REPOSITORY = Mock()
@@ -291,7 +305,7 @@ def test_read_stream_emits_repository_message_on_error(mocker, message_repositor
291
305
 
292
306
  source = MockSource(streams=[stream], message_repository=message_repository)
293
307
 
294
- with pytest.raises(RuntimeError):
308
+ with pytest.raises(AirbyteTracedException):
295
309
  messages = list(source.read(logger, {}, ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])))
296
310
  assert MESSAGE_FROM_REPOSITORY in messages
297
311
 
@@ -306,14 +320,14 @@ def test_read_stream_with_error_gets_display_message(mocker):
306
320
  catalog = ConfiguredAirbyteCatalog(streams=[_configured_stream(stream, SyncMode.full_refresh)])
307
321
 
308
322
  # without get_error_display_message
309
- with pytest.raises(RuntimeError, match="oh no!"):
323
+ with pytest.raises(AirbyteTracedException):
310
324
  list(source.read(logger, {}, catalog))
311
325
 
312
326
  mocker.patch.object(MockStream, "get_error_display_message", return_value="my message")
313
327
 
314
- with pytest.raises(AirbyteTracedException, match="oh no!") as exc:
328
+ with pytest.raises(AirbyteTracedException) as exc:
315
329
  list(source.read(logger, {}, catalog))
316
- assert exc.value.message == "my message"
330
+ assert "oh no!" in exc.value.message
317
331
 
318
332
 
319
333
  GLOBAL_EMITTED_AT = 1
@@ -358,6 +372,22 @@ def _as_state(state_data: Dict[str, Any], stream_name: str = "", per_stream_stat
358
372
  return AirbyteMessage(type=Type.STATE, state=AirbyteStateMessage(data=state_data))
359
373
 
360
374
 
375
+ def _as_error_trace(stream: str, error_message: str, internal_message: Optional[str], failure_type: Optional[FailureType], stack_trace: Optional[str]) -> AirbyteMessage:
376
+ trace_message = AirbyteTraceMessage(
377
+ emitted_at=datetime.datetime.now().timestamp() * 1000.0,
378
+ type=TraceType.ERROR,
379
+ error=AirbyteErrorTraceMessage(
380
+ stream_descriptor=StreamDescriptor(name=stream),
381
+ message=error_message,
382
+ internal_message=internal_message,
383
+ failure_type=failure_type,
384
+ stack_trace=stack_trace,
385
+ ),
386
+ )
387
+
388
+ return AirbyteMessage(type=MessageType.TRACE, trace=trace_message)
389
+
390
+
361
391
  def _configured_stream(stream: Stream, sync_mode: SyncMode):
362
392
  return ConfiguredAirbyteStream(
363
393
  stream=stream.as_airbyte_stream(),
@@ -1174,21 +1204,27 @@ def test_checkpoint_state_from_stream_instance():
1174
1204
  )
1175
1205
 
1176
1206
 
1177
- def test_continue_sync_with_failed_streams(mocker):
1207
+ @pytest.mark.parametrize(
1208
+ "exception_to_raise,expected_error_message,expected_internal_message",
1209
+ [
1210
+ pytest.param(AirbyteTracedException(message="I was born only to crash like Icarus"), "I was born only to crash like Icarus", None, id="test_raises_traced_exception"),
1211
+ pytest.param(Exception("Generic connector error message"), "Something went wrong in the connector. See the logs for more details.", "Generic connector error message", id="test_raises_generic_exception"),
1212
+ ]
1213
+ )
1214
+ def test_continue_sync_with_failed_streams(mocker, exception_to_raise, expected_error_message, expected_internal_message):
1178
1215
  """
1179
- Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure enabled continues
1180
- syncing even when one stream fails with an error.
1216
+ Tests that running a sync for a connector with multiple streams will continue syncing when one stream fails
1217
+ with an error. This source does not override the default behavior defined in the AbstractSource class.
1181
1218
  """
1182
1219
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1183
1220
  s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1184
- s2 = StreamRaisesException()
1221
+ s2 = StreamRaisesException(exception_to_raise=exception_to_raise)
1185
1222
  s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1186
1223
 
1187
1224
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
1188
1225
  mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1189
1226
 
1190
1227
  src = MockSource(streams=[s1, s2, s3])
1191
- mocker.patch.object(MockSource, "continue_sync_on_stream_failure", return_value=True)
1192
1228
  catalog = ConfiguredAirbyteCatalog(
1193
1229
  streams=[
1194
1230
  _configured_stream(s1, SyncMode.full_refresh),
@@ -1205,6 +1241,7 @@ def test_continue_sync_with_failed_streams(mocker):
1205
1241
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1206
1242
  _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1207
1243
  _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1244
+ _as_error_trace("lamentations", expected_error_message, expected_internal_message, FailureType.system_error, None),
1208
1245
  _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1209
1246
  _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1210
1247
  *_as_records("s3", stream_output),
@@ -1212,26 +1249,75 @@ def test_continue_sync_with_failed_streams(mocker):
1212
1249
  ]
1213
1250
  )
1214
1251
 
1215
- messages = []
1216
1252
  with pytest.raises(AirbyteTracedException) as exc:
1217
- # We can't use list comprehension or list() here because we are still raising a final exception for the
1218
- # failed streams and that disrupts parsing the generator into the messages emitted before
1219
- for message in src.read(logger, {}, catalog):
1220
- messages.append(message)
1253
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1254
+ messages = _fix_emitted_at(messages)
1255
+
1256
+ assert expected == messages
1221
1257
 
1222
- messages = _fix_emitted_at(messages)
1223
- assert expected == messages
1224
1258
  assert "lamentations" in exc.value.message
1259
+ assert exc.value.failure_type == FailureType.config_error
1225
1260
 
1226
1261
 
1227
- def test_stop_sync_with_failed_streams(mocker):
1262
+ def test_continue_sync_source_override_false(mocker):
1228
1263
  """
1229
- Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure disabled stops
1230
- syncing once a stream fails with an error.
1264
+ Tests that running a sync for a connector that explicitly overrides the default AbstractSource.stop_sync_on_stream_failure
1265
+ property to be False continues syncing the remaining streams even if one stream encounters an exception.
1231
1266
  """
1267
+ update_secrets(["API_KEY_VALUE"])
1268
+
1232
1269
  stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1233
1270
  s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1234
- s2 = StreamRaisesException()
1271
+ s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="I was born only to crash like Icarus"))
1272
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1273
+
1274
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1275
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1276
+
1277
+ src = MockSourceWithStopSyncFalseOverride(streams=[s1, s2, s3])
1278
+ catalog = ConfiguredAirbyteCatalog(
1279
+ streams=[
1280
+ _configured_stream(s1, SyncMode.full_refresh),
1281
+ _configured_stream(s2, SyncMode.full_refresh),
1282
+ _configured_stream(s3, SyncMode.full_refresh),
1283
+ ]
1284
+ )
1285
+
1286
+ expected = _fix_emitted_at(
1287
+ [
1288
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1289
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1290
+ *_as_records("s1", stream_output),
1291
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1292
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1293
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1294
+ _as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
1295
+ _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1296
+ _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1297
+ *_as_records("s3", stream_output),
1298
+ _as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
1299
+ ]
1300
+ )
1301
+
1302
+ with pytest.raises(AirbyteTracedException) as exc:
1303
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1304
+ messages = _fix_emitted_at(messages)
1305
+
1306
+ assert expected == messages
1307
+
1308
+ assert "lamentations" in exc.value.message
1309
+ assert exc.value.failure_type == FailureType.config_error
1310
+
1311
+
1312
+ def test_sync_error_trace_messages_obfuscate_secrets(mocker):
1313
+ """
1314
+ Tests that exceptions emitted as trace messages by a source have secrets properly sanitized
1315
+ """
1316
+ update_secrets(["API_KEY_VALUE"])
1317
+
1318
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1319
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1320
+ s2 = StreamRaisesException(exception_to_raise=AirbyteTracedException(message="My api_key value API_KEY_VALUE flew too close to the sun."))
1235
1321
  s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1236
1322
 
1237
1323
  mocker.patch.object(MockStream, "get_json_schema", return_value={})
@@ -1254,15 +1340,73 @@ def test_stop_sync_with_failed_streams(mocker):
1254
1340
  _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1255
1341
  _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1256
1342
  _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1343
+ _as_error_trace("lamentations", "My api_key value **** flew too close to the sun.", None, FailureType.system_error, None),
1344
+ _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1345
+ _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1346
+ *_as_records("s3", stream_output),
1347
+ _as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
1257
1348
  ]
1258
1349
  )
1259
1350
 
1260
- messages = []
1261
- with pytest.raises(AirbyteTracedException):
1262
- # We can't use list comprehension or list() here because we are still raising a final exception for the
1263
- # failed streams and that disrupts parsing the generator into the messages emitted before
1264
- for message in src.read(logger, {}, catalog):
1265
- messages.append(message)
1351
+ with pytest.raises(AirbyteTracedException) as exc:
1352
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1353
+ messages = _fix_emitted_at(messages)
1266
1354
 
1267
- messages = _fix_emitted_at(messages)
1268
- assert expected == messages
1355
+ assert expected == messages
1356
+
1357
+ assert "lamentations" in exc.value.message
1358
+ assert exc.value.failure_type == FailureType.config_error
1359
+
1360
+
1361
+ def test_continue_sync_with_failed_streams_with_override_false(mocker):
1362
+ """
1363
+ Tests that running a sync for a connector with multiple streams and stop_sync_on_stream_failure enabled stops
1364
+ the sync when one stream fails with an error.
1365
+ """
1366
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1367
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1368
+ s2 = StreamRaisesException(AirbyteTracedException(message="I was born only to crash like Icarus"))
1369
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1370
+
1371
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1372
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1373
+
1374
+ src = MockSource(streams=[s1, s2, s3])
1375
+ mocker.patch.object(MockSource, "stop_sync_on_stream_failure", return_value=True)
1376
+ catalog = ConfiguredAirbyteCatalog(
1377
+ streams=[
1378
+ _configured_stream(s1, SyncMode.full_refresh),
1379
+ _configured_stream(s2, SyncMode.full_refresh),
1380
+ _configured_stream(s3, SyncMode.full_refresh),
1381
+ ]
1382
+ )
1383
+
1384
+ expected = _fix_emitted_at(
1385
+ [
1386
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1387
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1388
+ *_as_records("s1", stream_output),
1389
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1390
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1391
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1392
+ _as_error_trace("lamentations", "I was born only to crash like Icarus", None, FailureType.system_error, None),
1393
+ ]
1394
+ )
1395
+
1396
+ with pytest.raises(AirbyteTracedException) as exc:
1397
+ messages = [_remove_stack_trace(message) for message in src.read(logger, {}, catalog)]
1398
+ messages = _fix_emitted_at(messages)
1399
+
1400
+ assert expected == messages
1401
+
1402
+ assert "lamentations" in exc.value.message
1403
+ assert exc.value.failure_type == FailureType.config_error
1404
+
1405
+
1406
+ def _remove_stack_trace(message: AirbyteMessage) -> AirbyteMessage:
1407
+ """
1408
+ Helper method that removes the stack trace from Airbyte trace messages to make asserting against expected records easier
1409
+ """
1410
+ if message.trace and message.trace.error and message.trace.error.stack_trace:
1411
+ message.trace.error.stack_trace = None
1412
+ return message
@@ -2,7 +2,9 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ import json
5
6
  import os
7
+ from typing import Any, List, Mapping
6
8
  from unittest import mock
7
9
  from unittest.mock import patch
8
10
 
@@ -22,9 +24,9 @@ from unit_tests.sources.fixtures.source_test_fixture import (
22
24
  "deployment_mode, url_base, expected_records, expected_error",
23
25
  [
24
26
  pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
25
- pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], ValueError, id="test_cloud_read_with_unsecured_url"),
26
- pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], AirbyteTracedException, id="test_cloud_read_with_private_endpoint"),
27
- pytest.param("CLOUD", "https://localhost:80/api/v1/", [], AirbyteTracedException, id="test_cloud_read_with_localhost"),
27
+ pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
28
+ pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
29
+ pytest.param("CLOUD", "https://localhost:80/api/v1/", [], "config_error", id="test_cloud_read_with_localhost"),
28
30
  pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
29
31
  pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
30
32
  ],
@@ -37,8 +39,10 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
37
39
  with mock.patch.object(HttpTestStream, "url_base", url_base):
38
40
  args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
39
41
  if expected_error:
40
- with pytest.raises(expected_error):
42
+ with pytest.raises(AirbyteTracedException):
41
43
  launch(source, args)
44
+ messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
45
+ assert contains_error_trace_message(messages, expected_error)
42
46
  else:
43
47
  launch(source, args)
44
48
 
@@ -47,14 +51,14 @@ def test_external_request_source(capsys, deployment_mode, url_base, expected_rec
47
51
  "deployment_mode, token_refresh_url, expected_records, expected_error",
48
52
  [
49
53
  pytest.param("CLOUD", "https://airbyte.com/api/v1/", [], None, id="test_cloud_read_with_public_endpoint"),
50
- pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], ValueError, id="test_cloud_read_with_unsecured_url"),
51
- pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], AirbyteTracedException, id="test_cloud_read_with_private_endpoint"),
54
+ pytest.param("CLOUD", "http://unsecured.com/api/v1/", [], "system_error", id="test_cloud_read_with_unsecured_url"),
55
+ pytest.param("CLOUD", "https://172.20.105.99/api/v1/", [], "config_error", id="test_cloud_read_with_private_endpoint"),
52
56
  pytest.param("OSS", "https://airbyte.com/api/v1/", [], None, id="test_oss_read_with_public_endpoint"),
53
57
  pytest.param("OSS", "https://172.20.105.99/api/v1/", [], None, id="test_oss_read_with_private_endpoint"),
54
58
  ],
55
59
  )
56
60
  @patch.object(requests.Session, "send", fixture_mock_send)
57
- def test_external_oauth_request_source(deployment_mode, token_refresh_url, expected_records, expected_error):
61
+ def test_external_oauth_request_source(capsys, deployment_mode, token_refresh_url, expected_records, expected_error):
58
62
  oauth_authenticator = SourceFixtureOauthAuthenticator(
59
63
  client_id="nora", client_secret="hae_sung", refresh_token="arthur", token_refresh_endpoint=token_refresh_url
60
64
  )
@@ -63,7 +67,20 @@ def test_external_oauth_request_source(deployment_mode, token_refresh_url, expec
63
67
  with mock.patch.dict(os.environ, {"DEPLOYMENT_MODE": deployment_mode}, clear=False): # clear=True clears the existing os.environ dict
64
68
  args = ["read", "--config", "config.json", "--catalog", "configured_catalog.json"]
65
69
  if expected_error:
66
- with pytest.raises(expected_error):
70
+ with pytest.raises(AirbyteTracedException):
67
71
  launch(source, args)
72
+ messages = [json.loads(line) for line in capsys.readouterr().out.splitlines()]
73
+ assert contains_error_trace_message(messages, expected_error)
68
74
  else:
69
75
  launch(source, args)
76
+
77
+
78
def contains_error_trace_message(messages: List[Mapping[str, Any]], expected_error: str) -> bool:
    """
    Report whether any message is an ERROR trace message with the given failure type.

    :param messages: messages already deserialized from JSON into mappings
    :param expected_error: the failure type to look for, e.g. "config_error" or "system_error"
    :return: True if at least one TRACE message of type ERROR has a matching "failure_type", False otherwise
    """
    for message in messages:
        if message.get("type") != "TRACE":
            continue
        # A TRACE message without a usable "trace" payload cannot match; skip it rather than
        # failing with AttributeError on None.
        trace = message.get("trace")
        if not isinstance(trace, Mapping) or trace.get("type") != "ERROR":
            continue
        # Same for an ERROR trace that is missing its "error" payload.
        error = trace.get("error")
        if isinstance(error, Mapping) and error.get("failure_type") == expected_error:
            return True
    return False
@@ -343,7 +343,7 @@ def test_concurrent_source_yields_the_same_messages_as_abstract_source_when_an_e
343
343
  source, concurrent_source = _init_sources([stream_slice_to_partition], state, logger)
344
344
  config = {}
345
345
  catalog = _create_configured_catalog(source._streams)
346
- messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state, RuntimeError)
346
+ messages_from_abstract_source = _read_from_source(source, logger, config, catalog, state, AirbyteTracedException)
347
347
  messages_from_concurrent_source = _read_from_source(concurrent_source, logger, config, catalog, state, RuntimeError)
348
348
 
349
349
  expected_messages = [