airbyte-cdk 0.55.5__py3-none-any.whl → 0.56.1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -101,6 +101,9 @@ class AbstractSource(Source, ABC):
101
101
  stream_instances = {s.name: s for s in self.streams(config)}
102
102
  state_manager = ConnectorStateManager(stream_instance_map=stream_instances, state=state)
103
103
  self._stream_to_instance_map = stream_instances
104
+
105
+ stream_name_to_exception: MutableMapping[str, AirbyteTracedException] = {}
106
+
104
107
  with create_timer(self.name) as timer:
105
108
  for configured_stream in catalog.streams:
106
109
  stream_instance = stream_instances.get(configured_stream.stream.name)
@@ -131,7 +134,10 @@ class AbstractSource(Source, ABC):
131
134
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
132
135
  except AirbyteTracedException as e:
133
136
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
134
- raise e
137
+ if self.continue_sync_on_stream_failure:
138
+ stream_name_to_exception[stream_instance.name] = e
139
+ else:
140
+ raise e
135
141
  except Exception as e:
136
142
  yield from self._emit_queued_messages()
137
143
  logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
@@ -146,6 +152,8 @@ class AbstractSource(Source, ABC):
146
152
  logger.info(f"Finished syncing {configured_stream.stream.name}")
147
153
  logger.info(timer.report())
148
154
 
155
+ if self.continue_sync_on_stream_failure and len(stream_name_to_exception) > 0:
156
+ raise AirbyteTracedException(message=self._generate_failed_streams_error_message(stream_name_to_exception))
149
157
  logger.info(f"Finished syncing {self.name}")
150
158
 
151
159
  @property
@@ -272,3 +280,19 @@ class AbstractSource(Source, ABC):
272
280
  @property
273
281
  def message_repository(self) -> Union[None, MessageRepository]:
274
282
  return _default_message_repository
283
+
284
+ @property
285
+ def continue_sync_on_stream_failure(self) -> bool:
286
+ """
287
+ WARNING: This function is in-development which means it is subject to change. Use at your own risk.
288
+
289
+ By default, a source should raise an exception and stop the sync when it encounters an error while syncing a stream. This
290
+ method can be overridden on a per-source basis so that a source will continue syncing other streams even if an
291
+ exception is raised for a stream.
292
+ """
293
+ return False
294
+
295
+ @staticmethod
296
+ def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
297
+ failures = ", ".join([f"{stream}: {exception.__repr__()}" for stream, exception in stream_failures.items()])
298
+ return f"During the sync, the following streams did not sync successfully: {failures}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.55.5
3
+ Version: 0.56.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
24
24
  airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
25
25
  airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
26
26
  airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
27
- airbyte_cdk/sources/abstract_source.py,sha256=tlXXwCRBisbOu7pA2NYtiRolhHSCaKlXcET6dny5V9o,13043
27
+ airbyte_cdk/sources/abstract_source.py,sha256=w-f9Z2sp8xAk4MHVkr-K4tBP1NMxBIGRCd9NsHm2xBE,14365
28
28
  airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
29
29
  airbyte_cdk/sources/connector_state_manager.py,sha256=wsmUgII398MazCTKxwLBLzeiU6Z-tMTrKX882EEy-YE,10904
30
30
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
@@ -269,7 +269,7 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
269
269
  unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
270
270
  unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
271
271
  unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
272
- unit_tests/sources/test_abstract_source.py,sha256=V7zSpOk-jqfOz8FtnImAo_zDe-Q2TjPqD_l_T0QaiDw,48179
272
+ unit_tests/sources/test_abstract_source.py,sha256=_wFBhJ2dsxWpYqw_QSGL7WwJyqVvXLQOMN3GrD5nC4A,52519
273
273
  unit_tests/sources/test_concurrent_source.py,sha256=NT4K0z-oz2OZBHE9xNQT0KUdI2wJ-5vNWLUHZlIYKKU,3552
274
274
  unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
275
275
  unit_tests/sources/test_connector_state_manager.py,sha256=ynFxA63Cxe6t-wMMh9C6ByTlMAuk8W7H2FikDhnUEQ0,24264
@@ -437,8 +437,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
437
437
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
438
438
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
439
439
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
440
- airbyte_cdk-0.55.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
441
- airbyte_cdk-0.55.5.dist-info/METADATA,sha256=UGBscoQLxD10YK6vK2sziE_7IkU_JY03BSihYZcEJv0,11983
442
- airbyte_cdk-0.55.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
443
- airbyte_cdk-0.55.5.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
444
- airbyte_cdk-0.55.5.dist-info/RECORD,,
440
+ airbyte_cdk-0.56.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
441
+ airbyte_cdk-0.56.1.dist-info/METADATA,sha256=9SrM3BpVcDEm06b_24RA58-vED_17w1-dZbGme1dt3Q,11983
442
+ airbyte_cdk-0.56.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
443
+ airbyte_cdk-0.56.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
444
+ airbyte_cdk-0.56.1.dist-info/RECORD,,
@@ -111,6 +111,14 @@ class MockStreamOverridesStateMethod(Stream, IncrementalMixin):
111
111
  self._cursor_value = value.get(self.cursor_field, self.start_date)
112
112
 
113
113
 
114
+ class StreamRaisesException(Stream):
115
+ name = "lamentations"
116
+ primary_key = None
117
+
118
+ def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
119
+ raise AirbyteTracedException(message="I was born only to crash like Icarus")
120
+
121
+
114
122
  MESSAGE_FROM_REPOSITORY = Mock()
115
123
 
116
124
 
@@ -1158,3 +1166,97 @@ def test_checkpoint_state_from_stream_instance():
1158
1166
  assert actual_message == _as_state(
1159
1167
  {"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
1160
1168
  )
1169
+
1170
+
1171
+ def test_continue_sync_with_failed_streams(mocker):
1172
+ """
1173
+ Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure enabled continues
1174
+ syncing even when one stream fails with an error.
1175
+ """
1176
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1177
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1178
+ s2 = StreamRaisesException()
1179
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1180
+
1181
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1182
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1183
+
1184
+ src = MockSource(streams=[s1, s2, s3])
1185
+ mocker.patch.object(MockSource, "continue_sync_on_stream_failure", return_value=True)
1186
+ catalog = ConfiguredAirbyteCatalog(
1187
+ streams=[
1188
+ _configured_stream(s1, SyncMode.full_refresh),
1189
+ _configured_stream(s2, SyncMode.full_refresh),
1190
+ _configured_stream(s3, SyncMode.full_refresh),
1191
+ ]
1192
+ )
1193
+
1194
+ expected = _fix_emitted_at(
1195
+ [
1196
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1197
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1198
+ *_as_records("s1", stream_output),
1199
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1200
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1201
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1202
+ _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1203
+ _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1204
+ *_as_records("s3", stream_output),
1205
+ _as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
1206
+ ]
1207
+ )
1208
+
1209
+ messages = []
1210
+ with pytest.raises(AirbyteTracedException) as exc:
1211
+ # We can't use list comprehension or list() here because we are still raising a final exception for the
1212
+ # failed streams and that disrupts parsing the generator into the messages emitted before
1213
+ for message in src.read(logger, {}, catalog):
1214
+ messages.append(message)
1215
+
1216
+ messages = _fix_emitted_at(messages)
1217
+ assert expected == messages
1218
+ assert "lamentations" in exc.value.message
1219
+
1220
+
1221
+ def test_stop_sync_with_failed_streams(mocker):
1222
+ """
1223
+ Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure disabled stops
1224
+ syncing once a stream fails with an error.
1225
+ """
1226
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1227
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1228
+ s2 = StreamRaisesException()
1229
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1230
+
1231
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1232
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1233
+
1234
+ src = MockSource(streams=[s1, s2, s3])
1235
+ catalog = ConfiguredAirbyteCatalog(
1236
+ streams=[
1237
+ _configured_stream(s1, SyncMode.full_refresh),
1238
+ _configured_stream(s2, SyncMode.full_refresh),
1239
+ _configured_stream(s3, SyncMode.full_refresh),
1240
+ ]
1241
+ )
1242
+
1243
+ expected = _fix_emitted_at(
1244
+ [
1245
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1246
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1247
+ *_as_records("s1", stream_output),
1248
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1249
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1250
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1251
+ ]
1252
+ )
1253
+
1254
+ messages = []
1255
+ with pytest.raises(AirbyteTracedException):
1256
+ # We can't use a list comprehension or list() here because the failed stream's exception is re-raised
1257
+ # mid-iteration, which would discard the messages the generator emitted before it
1258
+ for message in src.read(logger, {}, catalog):
1259
+ messages.append(message)
1260
+
1261
+ messages = _fix_emitted_at(messages)
1262
+ assert expected == messages