airbyte-cdk 0.55.5__py3-none-any.whl → 0.56.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -101,6 +101,9 @@ class AbstractSource(Source, ABC):
101
101
  stream_instances = {s.name: s for s in self.streams(config)}
102
102
  state_manager = ConnectorStateManager(stream_instance_map=stream_instances, state=state)
103
103
  self._stream_to_instance_map = stream_instances
104
+
105
+ stream_name_to_exception: MutableMapping[str, AirbyteTracedException] = {}
106
+
104
107
  with create_timer(self.name) as timer:
105
108
  for configured_stream in catalog.streams:
106
109
  stream_instance = stream_instances.get(configured_stream.stream.name)
@@ -131,7 +134,10 @@ class AbstractSource(Source, ABC):
131
134
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.COMPLETE)
132
135
  except AirbyteTracedException as e:
133
136
  yield stream_status_as_airbyte_message(configured_stream.stream, AirbyteStreamStatus.INCOMPLETE)
134
- raise e
137
+ if self.continue_sync_on_stream_failure:
138
+ stream_name_to_exception[stream_instance.name] = e
139
+ else:
140
+ raise e
135
141
  except Exception as e:
136
142
  yield from self._emit_queued_messages()
137
143
  logger.exception(f"Encountered an exception while reading stream {configured_stream.stream.name}")
@@ -146,6 +152,8 @@ class AbstractSource(Source, ABC):
146
152
  logger.info(f"Finished syncing {configured_stream.stream.name}")
147
153
  logger.info(timer.report())
148
154
 
155
+ if self.continue_sync_on_stream_failure and len(stream_name_to_exception) > 0:
156
+ raise AirbyteTracedException(message=self._generate_failed_streams_error_message(stream_name_to_exception))
149
157
  logger.info(f"Finished syncing {self.name}")
150
158
 
151
159
  @property
@@ -272,3 +280,19 @@ class AbstractSource(Source, ABC):
272
280
  @property
273
281
  def message_repository(self) -> Union[None, MessageRepository]:
274
282
  return _default_message_repository
283
+
284
+ @property
285
+ def continue_sync_on_stream_failure(self) -> bool:
286
+ """
287
+ WARNING: This function is in-development which means it is subject to change. Use at your own risk.
288
+
289
+ By default, a source should raise an exception and stop the sync when it encounters an error while syncing a stream. This
290
+ method can be overridden on a per-source basis so that a source will continue syncing other streams even if an
291
+ exception is raised for a stream.
292
+ """
293
+ return False
294
+
295
+ @staticmethod
296
+ def _generate_failed_streams_error_message(stream_failures: Mapping[str, AirbyteTracedException]) -> str:
297
+ failures = ", ".join([f"{stream}: {exception.__repr__()}" for stream, exception in stream_failures.items()])
298
+ return f"During the sync, the following streams did not sync successfully: {failures}"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 0.55.5
3
+ Version: 0.56.1
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://github.com/airbytehq/airbyte
6
6
  Author: Airbyte
@@ -24,7 +24,7 @@ airbyte_cdk/models/__init__.py,sha256=Kg8YHBqUsNWHlAw-u3ZGdG4dxLh7qBlHhqMRfamNCR
24
24
  airbyte_cdk/models/airbyte_protocol.py,sha256=DoJvnmGM3xMAZFTwA6_RGMiKSFqfE3ib_Ru0KJ65Ag4,100
25
25
  airbyte_cdk/models/well_known_types.py,sha256=KKfNbow2gdLoC1Z4hcXy_JR8m_acsB2ol7gQuEgjobw,117
26
26
  airbyte_cdk/sources/__init__.py,sha256=Ov7Uf03KPSZUmMZqZfUAK3tQwsdKjDQUDvTb-H0JyfA,1141
27
- airbyte_cdk/sources/abstract_source.py,sha256=tlXXwCRBisbOu7pA2NYtiRolhHSCaKlXcET6dny5V9o,13043
27
+ airbyte_cdk/sources/abstract_source.py,sha256=w-f9Z2sp8xAk4MHVkr-K4tBP1NMxBIGRCd9NsHm2xBE,14365
28
28
  airbyte_cdk/sources/config.py,sha256=PYsY7y2u3EUwxLiEb96JnuKwH_E8CuxKggsRO2ZPSRc,856
29
29
  airbyte_cdk/sources/connector_state_manager.py,sha256=wsmUgII398MazCTKxwLBLzeiU6Z-tMTrKX882EEy-YE,10904
30
30
  airbyte_cdk/sources/http_config.py,sha256=OBZeuyFilm6NlDlBhFQvHhTWabEvZww6OHDIlZujIS0,730
@@ -269,7 +269,7 @@ unit_tests/singer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
269
269
  unit_tests/singer/test_singer_helpers.py,sha256=pZV6VxJuK-3-FICNGmoGbokrA_zkaFZEd4rYZCVpSRU,1762
270
270
  unit_tests/singer/test_singer_source.py,sha256=edN_kv7dnYAdBveWdUYOs74ak0dK6p8uaX225h_ZILA,4442
271
271
  unit_tests/sources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
272
- unit_tests/sources/test_abstract_source.py,sha256=V7zSpOk-jqfOz8FtnImAo_zDe-Q2TjPqD_l_T0QaiDw,48179
272
+ unit_tests/sources/test_abstract_source.py,sha256=_wFBhJ2dsxWpYqw_QSGL7WwJyqVvXLQOMN3GrD5nC4A,52519
273
273
  unit_tests/sources/test_concurrent_source.py,sha256=NT4K0z-oz2OZBHE9xNQT0KUdI2wJ-5vNWLUHZlIYKKU,3552
274
274
  unit_tests/sources/test_config.py,sha256=lxjeaf48pOMF4Pf3-Z1ux_tHTyjRFCdG_hpnxw3e7uQ,2839
275
275
  unit_tests/sources/test_connector_state_manager.py,sha256=ynFxA63Cxe6t-wMMh9C6ByTlMAuk8W7H2FikDhnUEQ0,24264
@@ -437,8 +437,8 @@ unit_tests/utils/test_schema_inferrer.py,sha256=Z2jHBZ540wnYkylIdV_2xr75Vtwlxuyg
437
437
  unit_tests/utils/test_secret_utils.py,sha256=XKe0f1RHYii8iwE6ATmBr5JGDI1pzzrnZUGdUSMJQP4,4886
438
438
  unit_tests/utils/test_stream_status_utils.py,sha256=Xr8MZ2HWgTVIyMbywDvuYkRaUF4RZLQOT8-JjvcfR24,2970
439
439
  unit_tests/utils/test_traced_exception.py,sha256=bDFP5zMBizFenz6V2WvEZTRCKGB5ijh3DBezjbfoYIs,4198
440
- airbyte_cdk-0.55.5.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
441
- airbyte_cdk-0.55.5.dist-info/METADATA,sha256=UGBscoQLxD10YK6vK2sziE_7IkU_JY03BSihYZcEJv0,11983
442
- airbyte_cdk-0.55.5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
443
- airbyte_cdk-0.55.5.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
444
- airbyte_cdk-0.55.5.dist-info/RECORD,,
440
+ airbyte_cdk-0.56.1.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
441
+ airbyte_cdk-0.56.1.dist-info/METADATA,sha256=9SrM3BpVcDEm06b_24RA58-vED_17w1-dZbGme1dt3Q,11983
442
+ airbyte_cdk-0.56.1.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
443
+ airbyte_cdk-0.56.1.dist-info/top_level.txt,sha256=edvsDKTnE6sD2wfCUaeTfKf5gQIL6CPVMwVL2sWZzqo,51
444
+ airbyte_cdk-0.56.1.dist-info/RECORD,,
@@ -111,6 +111,14 @@ class MockStreamOverridesStateMethod(Stream, IncrementalMixin):
111
111
  self._cursor_value = value.get(self.cursor_field, self.start_date)
112
112
 
113
113
 
114
+ class StreamRaisesException(Stream):
115
+ name = "lamentations"
116
+ primary_key = None
117
+
118
+ def read_records(self, *args, **kwargs) -> Iterable[Mapping[str, Any]]:
119
+ raise AirbyteTracedException(message="I was born only to crash like Icarus")
120
+
121
+
114
122
  MESSAGE_FROM_REPOSITORY = Mock()
115
123
 
116
124
 
@@ -1158,3 +1166,97 @@ def test_checkpoint_state_from_stream_instance():
1158
1166
  assert actual_message == _as_state(
1159
1167
  {"teams": {"updated_at": "2022-09-11"}, "managers": {"updated": "expected_here"}}, "managers", {"updated": "expected_here"}
1160
1168
  )
1169
+
1170
+
1171
+ def test_continue_sync_with_failed_streams(mocker):
1172
+ """
1173
+ Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure enabled continues
1174
+ syncing even when one stream fails with an error.
1175
+ """
1176
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1177
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1178
+ s2 = StreamRaisesException()
1179
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1180
+
1181
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1182
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1183
+
1184
+ src = MockSource(streams=[s1, s2, s3])
1185
+ mocker.patch.object(MockSource, "continue_sync_on_stream_failure", return_value=True)
1186
+ catalog = ConfiguredAirbyteCatalog(
1187
+ streams=[
1188
+ _configured_stream(s1, SyncMode.full_refresh),
1189
+ _configured_stream(s2, SyncMode.full_refresh),
1190
+ _configured_stream(s3, SyncMode.full_refresh),
1191
+ ]
1192
+ )
1193
+
1194
+ expected = _fix_emitted_at(
1195
+ [
1196
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1197
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1198
+ *_as_records("s1", stream_output),
1199
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1200
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1201
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1202
+ _as_stream_status("s3", AirbyteStreamStatus.STARTED),
1203
+ _as_stream_status("s3", AirbyteStreamStatus.RUNNING),
1204
+ *_as_records("s3", stream_output),
1205
+ _as_stream_status("s3", AirbyteStreamStatus.COMPLETE),
1206
+ ]
1207
+ )
1208
+
1209
+ messages = []
1210
+ with pytest.raises(AirbyteTracedException) as exc:
1211
+ # We can't use list comprehension or list() here because we are still raising a final exception for the
1212
+ # failed streams and that disrupts parsing the generator into the messages emitted before
1213
+ for message in src.read(logger, {}, catalog):
1214
+ messages.append(message)
1215
+
1216
+ messages = _fix_emitted_at(messages)
1217
+ assert expected == messages
1218
+ assert "lamentations" in exc.value.message
1219
+
1220
+
1221
+ def test_stop_sync_with_failed_streams(mocker):
1222
+ """
1223
+ Tests that running a sync for a connector with multiple streams and continue_sync_on_stream_failure disabled stops
1224
+ syncing once a stream fails with an error.
1225
+ """
1226
+ stream_output = [{"k1": "v1"}, {"k2": "v2"}]
1227
+ s1 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s1")
1228
+ s2 = StreamRaisesException()
1229
+ s3 = MockStream([({"sync_mode": SyncMode.full_refresh}, stream_output)], name="s3")
1230
+
1231
+ mocker.patch.object(MockStream, "get_json_schema", return_value={})
1232
+ mocker.patch.object(StreamRaisesException, "get_json_schema", return_value={})
1233
+
1234
+ src = MockSource(streams=[s1, s2, s3])
1235
+ catalog = ConfiguredAirbyteCatalog(
1236
+ streams=[
1237
+ _configured_stream(s1, SyncMode.full_refresh),
1238
+ _configured_stream(s2, SyncMode.full_refresh),
1239
+ _configured_stream(s3, SyncMode.full_refresh),
1240
+ ]
1241
+ )
1242
+
1243
+ expected = _fix_emitted_at(
1244
+ [
1245
+ _as_stream_status("s1", AirbyteStreamStatus.STARTED),
1246
+ _as_stream_status("s1", AirbyteStreamStatus.RUNNING),
1247
+ *_as_records("s1", stream_output),
1248
+ _as_stream_status("s1", AirbyteStreamStatus.COMPLETE),
1249
+ _as_stream_status("lamentations", AirbyteStreamStatus.STARTED),
1250
+ _as_stream_status("lamentations", AirbyteStreamStatus.INCOMPLETE),
1251
+ ]
1252
+ )
1253
+
1254
+ messages = []
1255
+ with pytest.raises(AirbyteTracedException):
1256
+ # We can't use list comprehension or list() here because we are still raising a final exception for the
1257
+ # failed streams and that disrupts parsing the generator into the messages emitted before
1258
+ for message in src.read(logger, {}, catalog):
1259
+ messages.append(message)
1260
+
1261
+ messages = _fix_emitted_at(messages)
1262
+ assert expected == messages