boilermaker-servicebus 1.0.0.dev4__tar.gz → 1.0.0.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/PKG-INFO +1 -1
  2. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/app.py +37 -8
  3. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/evaluators/common.py +17 -52
  4. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/evaluators/eval.py +2 -5
  5. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/evaluators/results_store.py +2 -5
  6. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/evaluators/simple.py +2 -5
  7. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/evaluators/task_graph.py +72 -15
  8. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/exc.py +1 -3
  9. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/retries.py +1 -1
  10. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/storage/base.py +20 -1
  11. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/storage/blob_storage.py +48 -1
  12. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/task/graph.py +13 -6
  13. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/task/result.py +0 -1
  14. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/tracing.py +1 -3
  15. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker_servicebus.egg-info/PKG-INFO +1 -1
  16. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/pyproject.toml +5 -5
  17. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/conftest.py +4 -0
  18. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/evaluators/test_common.py +7 -21
  19. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/evaluators/test_eval.py +1 -4
  20. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/evaluators/test_simple.py +13 -39
  21. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/evaluators/test_task_graphs.py +361 -36
  22. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/graph_factories.py +11 -18
  23. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/storage/test_blob_storage.py +44 -39
  24. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/task/test_graph.py +89 -17
  25. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/task/test_graph_cycle_detection.py +35 -0
  26. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/task/test_result.py +1 -3
  27. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/task/test_task.py +0 -2
  28. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/test_app.py +98 -4
  29. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/test_retries.py +27 -3
  30. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/test_sample.py +1 -3
  31. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/LICENSE +0 -0
  32. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/README.md +0 -0
  33. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/__init__.py +0 -0
  34. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/config.py +0 -0
  35. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/evaluators/__init__.py +0 -0
  36. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/failure.py +0 -0
  37. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/sample.py +0 -0
  38. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/service_bus.py +0 -0
  39. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/storage/__init__.py +0 -0
  40. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/task/__init__.py +0 -0
  41. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/task/task.py +0 -0
  42. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/task/task_id.py +0 -0
  43. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker/task/types.py +0 -0
  44. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker_servicebus.egg-info/SOURCES.txt +0 -0
  45. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker_servicebus.egg-info/dependency_links.txt +0 -0
  46. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker_servicebus.egg-info/requires.txt +0 -0
  47. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/boilermaker_servicebus.egg-info/top_level.txt +0 -0
  48. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/examples/basic.py +0 -0
  49. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/examples/callbacks.py +0 -0
  50. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/examples/task_graph_example.py +0 -0
  51. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/setup.cfg +0 -0
  52. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/evaluators/conftest.py +0 -0
  53. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/evaluators/test_eval_factory.py +0 -0
  54. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/evaluators/test_results_store.py +0 -0
  55. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/task/helpers.py +0 -0
  56. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/test_config.py +0 -0
  57. {boilermaker_servicebus-1.0.0.dev4 → boilermaker_servicebus-1.0.0.dev5}/tests/test_service_bus.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boilermaker-servicebus
3
- Version: 1.0.0.dev4
3
+ Version: 1.0.0.dev5
4
4
  Summary: An async python Background task system using Azure Service Bus Queues
5
5
  Author-email: Erik Aker <eaker@mulliganfunding.com>
6
6
  License: Apache License
@@ -322,9 +322,7 @@ class Boilermaker:
322
322
  await app.apply_async(cleanup_temp_files, delay=300)
323
323
  """
324
324
  task = self.create_task(fn, *args, policy=policy, **kwargs)
325
- return await self.publish_task(
326
- task, delay=delay, publish_attempts=publish_attempts
327
- )
325
+ return await self.publish_task(task, delay=delay, publish_attempts=publish_attempts)
328
326
 
329
327
  @tracer.start_as_current_span("boilermaker.publish-task")
330
328
  async def publish_task(
@@ -366,9 +364,7 @@ class Boilermaker:
366
364
  if results and len(results) == 1:
367
365
  sequence_number = results[0]
368
366
  task.mark_published(sequence_number)
369
- logger.debug(
370
- f"Published task {task.task_id} to queue with sequence_number={sequence_number}"
371
- )
367
+ logger.debug(f"Published task {task.task_id} to queue with sequence_number={sequence_number}")
372
368
  return task
373
369
 
374
370
  except (
@@ -407,6 +403,13 @@ class Boilermaker:
407
403
  [f"Expected graph_id={graph.graph_id}, found {task.graph_id=}"],
408
404
  )
409
405
 
406
+ for task in graph.fail_children.values():
407
+ if task.graph_id != graph.graph_id:
408
+ raise BoilermakerAppException(
409
+ "All failure callback tasks must have graph_id matching graph",
410
+ [f"Expected graph_id={graph.graph_id}, found {task.graph_id=}"],
411
+ )
412
+
410
413
  # Store the graph definition and all pending task results
411
414
  # If this graph has already been stored, this should fail.
412
415
  try:
@@ -414,8 +417,34 @@ class Boilermaker:
414
417
  except BoilermakerStorageError as exc:
415
418
  raise BoilermakerAppException("Error storing TaskGraph to storage", [str(exc)]) from exc
416
419
 
417
- # Publish all ready tasks (should be root nodes with no dependencies)
418
- for task in graph.generate_ready_tasks():
420
+ # Reload to get ETags on pending result blobs (required for ETag-guarded Scheduled writes)
421
+ try:
422
+ loaded_graph = await self.results_storage.load_graph(graph.graph_id)
423
+ except BoilermakerStorageError as exc:
424
+ raise BoilermakerAppException("Error loading TaskGraph after storage", [str(exc)]) from exc
425
+
426
+ if not loaded_graph:
427
+ raise BoilermakerAppException("TaskGraph not found after storing — this should not happen", [])
428
+
429
+ # Publish root tasks with ETag-guarded Scheduled write (same protocol as continue_graph)
430
+ for task in loaded_graph.generate_ready_tasks():
431
+ try:
432
+ result = loaded_graph.schedule_task(task.task_id)
433
+ await self.results_storage.store_task_result(result, etag=result.etag)
434
+ except BoilermakerStorageError:
435
+ logger.error(
436
+ f"Failed to write Scheduled status for root task {task.task_id} in graph "
437
+ f"{graph.graph_id}. Not publishing to avoid inconsistent state.",
438
+ exc_info=True,
439
+ )
440
+ continue
441
+ except ValueError:
442
+ logger.error(
443
+ f"schedule_task raised ValueError for root task {task.task_id} in graph "
444
+ f"{graph.graph_id}. Skipping.",
445
+ exc_info=True,
446
+ )
447
+ continue
419
448
  await self.publish_task(task)
420
449
 
421
450
  return graph
@@ -50,9 +50,7 @@ class MessageActions:
50
50
  """
51
51
 
52
52
  @classmethod
53
- async def task_decoder(
54
- cls, msg: ServiceBusReceivedMessage, receiver: ServiceBusReceiver
55
- ) -> Task | None:
53
+ async def task_decoder(cls, msg: ServiceBusReceivedMessage, receiver: ServiceBusReceiver) -> Task | None:
56
54
  """Decode a ServiceBusReceivedMessage into a Task."""
57
55
  try:
58
56
  task = Task.model_validate_json(str(msg))
@@ -60,10 +58,7 @@ class MessageActions:
60
58
  return task
61
59
  except (JSONDecodeError, ValidationError):
62
60
  # This task is not parseable
63
- log_err_msg = (
64
- f"Invalid task sequence_number={msg.sequence_number} "
65
- f"exc_info={traceback.format_exc()}"
66
- )
61
+ log_err_msg = f"Invalid task sequence_number={msg.sequence_number} exc_info={traceback.format_exc()}"
67
62
  logger.error(log_err_msg)
68
63
  await receiver.dead_letter_message(
69
64
  msg,
@@ -73,17 +68,13 @@ class MessageActions:
73
68
  return None
74
69
 
75
70
  @staticmethod
76
- async def abandon_message(
77
- msg: ServiceBusReceivedMessage, receiver: ServiceBusReceiver
78
- ) -> None:
71
+ async def abandon_message(msg: ServiceBusReceivedMessage, receiver: ServiceBusReceiver) -> None:
79
72
  """Abandon the message being processed."""
80
73
  if msg is not None:
81
74
  sequence_number = msg.sequence_number
82
75
  try:
83
76
  await receiver.abandon_message(msg)
84
- logger.warning(
85
- f"Abandoning message: returned to queue {sequence_number=}"
86
- )
77
+ logger.warning(f"Abandoning message: returned to queue {sequence_number=}")
87
78
  except (
88
79
  MessageAlreadySettled,
89
80
  MessageLockLostError,
@@ -93,18 +84,13 @@ class MessageActions:
93
84
  logger.error(err_msg)
94
85
  raise exc.BoilermakerTaskLeaseLost(err_msg) from None
95
86
  except ServiceBusError as sb_exc:
96
- err_msg = (
97
- f"ServiceBusError requeuing message {sequence_number=} "
98
- f"exc_info={traceback.format_exc()}"
99
- )
87
+ err_msg = f"ServiceBusError requeuing message {sequence_number=} exc_info={traceback.format_exc()}"
100
88
  logger.error(err_msg)
101
89
  raise exc.BoilermakerServiceBusError(err_msg) from sb_exc
102
90
  return None
103
91
 
104
92
  @staticmethod
105
- async def complete_message(
106
- msg: ServiceBusReceivedMessage, receiver: ServiceBusReceiver
107
- ):
93
+ async def complete_message(msg: ServiceBusReceivedMessage, receiver: ServiceBusReceiver):
108
94
  """Complete the current message being processed."""
109
95
  try:
110
96
  await receiver.complete_message(msg)
@@ -113,10 +99,7 @@ class MessageActions:
113
99
  MessageLockLostError,
114
100
  SessionLockLostError,
115
101
  ):
116
- logmsg = (
117
- f"Failed to settle message sequence_number={msg.sequence_number} "
118
- f"exc_info={traceback.format_exc()}"
119
- )
102
+ logmsg = f"Failed to settle message sequence_number={msg.sequence_number} exc_info={traceback.format_exc()}"
120
103
  logger.error(logmsg)
121
104
  raise exc.BoilermakerTaskLeaseLost(msg) from None
122
105
  except ServiceBusError as sb_exc:
@@ -148,8 +131,7 @@ class MessageActions:
148
131
  SessionLockLostError,
149
132
  ):
150
133
  logmsg = (
151
- f"Failed to deadletter message sequence_number={msg.sequence_number} "
152
- f"exc_info={traceback.format_exc()}"
134
+ f"Failed to deadletter message sequence_number={msg.sequence_number} exc_info={traceback.format_exc()}"
153
135
  )
154
136
  logger.error(logmsg)
155
137
  raise exc.BoilermakerTaskLeaseLost(logmsg) from None
@@ -178,8 +160,7 @@ class MessageActions:
178
160
  return await receiver.renew_message_lock(msg)
179
161
  except (MessageLockLostError, MessageAlreadySettled, SessionLockLostError):
180
162
  logmsg = (
181
- f"Failed to renew message lock sequence_number={msg.sequence_number} "
182
- f"exc_info={traceback.format_exc()}"
163
+ f"Failed to renew message lock sequence_number={msg.sequence_number} exc_info={traceback.format_exc()}"
183
164
  )
184
165
  logger.error(logmsg)
185
166
  raise exc.BoilermakerTaskLeaseLost(logmsg) from None
@@ -201,9 +182,7 @@ class MessageActions:
201
182
  ):
202
183
  """Deadletter or complete the current task based on its configuration."""
203
184
  if task.msg is None:
204
- logger.warning(
205
- "No current message to settle for deadletter_or_complete_task"
206
- )
185
+ logger.warning("No current message to settle for deadletter_or_complete_task")
207
186
  return None
208
187
 
209
188
  if task.should_dead_letter:
@@ -270,22 +249,16 @@ class TaskEvaluatorBase:
270
249
  # Message handling actions
271
250
  async def abandon_current_message(self) -> None:
272
251
  if self.current_msg is not None:
273
- return await MessageActions.abandon_message(
274
- self.current_msg, self._receiver
275
- )
252
+ return await MessageActions.abandon_message(self.current_msg, self._receiver)
276
253
 
277
254
  async def complete_message(self) -> None:
278
255
  if self.current_msg is not None:
279
- return await MessageActions.complete_message(
280
- self.current_msg, self._receiver
281
- )
256
+ return await MessageActions.complete_message(self.current_msg, self._receiver)
282
257
 
283
258
  async def renew_message_lock(self) -> datetime.datetime | None:
284
259
  """Renew the lock on the current message being processed."""
285
260
  if self.current_msg is not None:
286
- return await MessageActions.renew_message_lock(
287
- self.current_msg, self._receiver
288
- )
261
+ return await MessageActions.renew_message_lock(self.current_msg, self._receiver)
289
262
  return None
290
263
 
291
264
  async def deadletter_or_complete_task(
@@ -293,9 +266,7 @@ class TaskEvaluatorBase:
293
266
  reason: str,
294
267
  detail: Exception | str | None = None,
295
268
  ) -> None:
296
- return await MessageActions.deadletter_or_complete_task(
297
- self.task, self._receiver, reason, detail=detail
298
- )
269
+ return await MessageActions.deadletter_or_complete_task(self.task, self._receiver, reason, detail=detail)
299
270
 
300
271
  async def __call__(self) -> TaskResult | None:
301
272
  """Call pre-processing hook and then `message_handler`."""
@@ -304,9 +275,7 @@ class TaskEvaluatorBase:
304
275
  if not await self.pre_process():
305
276
  return None
306
277
  except exc.BoilermakerUnregisteredFunction:
307
- await self.deadletter_or_complete_task(
308
- "ExpectationFailed", detail="Pre-processing expectation failed"
309
- )
278
+ await self.deadletter_or_complete_task("ExpectationFailed", detail="Pre-processing expectation failed")
310
279
  return TaskResult(
311
280
  task_id=self.task.task_id,
312
281
  graph_id=self.task.graph_id,
@@ -316,9 +285,7 @@ class TaskEvaluatorBase:
316
285
  )
317
286
  except Exception:
318
287
  logger.error("Exception in pre_process", exc_info=True)
319
- await self.deadletter_or_complete_task(
320
- "ProcessingError", detail="Pre-processing exception"
321
- )
288
+ await self.deadletter_or_complete_task("ProcessingError", detail="Pre-processing exception")
322
289
  return TaskResult(
323
290
  task_id=self.task.task_id,
324
291
  graph_id=self.task.graph_id,
@@ -349,7 +316,5 @@ class TaskEvaluatorBase:
349
316
  return False
350
317
 
351
318
  if not self.function_registry.get(self.task.function_name):
352
- raise exc.BoilermakerUnregisteredFunction(
353
- f"Missing registered function {self.task.function_name}"
354
- )
319
+ raise exc.BoilermakerUnregisteredFunction(f"Missing registered function {self.task.function_name}")
355
320
  return True
@@ -44,9 +44,7 @@ async def eval_task(
44
44
  logger.info(f"[{task.function_name}] Begin Task {task.sequence_number=}")
45
45
 
46
46
  if function is None:
47
- raise BoilermakerUnregisteredFunction(
48
- f"Function {task.function_name} not found in registry"
49
- )
47
+ raise BoilermakerUnregisteredFunction(f"Function {task.function_name} not found in registry")
50
48
 
51
49
  try:
52
50
  result = await function(
@@ -77,8 +75,7 @@ async def eval_task(
77
75
  status=TaskStatus.Success,
78
76
  )
79
77
  logger.info(
80
- f"[{task.function_name}] Completed Task {task.sequence_number=} "
81
- f"in {time.monotonic() - start:.3f}s"
78
+ f"[{task.function_name}] Completed Task {task.sequence_number=} in {time.monotonic() - start:.3f}s"
82
79
  )
83
80
  except RetryException as retry:
84
81
  # A retry has been requested:
@@ -62,9 +62,7 @@ class ResultsStorageTaskEvaluator(TaskEvaluatorBase):
62
62
  await self.complete_message()
63
63
  message_settled = True
64
64
  except exc.BoilermakerTaskLeaseLost:
65
- logger.error(
66
- f"Lost message lease when trying to complete early for task {self.task.function_name}"
67
- )
65
+ logger.error(f"Lost message lease when trying to complete early for task {self.task.function_name}")
68
66
  return TaskResult(
69
67
  task_id=self.task.task_id,
70
68
  graph_id=self.task.graph_id,
@@ -89,8 +87,7 @@ class ResultsStorageTaskEvaluator(TaskEvaluatorBase):
89
87
  message_settled = True
90
88
  except exc.BoilermakerTaskLeaseLost:
91
89
  logger.error(
92
- f"Lost message lease when trying to deadletter/complete "
93
- f" for task {self.task.function_name}"
90
+ f"Lost message lease when trying to deadletter/complete for task {self.task.function_name}"
94
91
  )
95
92
  return TaskResult(
96
93
  task_id=self.task.task_id,
@@ -45,9 +45,7 @@ class NoStorageEvaluator(TaskEvaluatorBase):
45
45
  await self.complete_message()
46
46
  message_settled = True
47
47
  except exc.BoilermakerTaskLeaseLost:
48
- logger.error(
49
- f"Lost message lease when trying to complete early for task {self.task.function_name}"
50
- )
48
+ logger.error(f"Lost message lease when trying to complete early for task {self.task.function_name}")
51
49
  return TaskResult(
52
50
  task_id=self.task.task_id,
53
51
  graph_id=self.task.graph_id,
@@ -71,8 +69,7 @@ class NoStorageEvaluator(TaskEvaluatorBase):
71
69
  message_settled = True
72
70
  except exc.BoilermakerTaskLeaseLost:
73
71
  logger.error(
74
- f"Lost message lease when trying to deadletter/complete "
75
- f" for task {self.task.function_name}"
72
+ f"Lost message lease when trying to deadletter/complete for task {self.task.function_name}"
76
73
  )
77
74
  return TaskResult(
78
75
  task_id=self.task.task_id,
@@ -45,6 +45,14 @@ class TaskGraphEvaluator(TaskEvaluatorBase):
45
45
  if storage_interface is None:
46
46
  raise ValueError("Storage interface is required for TaskGraphEvaluator")
47
47
 
48
+ if task.acks_early:
49
+ logger.warning(
50
+ f"Task {task.task_id} ({task.function_name}) uses acks_early=True in a "
51
+ "TaskGraph context. If the worker crashes after message settlement but before "
52
+ "the task result is written, this task will be permanently stuck in Started "
53
+ "status with no recovery path. Use acks_late=True (the default) for graph tasks."
54
+ )
55
+
48
56
  super().__init__(
49
57
  receiver,
50
58
  task,
@@ -61,6 +69,51 @@ class TaskGraphEvaluator(TaskEvaluatorBase):
61
69
  """Individual message handler"""
62
70
  message_settled = False
63
71
 
72
+ # Idempotent redelivery guard — if this task already reached a terminal state
73
+ # (e.g. a prior execution succeeded before the SB lock expired and redelivered), skip
74
+ # re-execution entirely. Writing Started on top of Success/Failure would regress the
75
+ # blob status and corrupt graph state.
76
+ if self.task.graph_id:
77
+ try:
78
+ _existing = await self.storage_interface.load_task_result(self.task.task_id, self.task.graph_id)
79
+ except exc.BoilermakerStorageError:
80
+ # Transient read failure — proceed normally; do NOT skip execution on a read
81
+ # error, as that would permanently stall the graph.
82
+ logger.warning(
83
+ f"Failed to read current status for task {self.task.task_id} before "
84
+ "writing Started; proceeding with execution",
85
+ exc_info=True,
86
+ )
87
+ _existing = None
88
+
89
+ if _existing is not None and _existing.status.finished:
90
+ logger.info(
91
+ f"Task {self.task.task_id} already in terminal state {_existing.status!r} "
92
+ "(SB redelivery); skipping re-execution"
93
+ )
94
+ _terminal_result = TaskResult(
95
+ task_id=self.task.task_id,
96
+ graph_id=self.task.graph_id,
97
+ status=_existing.status,
98
+ )
99
+ try:
100
+ await self.continue_graph(_terminal_result)
101
+ except exc.ContinueGraphError:
102
+ logger.error(
103
+ f"continue_graph failed on redelivery for task {self.task.task_id}; "
104
+ "suppressing settlement to allow redelivery",
105
+ exc_info=True,
106
+ )
107
+ return _terminal_result
108
+ try:
109
+ await self.complete_message()
110
+ except (exc.BoilermakerTaskLeaseLost, exc.BoilermakerServiceBusError):
111
+ logger.warning(
112
+ f"Failed to complete message on redelivery for task {self.task.task_id}; SB will redeliver",
113
+ exc_info=True,
114
+ )
115
+ return _terminal_result
116
+
64
117
  start_result = TaskResult(
65
118
  task_id=self.task.task_id,
66
119
  graph_id=self.task.graph_id,
@@ -78,9 +131,7 @@ class TaskGraphEvaluator(TaskEvaluatorBase):
78
131
  await self.complete_message()
79
132
  message_settled = True
80
133
  except exc.BoilermakerTaskLeaseLost:
81
- logger.error(
82
- f"Lost message lease when trying to complete early for task {self.task.function_name}"
83
- )
134
+ logger.error(f"Lost message lease when trying to complete early for task {self.task.function_name}")
84
135
  return TaskResult(
85
136
  task_id=self.task.task_id,
86
137
  graph_id=self.task.graph_id,
@@ -104,8 +155,7 @@ class TaskGraphEvaluator(TaskEvaluatorBase):
104
155
  message_settled = True
105
156
  except exc.BoilermakerTaskLeaseLost:
106
157
  logger.error(
107
- f"Lost message lease when trying to deadletter/complete "
108
- f" for task {self.task.function_name}"
158
+ f"Lost message lease when trying to deadletter/complete for task {self.task.function_name}"
109
159
  )
110
160
  return TaskResult(
111
161
  task_id=self.task.task_id,
@@ -266,8 +316,7 @@ class TaskGraphEvaluator(TaskEvaluatorBase):
266
316
  exc_info=True,
267
317
  )
268
318
  raise exc.ContinueGraphError(
269
- f"load_graph failed for graph {graph_id} after "
270
- f"{_LOAD_GRAPH_RETRY_POLICY.max_tries} attempts"
319
+ f"load_graph failed for graph {graph_id} after {_LOAD_GRAPH_RETRY_POLICY.max_tries} attempts"
271
320
  ) from last_exc
272
321
  else:
273
322
  # Should only be reached if max_tries == 0 (not expected).
@@ -289,9 +338,12 @@ class TaskGraphEvaluator(TaskEvaluatorBase):
289
338
  logger.error(
290
339
  f"Task status mismatch in continue_graph for graph {graph_id}: "
291
340
  f"expected {completed_task_result.task_id} to be {completed_task_result.status}, "
292
- f"but got {loaded_task_status}"
341
+ f"but got {loaded_task_status}. Suppressing settlement to allow redelivery."
342
+ )
343
+ raise exc.ContinueGraphError(
344
+ f"Status mismatch for task {completed_task_result.task_id} in graph {graph_id}: "
345
+ f"expected {completed_task_result.status}, got {loaded_task_status}"
293
346
  )
294
- return None
295
347
 
296
348
  # Snapshot tasks already in Scheduled status BEFORE the first pass.
297
349
  # The second pass uses this snapshot so that tasks freshly scheduled
@@ -322,14 +374,19 @@ class TaskGraphEvaluator(TaskEvaluatorBase):
322
374
  )
323
375
  continue
324
376
 
325
- ready_count += 1
326
- await self.publish_task(ready_task)
327
- logger.info(f"Publishing ready task {ready_task.task_id} in graph {graph_id} total={ready_count}")
377
+ try:
378
+ await self.publish_task(ready_task)
379
+ ready_count += 1
380
+ logger.info(f"Publishing ready task {ready_task.task_id} in graph {graph_id} total={ready_count}")
381
+ except Exception:
382
+ logger.error(
383
+ f"Failed to publish ready task {ready_task.task_id} in graph {graph_id}; "
384
+ "task is in Scheduled status in blob and will be recovered by crash-recovery pass on redelivery.",
385
+ exc_info=True,
386
+ )
328
387
 
329
388
  if ready_count == 0:
330
- logger.info(
331
- f"No new tasks ready in graph {graph_id} after task {completed_task_result.task_id}"
332
- )
389
+ logger.info(f"No new tasks ready in graph {graph_id} after task {completed_task_result.task_id}")
333
390
 
334
391
  # Second pass: re-publish tasks that were ALREADY in Scheduled status when the
335
392
  # graph was loaded (crash-recovery).
@@ -33,9 +33,7 @@ class BoilermakerStorageError(Exception):
33
33
  def __getattr__(self, item):
34
34
  if self.details.get(item, None):
35
35
  return self.details[item]
36
- raise AttributeError(
37
- f"BoilermakerStorageError object has no attribute '{item}'"
38
- )
36
+ raise AttributeError(f"BoilermakerStorageError object has no attribute '{item}'")
39
37
 
40
38
 
41
39
  class BoilermakerUnregisteredFunction(ValueError):
@@ -138,7 +138,7 @@ class RetryPolicy(BaseModel):
138
138
  case RetryMode.Fixed:
139
139
  return min(self.delay, self.delay_max)
140
140
  case RetryMode.Linear:
141
- return min(self.delay * attempts_so_far, self.delay_max)
141
+ return min(self.delay * (attempts_so_far + 1), self.delay_max)
142
142
  case RetryMode.Exponential:
143
143
  # Jitter is added so that a lot of work doesn't get bunched up at the
144
144
  # end and eventually hurt throughput.
@@ -1,7 +1,7 @@
1
1
  import logging
2
2
  from abc import ABC, abstractmethod
3
3
 
4
- from boilermaker.task import GraphId, TaskGraph, TaskResult, TaskResultSlim
4
+ from boilermaker.task import GraphId, TaskGraph, TaskId, TaskResult, TaskResultSlim
5
5
 
6
6
  logger = logging.getLogger(__name__)
7
7
 
@@ -40,3 +40,22 @@ class StorageInterface(ABC):
40
40
  task_result: The TaskResult instance to storage.
41
41
  """
42
42
  raise NotImplementedError
43
+
44
+ @abstractmethod
45
+ async def load_task_result(self, task_id: TaskId, graph_id: GraphId) -> TaskResultSlim | None:
46
+ """Load a single task result from storage.
47
+
48
+ Used by the idempotent redelivery guard in TaskGraphEvaluator to check whether
49
+ a task has already reached a terminal state before writing Started on redelivery.
50
+
51
+ Args:
52
+ task_id: The TaskId of the task result to load.
53
+ graph_id: The GraphId the task belongs to.
54
+
55
+ Returns:
56
+ The TaskResultSlim instance, or None if not found.
57
+
58
+ Raises:
59
+ BoilermakerStorageError: If the result cannot be loaded for reasons other than not found.
60
+ """
61
+ raise NotImplementedError
@@ -16,7 +16,7 @@ from pydantic import ValidationError
16
16
 
17
17
  from boilermaker.exc import BoilermakerStorageError
18
18
  from boilermaker.storage import StorageInterface
19
- from boilermaker.task import GraphId, TaskGraph, TaskResult, TaskResultSlim
19
+ from boilermaker.task import GraphId, TaskGraph, TaskId, TaskResult, TaskResultSlim
20
20
 
21
21
  logger = logging.getLogger(__name__)
22
22
 
@@ -89,7 +89,10 @@ class BlobClientStorage(AzureBlobStorageClient, StorageInterface):
89
89
  except ValidationError as e:
90
90
  raise BoilermakerStorageError(
91
91
  f"Failed to deserialize task result in graph {graph_id}: {e}",
92
+ name=blob.name,
93
+ graph_id=graph_id,
92
94
  status_code=None,
95
+ reason="DeserializationError",
93
96
  ) from e
94
97
  tr.etag = blob.etag
95
98
  if tr.graph_id == graph_id:
@@ -161,6 +164,50 @@ class BlobClientStorage(AzureBlobStorageClient, StorageInterface):
161
164
  await lease.release()
162
165
  return graph
163
166
 
167
+ async def load_task_result(self, task_id: TaskId, graph_id: GraphId) -> TaskResultSlim | None:
168
+ """Load a single task result from Azure Blob Storage.
169
+
170
+ Returns None if the blob does not exist (404). Raises BoilermakerStorageError
171
+ for any other failure. Used by the idempotent redelivery guard in
172
+ TaskGraphEvaluator to check terminal status before writing Started.
173
+
174
+ Args:
175
+ task_id: The TaskId of the task result to load.
176
+ graph_id: The GraphId the task belongs to.
177
+ """
178
+ fname = f"{self.task_result_prefix}/{graph_id}/{task_id}.json"
179
+ blob_etag = None
180
+ try:
181
+ # We need to make sure we load the etag
182
+ async with self.get_blob_client(fname) as blob_client:
183
+ blob_properties = await blob_client.get_blob_properties()
184
+ blob_etag = blob_properties.etag if blob_properties and blob_properties.etag is not None else None
185
+ contents = await self.download_blob(fname)
186
+ except AzureBlobError as exc:
187
+ if exc.status_code == 404:
188
+ return None
189
+ raise BoilermakerStorageError(
190
+ f"Failed to load task result {task_id}",
191
+ task_id=task_id,
192
+ graph_id=graph_id,
193
+ status_code=exc.status_code,
194
+ reason=exc.reason,
195
+ ) from exc
196
+ if contents is None:
197
+ return None
198
+ try:
199
+ result = TaskResultSlim.model_validate_json(contents)
200
+ result.etag = blob_etag
201
+ return result
202
+ except ValidationError as e:
203
+ raise BoilermakerStorageError(
204
+ f"Failed to deserialize task result {task_id}: {e}",
205
+ task_id=task_id,
206
+ graph_id=graph_id,
207
+ status_code=None,
208
+ reason="DeserializationError",
209
+ ) from e
210
+
164
211
  async def store_task_result(self, task_result: TaskResult | TaskResultSlim, etag: str | None = None) -> None:
165
212
  """Stores a TaskResult to Azure Blob Storage.
166
213
 
@@ -1,4 +1,5 @@
1
1
  import itertools
2
+ import logging
2
3
  import typing
3
4
  from collections import defaultdict
4
5
  from collections.abc import Generator
@@ -10,6 +11,8 @@ from .result import TaskResult, TaskResultSlim, TaskStatus
10
11
  from .task import Task
11
12
  from .task_id import GraphId, ident_field, TaskId
12
13
 
14
+ logger = logging.getLogger("boilermaker.app")
15
+
13
16
 
14
17
  class TaskGraph(BaseModel):
15
18
  """
@@ -171,7 +174,9 @@ class TaskGraph(BaseModel):
171
174
  if not self.edges[parent_id]: # Remove empty set
172
175
  del self.edges[parent_id]
173
176
  del self.children[task.task_id]
174
- raise ValueError(f"Adding task {task.task_id} with parent {parent_id} would create a cycle in the DAG")
177
+ raise ValueError(
178
+ f"Adding task {task.task_id} with parents {parent_ids} would create a cycle in the DAG"
179
+ )
175
180
 
176
181
  # If we leave `on_success` and `on_failure` it's potentially confusing for both callers
177
182
  # and our own evaluation. It also has the potential to create cycles inadvertently, so we
@@ -294,12 +299,14 @@ class TaskGraph(BaseModel):
294
299
  def generate_ready_tasks(self) -> Generator[Task]:
295
300
  """Get a list of tasks that are ready to be executed (not started and all antecedents succeeded)."""
296
301
  for task_id in self.children.keys():
297
- # Task is ready if:
298
- # 1. It has no result yet (never started) OR it has Pending status
299
- # 2. All its antecedents have succeeded
300
302
  task_result = self.results.get(task_id)
301
- is_not_started = task_result is None or task_result.status == TaskStatus.Pending
302
- if is_not_started and self.task_is_ready(task_id):
303
+ if task_result is None:
304
+ logger.warning(
305
+ f"Task {task_id} has no result blob in graph {self.graph_id}; "
306
+ "skipping. This may indicate a partial store_graph failure."
307
+ )
308
+ continue
309
+ if task_result.status == TaskStatus.Pending and self.task_is_ready(task_id):
303
310
  yield self.children[task_id]
304
311
 
305
312
  def generate_failure_ready_tasks(self) -> Generator[Task]:
@@ -19,7 +19,6 @@ class TaskStatus(enum.StrEnum):
19
19
  RetriesExhausted = "retries_exhausted"
20
20
  Deadlettered = "deadlettered"
21
21
 
22
-
23
22
  @classmethod
24
23
  def default(cls) -> "TaskStatus":
25
24
  """Get the default task status.
@@ -44,9 +44,7 @@ async def start_span_from_parent_event_async(
44
44
  """
45
45
  if otel_enabled:
46
46
  tracectx = extract(get_traceparent_context(event))
47
- with tracer.start_as_current_span(
48
- name, context=tracectx, kind=trace.SpanKind.CONSUMER
49
- ) as current_span:
47
+ with tracer.start_as_current_span(name, context=tracectx, kind=trace.SpanKind.CONSUMER) as current_span:
50
48
  yield current_span
51
49
  else:
52
50
  yield None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: boilermaker-servicebus
3
- Version: 1.0.0.dev4
3
+ Version: 1.0.0.dev5
4
4
  Summary: An async python Background task system using Azure Service Bus Queues
5
5
  Author-email: Erik Aker <eaker@mulliganfunding.com>
6
6
  License: Apache License