loopgraph 0.2.0.tar.gz → 0.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35)
  1. {loopgraph-0.2.0 → loopgraph-0.3.0}/PKG-INFO +25 -1
  2. {loopgraph-0.2.0 → loopgraph-0.3.0}/README.md +24 -0
  3. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/graph.py +36 -10
  4. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/state.py +8 -1
  5. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/scheduler/scheduler.py +198 -18
  6. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/PKG-INFO +25 -1
  7. {loopgraph-0.2.0 → loopgraph-0.3.0}/pyproject.toml +1 -1
  8. {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_eventbus.py +1 -0
  9. {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_integration_workflows.py +446 -5
  10. loopgraph-0.3.0/tests/test_scheduler_recovery.py +256 -0
  11. loopgraph-0.2.0/tests/test_scheduler_recovery.py +0 -75
  12. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/__init__.py +0 -0
  13. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/_debug.py +0 -0
  14. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/bus/__init__.py +0 -0
  15. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/bus/eventbus.py +0 -0
  16. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/concurrency/__init__.py +0 -0
  17. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/concurrency/policies.py +0 -0
  18. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/__init__.py +0 -0
  19. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/types.py +0 -0
  20. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/diagnostics/__init__.py +0 -0
  21. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/diagnostics/inspect.py +0 -0
  22. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/persistence/__init__.py +0 -0
  23. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/persistence/event_log.py +0 -0
  24. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/persistence/snapshot.py +0 -0
  25. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/py.typed +0 -0
  26. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/registry/__init__.py +0 -0
  27. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/registry/function_registry.py +0 -0
  28. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/scheduler/__init__.py +0 -0
  29. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/SOURCES.txt +0 -0
  30. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/dependency_links.txt +0 -0
  31. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/requires.txt +0 -0
  32. {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/top_level.txt +0 -0
  33. {loopgraph-0.2.0 → loopgraph-0.3.0}/setup.cfg +0 -0
  34. {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_doctests.py +0 -0
  35. {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_priority_concurrency.py +0 -0
{loopgraph-0.2.0 → loopgraph-0.3.0}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loopgraph
- Version: 0.2.0
+ Version: 0.3.0
  Summary: Event-driven graph workflow engine with native loop support.
  Author: LoopGraph Team
  License: MIT
@@ -131,6 +131,30 @@ registry.register("my_node", make_handler(bus, my_agent))
  - Reset clears upstream-completion tracking and preserves cumulative `visit_count`
  - Overlapping loops sharing any node are rejected at graph construction time

+ ## Scheduler Semantics
+
+ - The scheduler seeds its internal pending set from graph entry nodes only. A
+ node enters pending later only when an upstream edge actually activates it.
+ - Unselected `SWITCH` branches never enter pending, so leaf branches that were
+ not chosen cannot deadlock the workflow.
+ - A graph with nodes but no entry nodes now fails fast with `ValueError`
+ instead of entering a deadlocked run loop.
+ - If a `SWITCH` returns a route that matches no downstream edge and no
+ `exit` fallback edge exists, the scheduler raises `ValueError`.
+ - `NodeKind.TERMINAL` keeps the same runtime scheduling semantics as `TASK`.
+
+ ## Recovery Boundaries
+
+ - Persisted scheduler snapshots now include `snapshot_format_version`.
+ - Resume is supported only for snapshots with the current supported snapshot
+ format version.
+ - If a snapshot is missing `snapshot_format_version` or carries an unsupported
+ version, resume fails fast with a `ValueError` that reports the actual
+ version, the supported version, and discard-or-migrate guidance.
+ - On resume, pending is rebuilt from uncompleted entry nodes plus nodes already
+ persisted as `PENDING` or `RUNNING`. Persisted `RUNNING` nodes are reset to
+ `PENDING` before scheduling.
+
  ---

  ## Installation
{loopgraph-0.2.0 → loopgraph-0.3.0}/README.md
@@ -116,6 +116,30 @@ registry.register("my_node", make_handler(bus, my_agent))
  - Reset clears upstream-completion tracking and preserves cumulative `visit_count`
  - Overlapping loops sharing any node are rejected at graph construction time

+ ## Scheduler Semantics
+
+ - The scheduler seeds its internal pending set from graph entry nodes only. A
+ node enters pending later only when an upstream edge actually activates it.
+ - Unselected `SWITCH` branches never enter pending, so leaf branches that were
+ not chosen cannot deadlock the workflow.
+ - A graph with nodes but no entry nodes now fails fast with `ValueError`
+ instead of entering a deadlocked run loop.
+ - If a `SWITCH` returns a route that matches no downstream edge and no
+ `exit` fallback edge exists, the scheduler raises `ValueError`.
+ - `NodeKind.TERMINAL` keeps the same runtime scheduling semantics as `TASK`.
+
+ ## Recovery Boundaries
+
+ - Persisted scheduler snapshots now include `snapshot_format_version`.
+ - Resume is supported only for snapshots with the current supported snapshot
+ format version.
+ - If a snapshot is missing `snapshot_format_version` or carries an unsupported
+ version, resume fails fast with a `ValueError` that reports the actual
+ version, the supported version, and discard-or-migrate guidance.
+ - On resume, pending is rebuilt from uncompleted entry nodes plus nodes already
+ persisted as `PENDING` or `RUNNING`. Persisted `RUNNING` nodes are reset to
+ `PENDING` before scheduling.
+
  ---

  ## Installation
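The "Recovery Boundaries" bullets added above reduce to a single guard at resume time. Below is a minimal sketch of that rule as an illustration only, not the library's `Scheduler._validate_snapshot_format`; it assumes nothing beyond the documented payload key and the version value 2 shown elsewhere in this diff.

```python
from typing import Any, Dict

SUPPORTED_SNAPSHOT_FORMAT_VERSION = 2  # matches SNAPSHOT_FORMAT_VERSION in this release


def check_resumable(snapshot: Dict[str, Any]) -> None:
    """Fail fast when a persisted snapshot cannot be resumed."""
    version = snapshot.get("snapshot_format_version")  # None when the key is missing
    if version != SUPPORTED_SNAPSHOT_FORMAT_VERSION:
        raise ValueError(
            f"Unsupported snapshot format version {version!r}; "
            f"supported version is {SUPPORTED_SNAPSHOT_FORMAT_VERSION}. "
            "Discard or migrate the snapshot before resuming."
        )


check_resumable({"snapshot_format_version": 2, "states": {}, "completed_nodes": []})  # passes
# check_resumable({"states": {}})  # would raise ValueError: version is None
```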
{loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/graph.py
@@ -168,9 +168,7 @@ class Graph:
  f"Aggregate node '{node.id}' requires 'required' > 0"
  )
  upstream_count = len(self.upstream_nodes(node.id))
- log_variable_change(
- func_name, "upstream_count", upstream_count
- )
+ log_variable_change(func_name, "upstream_count", upstream_count)
  if required_raw > upstream_count:
  log_branch(func_name, "aggregate_required_exceeds_upstream")
  raise ValueError(
@@ -200,18 +198,14 @@ class Graph:
  log_branch(func_name, "shared_node_multi_loop_error")
  shared_list = sorted(shared_nodes)
  log_variable_change(func_name, "shared_list", shared_list)
- raise ValueError(
- f"Graph has multi-loop shared nodes: {shared_list}"
- )
+ raise ValueError(f"Graph has multi-loop shared nodes: {shared_list}")
  log_branch(func_name, "cycle_validation_passed")

  @staticmethod
  def _canonical_cycle(cycle: List[str]) -> Tuple[str, ...]:
  """Normalize a cycle so equivalent rotations share one representation."""
  cycle_len = len(cycle)
- rotations = [
- tuple(cycle[index:] + cycle[:index]) for index in range(cycle_len)
- ]
+ rotations = [tuple(cycle[index:] + cycle[:index]) for index in range(cycle_len)]
  return min(rotations)

  def _find_cycles(self) -> List[Tuple[str, ...]]:
@@ -327,6 +321,36 @@ class Graph:
  log_variable_change(func_name, "sources", sources)
  return sources

+ def entry_nodes(self) -> List[Node]:
+ """Return nodes with no upstream edges.
+
+ >>> graph = Graph(
+ ... nodes={
+ ... "start": Node(id="start", kind=NodeKind.TASK, handler="start"),
+ ... "middle": Node(id="middle", kind=NodeKind.TASK, handler="middle"),
+ ... "lonely": Node(id="lonely", kind=NodeKind.TASK, handler="lonely"),
+ ... },
+ ... edges={"e": Edge(id="e", source="start", target="middle")},
+ ... )
+ >>> [node.id for node in graph.entry_nodes()]
+ ['start', 'lonely']
+ """
+ func_name = "Graph.entry_nodes"
+ log_parameter(func_name)
+ entries: List[Node] = []
+ log_variable_change(func_name, "entries", entries)
+ for iteration, node_id in enumerate(self.nodes):
+ log_loop_iteration(func_name, "nodes", iteration)
+ reverse_edges = self._reverse_adj.get(node_id, [])
+ log_variable_change(func_name, "reverse_edges", reverse_edges)
+ if reverse_edges:
+ log_branch(func_name, "has_upstream")
+ continue
+ log_branch(func_name, "entry_node")
+ entries.append(self.nodes[node_id])
+ log_variable_change(func_name, "entries", list(entries))
+ return entries
+
  def to_dict(self) -> Dict[str, object]:
  """Serialize the graph to a dictionary."""
  func_name = "Graph.to_dict"
@@ -359,7 +383,9 @@ class Graph:
  return payload

  @classmethod
- def from_dict(cls, payload: Mapping[str, Iterable[Mapping[str, object]]]) -> "Graph":
+ def from_dict(
+ cls, payload: Mapping[str, Iterable[Mapping[str, object]]]
+ ) -> "Graph":
  """Deserialize a graph from a dictionary payload."""
  func_name = "Graph.from_dict"
  log_parameter(func_name, payload=payload)
{loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/state.py
@@ -14,6 +14,8 @@ from .._debug import (
  from .graph import Graph
  from .types import NodeKind, NodeStatus, VisitOutcome

+ SNAPSHOT_FORMAT_VERSION = 2
+

  @dataclass
  class NodeVisit:
@@ -390,7 +392,11 @@ class ExecutionState:
  log_variable_change(func_name, "state_after", state)

  def snapshot(self) -> Dict[str, Any]:
- """Produce a JSON-serializable snapshot of execution state."""
+ """Produce a JSON-serializable snapshot of execution state.
+
+ >>> ExecutionState().snapshot()["snapshot_format_version"]
+ 2
+ """
  func_name = "ExecutionState.snapshot"
  log_parameter(func_name)
  states_payload: Dict[str, Dict[str, Any]] = {}
@@ -402,6 +408,7 @@ class ExecutionState:
  func_name, f"states_payload[{node_id!r}]", states_payload[node_id]
  )
  payload = {
+ "snapshot_format_version": SNAPSHOT_FORMAT_VERSION,
  "states": states_payload,
  "completed_nodes": sorted(self._completed_nodes),
  }
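For orientation, a snapshot payload produced after this change has roughly the shape below. Only the three top-level keys, the per-node `status` field, and the version value 2 come from this diff; the node ids and status strings are illustrative, and other per-node fields are elided.

```python
snapshot = {
    "snapshot_format_version": 2,          # SNAPSHOT_FORMAT_VERSION in this release
    "states": {
        "review": {"status": "running"},   # per-node state; extra fields elided, string value assumed
    },
    "completed_nodes": ["draft"],          # sorted ids of completed nodes
}
```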
{loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/scheduler/scheduler.py
@@ -13,11 +13,13 @@ from .._debug import (
  from ..bus.eventbus import Event, EventBus
  from ..concurrency import ConcurrencyManager, SemaphorePolicy
  from ..core.graph import Edge, Graph, Node
- from ..core.state import ExecutionState
+ from ..core.state import SNAPSHOT_FORMAT_VERSION, ExecutionState
  from ..core.types import EventType, NodeKind, NodeStatus, VisitOutcome
  from ..persistence import EventLog, SnapshotStore
  from ..registry.function_registry import FunctionRegistry

+ SUPPORTED_SNAPSHOT_FORMAT_VERSION = SNAPSHOT_FORMAT_VERSION
+

  class Scheduler:
  """Execute graphs by dispatching node handlers.
@@ -179,15 +181,28 @@ class Scheduler:
  graph_id=graph_id,
  initial_payload=initial_payload,
  )
- execution_state = self._load_or_create_state(graph_id)
+ execution_state, resumed_from_snapshot = self._load_or_create_state(graph_id)
  log_variable_change(func_name, "execution_state", execution_state)
+ log_variable_change(func_name, "resumed_from_snapshot", resumed_from_snapshot)
  snapshot_data = execution_state.snapshot()
  log_variable_change(func_name, "snapshot_data", snapshot_data)
  results = self._initial_results_from_snapshot(snapshot_data)
  log_variable_change(func_name, "results", results)
  completed_nodes = set(snapshot_data["completed_nodes"])
  log_variable_change(func_name, "completed_nodes", completed_nodes)
- pending = {node_id for node_id in graph.nodes if node_id not in completed_nodes}
+ if resumed_from_snapshot:
+ log_branch(func_name, "supported_snapshot_resume")
+ pending = self._seed_pending_from_supported_snapshot(
+ graph=graph,
+ snapshot_data=snapshot_data,
+ completed_nodes=completed_nodes,
+ )
+ else:
+ log_branch(func_name, "fresh_run")
+ pending = self._seed_pending_from_entry_nodes(
+ graph=graph,
+ completed_nodes=completed_nodes,
+ )
  log_variable_change(func_name, "pending", pending)
  loop_iteration = 0
  while pending:
@@ -225,7 +240,11 @@
  status=NodeStatus.RUNNING,
  )
  )
- handler_result, reentry_targets = await self._execute_node(
+ (
+ handler_result,
+ reentry_targets,
+ activated_targets,
+ ) = await self._execute_node(
  node=node,
  graph=graph,
  execution_state=execution_state,
@@ -234,12 +253,34 @@
  )
  log_variable_change(func_name, "handler_result", handler_result)
  log_variable_change(func_name, "reentry_targets", reentry_targets)
+ log_variable_change(func_name, "activated_targets", activated_targets)
  results[node_id] = handler_result
  log_variable_change(func_name, "results", results)
+ completed_nodes.add(node_id)
+ log_variable_change(func_name, "completed_nodes", completed_nodes)
  pending.remove(node_id)
  log_variable_change(func_name, "pending", pending)
+ for (
+ activated_iteration,
+ activated_target,
+ ) in enumerate(activated_targets):
+ log_loop_iteration(
+ func_name, "activated_targets", activated_iteration
+ )
+ if activated_target in completed_nodes:
+ log_branch(func_name, "activated_target_completed")
+ continue
+ log_branch(func_name, "activated_target_pending")
+ pending.add(activated_target)
+ log_variable_change(
+ func_name,
+ f"pending_with_activated_{activated_target}",
+ pending,
+ )
  for reentry_iteration, reentry_target in enumerate(reentry_targets):
  log_loop_iteration(func_name, "reentry_targets", reentry_iteration)
+ completed_nodes.discard(reentry_target)
+ log_variable_change(func_name, "completed_nodes", completed_nodes)
  pending.add(reentry_target)
  log_variable_change(
  func_name,
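Stripped of logging, the new bookkeeping in the run loop above amounts to the helper below. This is a restatement for readability under the assumption that `pending` and `completed_nodes` are plain string sets, not the actual implementation.

```python
from typing import Iterable, Set


def apply_completion(
    node_id: str,
    activated_targets: Iterable[str],
    reentry_targets: Iterable[str],
    pending: Set[str],
    completed_nodes: Set[str],
) -> None:
    """Update the scheduler's pending/completed sets after a node finishes."""
    completed_nodes.add(node_id)
    pending.discard(node_id)
    # First-time activations become pending unless they already completed.
    for target in activated_targets:
        if target not in completed_nodes:
            pending.add(target)
    # Loop re-entry targets are forced back into pending, even if previously completed.
    for target in reentry_targets:
        completed_nodes.discard(target)
        pending.add(target)
```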
@@ -270,7 +311,7 @@
  execution_state: ExecutionState,
  graph_id: str,
  upstream_payload: Optional[Any],
- ) -> Tuple[Any, List[str]]:
+ ) -> Tuple[Any, List[str], List[str]]:
  """Execute a single node handler."""
  func_name = "Scheduler._execute_node"
  log_parameter(
@@ -329,6 +370,8 @@
  log_variable_change(func_name, "selected_edges", selected_edges)
  reentry_targets: List[str] = []
  log_variable_change(func_name, "reentry_targets", reentry_targets)
+ activated_targets: List[str] = []
+ log_variable_change(func_name, "activated_targets", activated_targets)
  for iteration, edge in enumerate(selected_edges):
  log_loop_iteration(func_name, "downstream_edges", iteration)
  log_variable_change(func_name, "edge", edge)
@@ -364,6 +407,12 @@
  log_branch(func_name, "reentry_failed_skip")
  elif downstream_visits == 0 and downstream_status is NodeStatus.PENDING:
  log_branch(func_name, "initial_pending_target")
+ activated_targets.append(downstream.id)
+ log_variable_change(
+ func_name,
+ "activated_targets",
+ list(activated_targets),
+ )
  else:
  log_branch(func_name, "reentry_non_terminal_error")
  raise RuntimeError(
@@ -372,7 +421,7 @@
  )
  execution_state.note_upstream_completion(downstream.id, node.id)
  self._persist_snapshot(execution_state, graph_id)
- return result, reentry_targets
+ return result, reentry_targets, activated_targets

  def _build_input_payload(
  self,
@@ -406,9 +455,7 @@
  log_loop_iteration(func_name, "aggregate_upstream", iteration)
  if upstream.id in results:
  aggregated.append(results[upstream.id])
- log_variable_change(
- func_name, "aggregated", list(aggregated)
- )
+ log_variable_change(func_name, "aggregated", list(aggregated))
  log_variable_change(func_name, "payload", aggregated)
  return aggregated

@@ -476,7 +523,9 @@
  log_branch(func_name, "fallback_exit")
  return exit_edges
  log_branch(func_name, "no_matching_edge")
- return []
+ raise ValueError(
+ f"Switch node '{node.id}' returned unmatched route {route!r} with no fallback edge"
+ )

  def _has_remaining_visits(
  self, graph: Graph, execution_state: ExecutionState, node_id: str
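The switch-routing change above reads as a three-step policy: edges matching the returned route win, an `exit` fallback edge is used only when nothing matched, and an unmatched route with no fallback is now an error rather than a silent dead end. A simplified sketch of that policy, assuming the matching and exit edge lists were computed earlier (the hunk only shows the tail of the real method):

```python
from typing import Any, List


def resolve_switch_edges(node_id: str, route: Any, matching: List, exit_edges: List) -> List:
    """Pick the edges a SWITCH node activates, or fail loudly."""
    if matching:
        return matching
    if exit_edges:
        return exit_edges
    raise ValueError(
        f"Switch node '{node_id}' returned unmatched route {route!r} with no fallback edge"
    )
```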
@@ -495,7 +544,7 @@
  log_variable_change(func_name, "has_capacity", has_capacity)
  return has_capacity

- def _load_or_create_state(self, graph_id: str) -> ExecutionState:
+ def _load_or_create_state(self, graph_id: str) -> Tuple[ExecutionState, bool]:
  """Retrieve execution state from snapshots if available."""

  func_name = "Scheduler._load_or_create_state"
@@ -504,20 +553,153 @@
  log_branch(func_name, "no_snapshot_store")
  state = ExecutionState()
  log_variable_change(func_name, "state", state)
- return state
+ return state, False

  try:
  snapshot_payload = self._snapshot_store.load(graph_id)
  except KeyError:
  log_branch(func_name, "snapshot_missing")
  state = ExecutionState()
+ resumed_from_snapshot = False
  else:
  log_branch(func_name, "snapshot_loaded")
- state = ExecutionState.restore(dict(snapshot_payload))
+ log_variable_change(func_name, "snapshot_payload", snapshot_payload)
+ snapshot_dict = dict(snapshot_payload)
+ log_variable_change(func_name, "snapshot_dict", snapshot_dict)
+ self._validate_snapshot_format(snapshot_dict)
+ state = ExecutionState.restore(snapshot_dict)
+ self._reset_running_nodes_for_resume(
+ execution_state=state,
+ snapshot_data=snapshot_dict,
+ )
+ resumed_from_snapshot = True
  log_variable_change(func_name, "state", state)
- return state
+ log_variable_change(func_name, "resumed_from_snapshot", resumed_from_snapshot)
+ return state, resumed_from_snapshot
+
+ def _validate_snapshot_format(self, snapshot_data: Dict[str, Any]) -> None:
+ """Reject unsupported snapshot payloads before restore."""
+
+ func_name = "Scheduler._validate_snapshot_format"
+ log_parameter(func_name, snapshot_data=snapshot_data)
+ version = snapshot_data.get("snapshot_format_version")
+ log_variable_change(func_name, "version", version)
+ log_variable_change(
+ func_name,
+ "supported_version",
+ SUPPORTED_SNAPSHOT_FORMAT_VERSION,
+ )
+ if version == SUPPORTED_SNAPSHOT_FORMAT_VERSION:
+ log_branch(func_name, "supported_version")
+ return
+ log_branch(func_name, "unsupported_version")
+ raise ValueError(
+ "Unsupported snapshot format version "
+ f"{version!r}; supported version is "
+ f"{SUPPORTED_SNAPSHOT_FORMAT_VERSION}. "
+ "Discard or migrate the snapshot before resuming."
+ )

- def _initial_results_from_snapshot(self, snapshot: Dict[str, Any]) -> Dict[str, Any]:
+ def _reset_running_nodes_for_resume(
+ self,
+ *,
+ execution_state: ExecutionState,
+ snapshot_data: Dict[str, Any],
+ ) -> None:
+ """Reset RUNNING snapshot nodes to PENDING before scheduling."""
+
+ func_name = "Scheduler._reset_running_nodes_for_resume"
+ log_parameter(
+ func_name,
+ execution_state=execution_state,
+ snapshot_data=snapshot_data,
+ )
+ for iteration, (node_id, node_state) in enumerate(
+ snapshot_data.get("states", {}).items()
+ ):
+ log_loop_iteration(func_name, "states", iteration)
+ status_value = node_state.get("status")
+ log_variable_change(func_name, "status_value", status_value)
+ if status_value != NodeStatus.RUNNING.value:
+ log_branch(func_name, "state_not_running")
+ continue
+ log_branch(func_name, "reset_running_to_pending")
+ state = execution_state._ensure_state(node_id)
+ log_variable_change(func_name, "state_before", state)
+ state.status = NodeStatus.PENDING
+ log_variable_change(func_name, "state_after", state)
+
+ def _seed_pending_from_entry_nodes(
+ self,
+ *,
+ graph: Graph,
+ completed_nodes: set[str],
+ ) -> set[str]:
+ """Seed a fresh run from entry nodes only."""
+
+ func_name = "Scheduler._seed_pending_from_entry_nodes"
+ log_parameter(func_name, graph=graph, completed_nodes=completed_nodes)
+ entry_nodes = graph.entry_nodes()
+ log_variable_change(func_name, "entry_nodes", entry_nodes)
+ entry_ids = {node.id for node in entry_nodes}
+ log_variable_change(func_name, "entry_ids", entry_ids)
+ if graph.nodes and not entry_ids:
+ log_branch(func_name, "no_entry_nodes")
+ raise ValueError("Graph has no entry nodes (nodes with no upstream edges)")
+ log_branch(func_name, "seed_from_entries")
+ pending = {node_id for node_id in entry_ids if node_id not in completed_nodes}
+ log_variable_change(func_name, "pending", pending)
+ return pending
+
+ def _seed_pending_from_supported_snapshot(
+ self,
+ *,
+ graph: Graph,
+ snapshot_data: Dict[str, Any],
+ completed_nodes: set[str],
+ ) -> set[str]:
+ """Seed a resumed run from supported snapshot state plus entry nodes."""
+
+ func_name = "Scheduler._seed_pending_from_supported_snapshot"
+ log_parameter(
+ func_name,
+ graph=graph,
+ snapshot_data=snapshot_data,
+ completed_nodes=completed_nodes,
+ )
+ pending: set[str] = set()
+ log_variable_change(func_name, "pending", pending)
+ for iteration, node in enumerate(graph.entry_nodes()):
+ log_loop_iteration(func_name, "entry_nodes", iteration)
+ if node.id in completed_nodes:
+ log_branch(func_name, "entry_completed")
+ continue
+ log_branch(func_name, "entry_pending")
+ pending.add(node.id)
+ log_variable_change(func_name, "pending", pending)
+ for iteration, (node_id, node_state) in enumerate(
+ snapshot_data.get("states", {}).items()
+ ):
+ log_loop_iteration(func_name, "snapshot_states", iteration)
+ status_value = node_state.get("status")
+ log_variable_change(func_name, "status_value", status_value)
+ if node_id in completed_nodes:
+ log_branch(func_name, "snapshot_node_completed")
+ continue
+ if status_value not in (
+ NodeStatus.PENDING.value,
+ NodeStatus.RUNNING.value,
+ ):
+ log_branch(func_name, "snapshot_node_not_activated")
+ continue
+ log_branch(func_name, "snapshot_node_pending")
+ pending.add(node_id)
+ log_variable_change(func_name, "pending", pending)
+ return pending
+
+ def _initial_results_from_snapshot(
+ self, snapshot: Dict[str, Any]
+ ) -> Dict[str, Any]:
  """Extract node results from a snapshot payload."""

  func_name = "Scheduler._initial_results_from_snapshot"
@@ -541,9 +723,7 @@ class Scheduler:
  log_variable_change(func_name, "results_final", results)
  return results

- def _persist_snapshot(
- self, execution_state: ExecutionState, graph_id: str
- ) -> None:
+ def _persist_snapshot(self, execution_state: ExecutionState, graph_id: str) -> None:
  """Persist execution state snapshot if a store is configured."""

  func_name = "Scheduler._persist_snapshot"
{loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: loopgraph
- Version: 0.2.0
+ Version: 0.3.0
  Summary: Event-driven graph workflow engine with native loop support.
  Author: LoopGraph Team
  License: MIT
@@ -131,6 +131,30 @@ registry.register("my_node", make_handler(bus, my_agent))
  - Reset clears upstream-completion tracking and preserves cumulative `visit_count`
  - Overlapping loops sharing any node are rejected at graph construction time

+ ## Scheduler Semantics
+
+ - The scheduler seeds its internal pending set from graph entry nodes only. A
+ node enters pending later only when an upstream edge actually activates it.
+ - Unselected `SWITCH` branches never enter pending, so leaf branches that were
+ not chosen cannot deadlock the workflow.
+ - A graph with nodes but no entry nodes now fails fast with `ValueError`
+ instead of entering a deadlocked run loop.
+ - If a `SWITCH` returns a route that matches no downstream edge and no
+ `exit` fallback edge exists, the scheduler raises `ValueError`.
+ - `NodeKind.TERMINAL` keeps the same runtime scheduling semantics as `TASK`.
+
+ ## Recovery Boundaries
+
+ - Persisted scheduler snapshots now include `snapshot_format_version`.
+ - Resume is supported only for snapshots with the current supported snapshot
+ format version.
+ - If a snapshot is missing `snapshot_format_version` or carries an unsupported
+ version, resume fails fast with a `ValueError` that reports the actual
+ version, the supported version, and discard-or-migrate guidance.
+ - On resume, pending is rebuilt from uncompleted entry nodes plus nodes already
+ persisted as `PENDING` or `RUNNING`. Persisted `RUNNING` nodes are reset to
+ `PENDING` before scheduling.
+
  ---

  ## Installation
{loopgraph-0.2.0 → loopgraph-0.3.0}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

  [project]
  name = "loopgraph"
- version = "0.2.0"
+ version = "0.3.0"
  description = "Event-driven graph workflow engine with native loop support."
  authors = [
  { name = "LoopGraph Team" }
{loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_eventbus.py
@@ -60,6 +60,7 @@ async def test_emit_with_on_error_invokes_handler() -> None:
  @pytest.mark.asyncio
  async def test_emit_on_error_can_raise() -> None:
  """If on_error raises, the exception propagates to caller."""
+
  async def raising_error_handler(exc: Exception, event: Event) -> None:
  raise RuntimeError(f"critical error handling {exc}")