loopgraph 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {loopgraph-0.2.0 → loopgraph-0.3.0}/PKG-INFO +25 -1
- {loopgraph-0.2.0 → loopgraph-0.3.0}/README.md +24 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/graph.py +36 -10
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/state.py +8 -1
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/scheduler/scheduler.py +198 -18
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/PKG-INFO +25 -1
- {loopgraph-0.2.0 → loopgraph-0.3.0}/pyproject.toml +1 -1
- {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_eventbus.py +1 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_integration_workflows.py +446 -5
- loopgraph-0.3.0/tests/test_scheduler_recovery.py +256 -0
- loopgraph-0.2.0/tests/test_scheduler_recovery.py +0 -75
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/_debug.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/bus/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/bus/eventbus.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/concurrency/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/concurrency/policies.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/core/types.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/diagnostics/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/diagnostics/inspect.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/persistence/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/persistence/event_log.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/persistence/snapshot.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/py.typed +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/registry/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/registry/function_registry.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph/scheduler/__init__.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/SOURCES.txt +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/dependency_links.txt +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/requires.txt +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/loopgraph.egg-info/top_level.txt +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/setup.cfg +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_doctests.py +0 -0
- {loopgraph-0.2.0 → loopgraph-0.3.0}/tests/test_priority_concurrency.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loopgraph
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Event-driven graph workflow engine with native loop support.
|
|
5
5
|
Author: LoopGraph Team
|
|
6
6
|
License: MIT
|
|
@@ -131,6 +131,30 @@ registry.register("my_node", make_handler(bus, my_agent))
|
|
|
131
131
|
- Reset clears upstream-completion tracking and preserves cumulative `visit_count`
|
|
132
132
|
- Overlapping loops sharing any node are rejected at graph construction time
|
|
133
133
|
|
|
134
|
+
## Scheduler Semantics
|
|
135
|
+
|
|
136
|
+
- The scheduler seeds its internal pending set from graph entry nodes only. A
|
|
137
|
+
node enters pending later only when an upstream edge actually activates it.
|
|
138
|
+
- Unselected `SWITCH` branches never enter pending, so leaf branches that were
|
|
139
|
+
not chosen cannot deadlock the workflow.
|
|
140
|
+
- A graph with nodes but no entry nodes now fails fast with `ValueError`
|
|
141
|
+
instead of entering a deadlocked run loop.
|
|
142
|
+
- If a `SWITCH` returns a route that matches no downstream edge and no
|
|
143
|
+
`exit` fallback edge exists, the scheduler raises `ValueError`.
|
|
144
|
+
- `NodeKind.TERMINAL` keeps the same runtime scheduling semantics as `TASK`.
|
|
145
|
+
|
|
146
|
+
## Recovery Boundaries
|
|
147
|
+
|
|
148
|
+
- Persisted scheduler snapshots now include `snapshot_format_version`.
|
|
149
|
+
- Resume is supported only for snapshots with the current supported snapshot
|
|
150
|
+
format version.
|
|
151
|
+
- If a snapshot is missing `snapshot_format_version` or carries an unsupported
|
|
152
|
+
version, resume fails fast with a `ValueError` that reports the actual
|
|
153
|
+
version, the supported version, and discard-or-migrate guidance.
|
|
154
|
+
- On resume, pending is rebuilt from uncompleted entry nodes plus nodes already
|
|
155
|
+
persisted as `PENDING` or `RUNNING`. Persisted `RUNNING` nodes are reset to
|
|
156
|
+
`PENDING` before scheduling.
|
|
157
|
+
|
|
134
158
|
---
|
|
135
159
|
|
|
136
160
|
## Installation
|
|
@@ -116,6 +116,30 @@ registry.register("my_node", make_handler(bus, my_agent))
|
|
|
116
116
|
- Reset clears upstream-completion tracking and preserves cumulative `visit_count`
|
|
117
117
|
- Overlapping loops sharing any node are rejected at graph construction time
|
|
118
118
|
|
|
119
|
+
## Scheduler Semantics
|
|
120
|
+
|
|
121
|
+
- The scheduler seeds its internal pending set from graph entry nodes only. A
|
|
122
|
+
node enters pending later only when an upstream edge actually activates it.
|
|
123
|
+
- Unselected `SWITCH` branches never enter pending, so leaf branches that were
|
|
124
|
+
not chosen cannot deadlock the workflow.
|
|
125
|
+
- A graph with nodes but no entry nodes now fails fast with `ValueError`
|
|
126
|
+
instead of entering a deadlocked run loop.
|
|
127
|
+
- If a `SWITCH` returns a route that matches no downstream edge and no
|
|
128
|
+
`exit` fallback edge exists, the scheduler raises `ValueError`.
|
|
129
|
+
- `NodeKind.TERMINAL` keeps the same runtime scheduling semantics as `TASK`.
|
|
130
|
+
|
|
131
|
+
## Recovery Boundaries
|
|
132
|
+
|
|
133
|
+
- Persisted scheduler snapshots now include `snapshot_format_version`.
|
|
134
|
+
- Resume is supported only for snapshots with the current supported snapshot
|
|
135
|
+
format version.
|
|
136
|
+
- If a snapshot is missing `snapshot_format_version` or carries an unsupported
|
|
137
|
+
version, resume fails fast with a `ValueError` that reports the actual
|
|
138
|
+
version, the supported version, and discard-or-migrate guidance.
|
|
139
|
+
- On resume, pending is rebuilt from uncompleted entry nodes plus nodes already
|
|
140
|
+
persisted as `PENDING` or `RUNNING`. Persisted `RUNNING` nodes are reset to
|
|
141
|
+
`PENDING` before scheduling.
|
|
142
|
+
|
|
119
143
|
---
|
|
120
144
|
|
|
121
145
|
## Installation
|
|
@@ -168,9 +168,7 @@ class Graph:
|
|
|
168
168
|
f"Aggregate node '{node.id}' requires 'required' > 0"
|
|
169
169
|
)
|
|
170
170
|
upstream_count = len(self.upstream_nodes(node.id))
|
|
171
|
-
log_variable_change(
|
|
172
|
-
func_name, "upstream_count", upstream_count
|
|
173
|
-
)
|
|
171
|
+
log_variable_change(func_name, "upstream_count", upstream_count)
|
|
174
172
|
if required_raw > upstream_count:
|
|
175
173
|
log_branch(func_name, "aggregate_required_exceeds_upstream")
|
|
176
174
|
raise ValueError(
|
|
@@ -200,18 +198,14 @@ class Graph:
|
|
|
200
198
|
log_branch(func_name, "shared_node_multi_loop_error")
|
|
201
199
|
shared_list = sorted(shared_nodes)
|
|
202
200
|
log_variable_change(func_name, "shared_list", shared_list)
|
|
203
|
-
raise ValueError(
|
|
204
|
-
f"Graph has multi-loop shared nodes: {shared_list}"
|
|
205
|
-
)
|
|
201
|
+
raise ValueError(f"Graph has multi-loop shared nodes: {shared_list}")
|
|
206
202
|
log_branch(func_name, "cycle_validation_passed")
|
|
207
203
|
|
|
208
204
|
@staticmethod
|
|
209
205
|
def _canonical_cycle(cycle: List[str]) -> Tuple[str, ...]:
|
|
210
206
|
"""Normalize a cycle so equivalent rotations share one representation."""
|
|
211
207
|
cycle_len = len(cycle)
|
|
212
|
-
rotations = [
|
|
213
|
-
tuple(cycle[index:] + cycle[:index]) for index in range(cycle_len)
|
|
214
|
-
]
|
|
208
|
+
rotations = [tuple(cycle[index:] + cycle[:index]) for index in range(cycle_len)]
|
|
215
209
|
return min(rotations)
|
|
216
210
|
|
|
217
211
|
def _find_cycles(self) -> List[Tuple[str, ...]]:
|
|
@@ -327,6 +321,36 @@ class Graph:
|
|
|
327
321
|
log_variable_change(func_name, "sources", sources)
|
|
328
322
|
return sources
|
|
329
323
|
|
|
324
|
+
def entry_nodes(self) -> List[Node]:
|
|
325
|
+
"""Return nodes with no upstream edges.
|
|
326
|
+
|
|
327
|
+
>>> graph = Graph(
|
|
328
|
+
... nodes={
|
|
329
|
+
... "start": Node(id="start", kind=NodeKind.TASK, handler="start"),
|
|
330
|
+
... "middle": Node(id="middle", kind=NodeKind.TASK, handler="middle"),
|
|
331
|
+
... "lonely": Node(id="lonely", kind=NodeKind.TASK, handler="lonely"),
|
|
332
|
+
... },
|
|
333
|
+
... edges={"e": Edge(id="e", source="start", target="middle")},
|
|
334
|
+
... )
|
|
335
|
+
>>> [node.id for node in graph.entry_nodes()]
|
|
336
|
+
['start', 'lonely']
|
|
337
|
+
"""
|
|
338
|
+
func_name = "Graph.entry_nodes"
|
|
339
|
+
log_parameter(func_name)
|
|
340
|
+
entries: List[Node] = []
|
|
341
|
+
log_variable_change(func_name, "entries", entries)
|
|
342
|
+
for iteration, node_id in enumerate(self.nodes):
|
|
343
|
+
log_loop_iteration(func_name, "nodes", iteration)
|
|
344
|
+
reverse_edges = self._reverse_adj.get(node_id, [])
|
|
345
|
+
log_variable_change(func_name, "reverse_edges", reverse_edges)
|
|
346
|
+
if reverse_edges:
|
|
347
|
+
log_branch(func_name, "has_upstream")
|
|
348
|
+
continue
|
|
349
|
+
log_branch(func_name, "entry_node")
|
|
350
|
+
entries.append(self.nodes[node_id])
|
|
351
|
+
log_variable_change(func_name, "entries", list(entries))
|
|
352
|
+
return entries
|
|
353
|
+
|
|
330
354
|
def to_dict(self) -> Dict[str, object]:
|
|
331
355
|
"""Serialize the graph to a dictionary."""
|
|
332
356
|
func_name = "Graph.to_dict"
|
|
@@ -359,7 +383,9 @@ class Graph:
|
|
|
359
383
|
return payload
|
|
360
384
|
|
|
361
385
|
@classmethod
|
|
362
|
-
def from_dict(cls, payload: Mapping[str, Iterable[Mapping[str, object]]]) -> "Graph":
|
|
386
|
+
def from_dict(
|
|
387
|
+
cls, payload: Mapping[str, Iterable[Mapping[str, object]]]
|
|
388
|
+
) -> "Graph":
|
|
363
389
|
"""Deserialize a graph from a dictionary payload."""
|
|
364
390
|
func_name = "Graph.from_dict"
|
|
365
391
|
log_parameter(func_name, payload=payload)
|
|
@@ -14,6 +14,8 @@ from .._debug import (
|
|
|
14
14
|
from .graph import Graph
|
|
15
15
|
from .types import NodeKind, NodeStatus, VisitOutcome
|
|
16
16
|
|
|
17
|
+
SNAPSHOT_FORMAT_VERSION = 2
|
|
18
|
+
|
|
17
19
|
|
|
18
20
|
@dataclass
|
|
19
21
|
class NodeVisit:
|
|
@@ -390,7 +392,11 @@ class ExecutionState:
|
|
|
390
392
|
log_variable_change(func_name, "state_after", state)
|
|
391
393
|
|
|
392
394
|
def snapshot(self) -> Dict[str, Any]:
|
|
393
|
-
"""Produce a JSON-serializable snapshot of execution state."""
|
|
395
|
+
"""Produce a JSON-serializable snapshot of execution state.
|
|
396
|
+
|
|
397
|
+
>>> ExecutionState().snapshot()["snapshot_format_version"]
|
|
398
|
+
2
|
|
399
|
+
"""
|
|
394
400
|
func_name = "ExecutionState.snapshot"
|
|
395
401
|
log_parameter(func_name)
|
|
396
402
|
states_payload: Dict[str, Dict[str, Any]] = {}
|
|
@@ -402,6 +408,7 @@ class ExecutionState:
|
|
|
402
408
|
func_name, f"states_payload[{node_id!r}]", states_payload[node_id]
|
|
403
409
|
)
|
|
404
410
|
payload = {
|
|
411
|
+
"snapshot_format_version": SNAPSHOT_FORMAT_VERSION,
|
|
405
412
|
"states": states_payload,
|
|
406
413
|
"completed_nodes": sorted(self._completed_nodes),
|
|
407
414
|
}
|
|
@@ -13,11 +13,13 @@ from .._debug import (
|
|
|
13
13
|
from ..bus.eventbus import Event, EventBus
|
|
14
14
|
from ..concurrency import ConcurrencyManager, SemaphorePolicy
|
|
15
15
|
from ..core.graph import Edge, Graph, Node
|
|
16
|
-
from ..core.state import ExecutionState
|
|
16
|
+
from ..core.state import SNAPSHOT_FORMAT_VERSION, ExecutionState
|
|
17
17
|
from ..core.types import EventType, NodeKind, NodeStatus, VisitOutcome
|
|
18
18
|
from ..persistence import EventLog, SnapshotStore
|
|
19
19
|
from ..registry.function_registry import FunctionRegistry
|
|
20
20
|
|
|
21
|
+
SUPPORTED_SNAPSHOT_FORMAT_VERSION = SNAPSHOT_FORMAT_VERSION
|
|
22
|
+
|
|
21
23
|
|
|
22
24
|
class Scheduler:
|
|
23
25
|
"""Execute graphs by dispatching node handlers.
|
|
@@ -179,15 +181,28 @@ class Scheduler:
|
|
|
179
181
|
graph_id=graph_id,
|
|
180
182
|
initial_payload=initial_payload,
|
|
181
183
|
)
|
|
182
|
-
execution_state = self._load_or_create_state(graph_id)
|
|
184
|
+
execution_state, resumed_from_snapshot = self._load_or_create_state(graph_id)
|
|
183
185
|
log_variable_change(func_name, "execution_state", execution_state)
|
|
186
|
+
log_variable_change(func_name, "resumed_from_snapshot", resumed_from_snapshot)
|
|
184
187
|
snapshot_data = execution_state.snapshot()
|
|
185
188
|
log_variable_change(func_name, "snapshot_data", snapshot_data)
|
|
186
189
|
results = self._initial_results_from_snapshot(snapshot_data)
|
|
187
190
|
log_variable_change(func_name, "results", results)
|
|
188
191
|
completed_nodes = set(snapshot_data["completed_nodes"])
|
|
189
192
|
log_variable_change(func_name, "completed_nodes", completed_nodes)
|
|
190
|
-
|
|
193
|
+
if resumed_from_snapshot:
|
|
194
|
+
log_branch(func_name, "supported_snapshot_resume")
|
|
195
|
+
pending = self._seed_pending_from_supported_snapshot(
|
|
196
|
+
graph=graph,
|
|
197
|
+
snapshot_data=snapshot_data,
|
|
198
|
+
completed_nodes=completed_nodes,
|
|
199
|
+
)
|
|
200
|
+
else:
|
|
201
|
+
log_branch(func_name, "fresh_run")
|
|
202
|
+
pending = self._seed_pending_from_entry_nodes(
|
|
203
|
+
graph=graph,
|
|
204
|
+
completed_nodes=completed_nodes,
|
|
205
|
+
)
|
|
191
206
|
log_variable_change(func_name, "pending", pending)
|
|
192
207
|
loop_iteration = 0
|
|
193
208
|
while pending:
|
|
@@ -225,7 +240,11 @@ class Scheduler:
|
|
|
225
240
|
status=NodeStatus.RUNNING,
|
|
226
241
|
)
|
|
227
242
|
)
|
|
228
|
-
|
|
243
|
+
(
|
|
244
|
+
handler_result,
|
|
245
|
+
reentry_targets,
|
|
246
|
+
activated_targets,
|
|
247
|
+
) = await self._execute_node(
|
|
229
248
|
node=node,
|
|
230
249
|
graph=graph,
|
|
231
250
|
execution_state=execution_state,
|
|
@@ -234,12 +253,34 @@ class Scheduler:
|
|
|
234
253
|
)
|
|
235
254
|
log_variable_change(func_name, "handler_result", handler_result)
|
|
236
255
|
log_variable_change(func_name, "reentry_targets", reentry_targets)
|
|
256
|
+
log_variable_change(func_name, "activated_targets", activated_targets)
|
|
237
257
|
results[node_id] = handler_result
|
|
238
258
|
log_variable_change(func_name, "results", results)
|
|
259
|
+
completed_nodes.add(node_id)
|
|
260
|
+
log_variable_change(func_name, "completed_nodes", completed_nodes)
|
|
239
261
|
pending.remove(node_id)
|
|
240
262
|
log_variable_change(func_name, "pending", pending)
|
|
263
|
+
for (
|
|
264
|
+
activated_iteration,
|
|
265
|
+
activated_target,
|
|
266
|
+
) in enumerate(activated_targets):
|
|
267
|
+
log_loop_iteration(
|
|
268
|
+
func_name, "activated_targets", activated_iteration
|
|
269
|
+
)
|
|
270
|
+
if activated_target in completed_nodes:
|
|
271
|
+
log_branch(func_name, "activated_target_completed")
|
|
272
|
+
continue
|
|
273
|
+
log_branch(func_name, "activated_target_pending")
|
|
274
|
+
pending.add(activated_target)
|
|
275
|
+
log_variable_change(
|
|
276
|
+
func_name,
|
|
277
|
+
f"pending_with_activated_{activated_target}",
|
|
278
|
+
pending,
|
|
279
|
+
)
|
|
241
280
|
for reentry_iteration, reentry_target in enumerate(reentry_targets):
|
|
242
281
|
log_loop_iteration(func_name, "reentry_targets", reentry_iteration)
|
|
282
|
+
completed_nodes.discard(reentry_target)
|
|
283
|
+
log_variable_change(func_name, "completed_nodes", completed_nodes)
|
|
243
284
|
pending.add(reentry_target)
|
|
244
285
|
log_variable_change(
|
|
245
286
|
func_name,
|
|
@@ -270,7 +311,7 @@ class Scheduler:
|
|
|
270
311
|
execution_state: ExecutionState,
|
|
271
312
|
graph_id: str,
|
|
272
313
|
upstream_payload: Optional[Any],
|
|
273
|
-
) -> Tuple[Any, List[str]]:
|
|
314
|
+
) -> Tuple[Any, List[str], List[str]]:
|
|
274
315
|
"""Execute a single node handler."""
|
|
275
316
|
func_name = "Scheduler._execute_node"
|
|
276
317
|
log_parameter(
|
|
@@ -329,6 +370,8 @@ class Scheduler:
|
|
|
329
370
|
log_variable_change(func_name, "selected_edges", selected_edges)
|
|
330
371
|
reentry_targets: List[str] = []
|
|
331
372
|
log_variable_change(func_name, "reentry_targets", reentry_targets)
|
|
373
|
+
activated_targets: List[str] = []
|
|
374
|
+
log_variable_change(func_name, "activated_targets", activated_targets)
|
|
332
375
|
for iteration, edge in enumerate(selected_edges):
|
|
333
376
|
log_loop_iteration(func_name, "downstream_edges", iteration)
|
|
334
377
|
log_variable_change(func_name, "edge", edge)
|
|
@@ -364,6 +407,12 @@ class Scheduler:
|
|
|
364
407
|
log_branch(func_name, "reentry_failed_skip")
|
|
365
408
|
elif downstream_visits == 0 and downstream_status is NodeStatus.PENDING:
|
|
366
409
|
log_branch(func_name, "initial_pending_target")
|
|
410
|
+
activated_targets.append(downstream.id)
|
|
411
|
+
log_variable_change(
|
|
412
|
+
func_name,
|
|
413
|
+
"activated_targets",
|
|
414
|
+
list(activated_targets),
|
|
415
|
+
)
|
|
367
416
|
else:
|
|
368
417
|
log_branch(func_name, "reentry_non_terminal_error")
|
|
369
418
|
raise RuntimeError(
|
|
@@ -372,7 +421,7 @@ class Scheduler:
|
|
|
372
421
|
)
|
|
373
422
|
execution_state.note_upstream_completion(downstream.id, node.id)
|
|
374
423
|
self._persist_snapshot(execution_state, graph_id)
|
|
375
|
-
return result, reentry_targets
|
|
424
|
+
return result, reentry_targets, activated_targets
|
|
376
425
|
|
|
377
426
|
def _build_input_payload(
|
|
378
427
|
self,
|
|
@@ -406,9 +455,7 @@ class Scheduler:
|
|
|
406
455
|
log_loop_iteration(func_name, "aggregate_upstream", iteration)
|
|
407
456
|
if upstream.id in results:
|
|
408
457
|
aggregated.append(results[upstream.id])
|
|
409
|
-
log_variable_change(
|
|
410
|
-
func_name, "aggregated", list(aggregated)
|
|
411
|
-
)
|
|
458
|
+
log_variable_change(func_name, "aggregated", list(aggregated))
|
|
412
459
|
log_variable_change(func_name, "payload", aggregated)
|
|
413
460
|
return aggregated
|
|
414
461
|
|
|
@@ -476,7 +523,9 @@ class Scheduler:
|
|
|
476
523
|
log_branch(func_name, "fallback_exit")
|
|
477
524
|
return exit_edges
|
|
478
525
|
log_branch(func_name, "no_matching_edge")
|
|
479
|
-
|
|
526
|
+
raise ValueError(
|
|
527
|
+
f"Switch node '{node.id}' returned unmatched route {route!r} with no fallback edge"
|
|
528
|
+
)
|
|
480
529
|
|
|
481
530
|
def _has_remaining_visits(
|
|
482
531
|
self, graph: Graph, execution_state: ExecutionState, node_id: str
|
|
@@ -495,7 +544,7 @@ class Scheduler:
|
|
|
495
544
|
log_variable_change(func_name, "has_capacity", has_capacity)
|
|
496
545
|
return has_capacity
|
|
497
546
|
|
|
498
|
-
def _load_or_create_state(self, graph_id: str) -> ExecutionState:
|
|
547
|
+
def _load_or_create_state(self, graph_id: str) -> Tuple[ExecutionState, bool]:
|
|
499
548
|
"""Retrieve execution state from snapshots if available."""
|
|
500
549
|
|
|
501
550
|
func_name = "Scheduler._load_or_create_state"
|
|
@@ -504,20 +553,153 @@ class Scheduler:
|
|
|
504
553
|
log_branch(func_name, "no_snapshot_store")
|
|
505
554
|
state = ExecutionState()
|
|
506
555
|
log_variable_change(func_name, "state", state)
|
|
507
|
-
return state
|
|
556
|
+
return state, False
|
|
508
557
|
|
|
509
558
|
try:
|
|
510
559
|
snapshot_payload = self._snapshot_store.load(graph_id)
|
|
511
560
|
except KeyError:
|
|
512
561
|
log_branch(func_name, "snapshot_missing")
|
|
513
562
|
state = ExecutionState()
|
|
563
|
+
resumed_from_snapshot = False
|
|
514
564
|
else:
|
|
515
565
|
log_branch(func_name, "snapshot_loaded")
|
|
516
|
-
|
|
566
|
+
log_variable_change(func_name, "snapshot_payload", snapshot_payload)
|
|
567
|
+
snapshot_dict = dict(snapshot_payload)
|
|
568
|
+
log_variable_change(func_name, "snapshot_dict", snapshot_dict)
|
|
569
|
+
self._validate_snapshot_format(snapshot_dict)
|
|
570
|
+
state = ExecutionState.restore(snapshot_dict)
|
|
571
|
+
self._reset_running_nodes_for_resume(
|
|
572
|
+
execution_state=state,
|
|
573
|
+
snapshot_data=snapshot_dict,
|
|
574
|
+
)
|
|
575
|
+
resumed_from_snapshot = True
|
|
517
576
|
log_variable_change(func_name, "state", state)
|
|
518
|
-
|
|
577
|
+
log_variable_change(func_name, "resumed_from_snapshot", resumed_from_snapshot)
|
|
578
|
+
return state, resumed_from_snapshot
|
|
579
|
+
|
|
580
|
+
def _validate_snapshot_format(self, snapshot_data: Dict[str, Any]) -> None:
|
|
581
|
+
"""Reject unsupported snapshot payloads before restore."""
|
|
582
|
+
|
|
583
|
+
func_name = "Scheduler._validate_snapshot_format"
|
|
584
|
+
log_parameter(func_name, snapshot_data=snapshot_data)
|
|
585
|
+
version = snapshot_data.get("snapshot_format_version")
|
|
586
|
+
log_variable_change(func_name, "version", version)
|
|
587
|
+
log_variable_change(
|
|
588
|
+
func_name,
|
|
589
|
+
"supported_version",
|
|
590
|
+
SUPPORTED_SNAPSHOT_FORMAT_VERSION,
|
|
591
|
+
)
|
|
592
|
+
if version == SUPPORTED_SNAPSHOT_FORMAT_VERSION:
|
|
593
|
+
log_branch(func_name, "supported_version")
|
|
594
|
+
return
|
|
595
|
+
log_branch(func_name, "unsupported_version")
|
|
596
|
+
raise ValueError(
|
|
597
|
+
"Unsupported snapshot format version "
|
|
598
|
+
f"{version!r}; supported version is "
|
|
599
|
+
f"{SUPPORTED_SNAPSHOT_FORMAT_VERSION}. "
|
|
600
|
+
"Discard or migrate the snapshot before resuming."
|
|
601
|
+
)
|
|
519
602
|
|
|
520
|
-
def _initial_results_from_snapshot(self, snapshot: Dict[str, Any]) -> Dict[str, Any]:
|
|
603
|
+
def _reset_running_nodes_for_resume(
|
|
604
|
+
self,
|
|
605
|
+
*,
|
|
606
|
+
execution_state: ExecutionState,
|
|
607
|
+
snapshot_data: Dict[str, Any],
|
|
608
|
+
) -> None:
|
|
609
|
+
"""Reset RUNNING snapshot nodes to PENDING before scheduling."""
|
|
610
|
+
|
|
611
|
+
func_name = "Scheduler._reset_running_nodes_for_resume"
|
|
612
|
+
log_parameter(
|
|
613
|
+
func_name,
|
|
614
|
+
execution_state=execution_state,
|
|
615
|
+
snapshot_data=snapshot_data,
|
|
616
|
+
)
|
|
617
|
+
for iteration, (node_id, node_state) in enumerate(
|
|
618
|
+
snapshot_data.get("states", {}).items()
|
|
619
|
+
):
|
|
620
|
+
log_loop_iteration(func_name, "states", iteration)
|
|
621
|
+
status_value = node_state.get("status")
|
|
622
|
+
log_variable_change(func_name, "status_value", status_value)
|
|
623
|
+
if status_value != NodeStatus.RUNNING.value:
|
|
624
|
+
log_branch(func_name, "state_not_running")
|
|
625
|
+
continue
|
|
626
|
+
log_branch(func_name, "reset_running_to_pending")
|
|
627
|
+
state = execution_state._ensure_state(node_id)
|
|
628
|
+
log_variable_change(func_name, "state_before", state)
|
|
629
|
+
state.status = NodeStatus.PENDING
|
|
630
|
+
log_variable_change(func_name, "state_after", state)
|
|
631
|
+
|
|
632
|
+
def _seed_pending_from_entry_nodes(
|
|
633
|
+
self,
|
|
634
|
+
*,
|
|
635
|
+
graph: Graph,
|
|
636
|
+
completed_nodes: set[str],
|
|
637
|
+
) -> set[str]:
|
|
638
|
+
"""Seed a fresh run from entry nodes only."""
|
|
639
|
+
|
|
640
|
+
func_name = "Scheduler._seed_pending_from_entry_nodes"
|
|
641
|
+
log_parameter(func_name, graph=graph, completed_nodes=completed_nodes)
|
|
642
|
+
entry_nodes = graph.entry_nodes()
|
|
643
|
+
log_variable_change(func_name, "entry_nodes", entry_nodes)
|
|
644
|
+
entry_ids = {node.id for node in entry_nodes}
|
|
645
|
+
log_variable_change(func_name, "entry_ids", entry_ids)
|
|
646
|
+
if graph.nodes and not entry_ids:
|
|
647
|
+
log_branch(func_name, "no_entry_nodes")
|
|
648
|
+
raise ValueError("Graph has no entry nodes (nodes with no upstream edges)")
|
|
649
|
+
log_branch(func_name, "seed_from_entries")
|
|
650
|
+
pending = {node_id for node_id in entry_ids if node_id not in completed_nodes}
|
|
651
|
+
log_variable_change(func_name, "pending", pending)
|
|
652
|
+
return pending
|
|
653
|
+
|
|
654
|
+
def _seed_pending_from_supported_snapshot(
|
|
655
|
+
self,
|
|
656
|
+
*,
|
|
657
|
+
graph: Graph,
|
|
658
|
+
snapshot_data: Dict[str, Any],
|
|
659
|
+
completed_nodes: set[str],
|
|
660
|
+
) -> set[str]:
|
|
661
|
+
"""Seed a resumed run from supported snapshot state plus entry nodes."""
|
|
662
|
+
|
|
663
|
+
func_name = "Scheduler._seed_pending_from_supported_snapshot"
|
|
664
|
+
log_parameter(
|
|
665
|
+
func_name,
|
|
666
|
+
graph=graph,
|
|
667
|
+
snapshot_data=snapshot_data,
|
|
668
|
+
completed_nodes=completed_nodes,
|
|
669
|
+
)
|
|
670
|
+
pending: set[str] = set()
|
|
671
|
+
log_variable_change(func_name, "pending", pending)
|
|
672
|
+
for iteration, node in enumerate(graph.entry_nodes()):
|
|
673
|
+
log_loop_iteration(func_name, "entry_nodes", iteration)
|
|
674
|
+
if node.id in completed_nodes:
|
|
675
|
+
log_branch(func_name, "entry_completed")
|
|
676
|
+
continue
|
|
677
|
+
log_branch(func_name, "entry_pending")
|
|
678
|
+
pending.add(node.id)
|
|
679
|
+
log_variable_change(func_name, "pending", pending)
|
|
680
|
+
for iteration, (node_id, node_state) in enumerate(
|
|
681
|
+
snapshot_data.get("states", {}).items()
|
|
682
|
+
):
|
|
683
|
+
log_loop_iteration(func_name, "snapshot_states", iteration)
|
|
684
|
+
status_value = node_state.get("status")
|
|
685
|
+
log_variable_change(func_name, "status_value", status_value)
|
|
686
|
+
if node_id in completed_nodes:
|
|
687
|
+
log_branch(func_name, "snapshot_node_completed")
|
|
688
|
+
continue
|
|
689
|
+
if status_value not in (
|
|
690
|
+
NodeStatus.PENDING.value,
|
|
691
|
+
NodeStatus.RUNNING.value,
|
|
692
|
+
):
|
|
693
|
+
log_branch(func_name, "snapshot_node_not_activated")
|
|
694
|
+
continue
|
|
695
|
+
log_branch(func_name, "snapshot_node_pending")
|
|
696
|
+
pending.add(node_id)
|
|
697
|
+
log_variable_change(func_name, "pending", pending)
|
|
698
|
+
return pending
|
|
699
|
+
|
|
700
|
+
def _initial_results_from_snapshot(
|
|
701
|
+
self, snapshot: Dict[str, Any]
|
|
702
|
+
) -> Dict[str, Any]:
|
|
521
703
|
"""Extract node results from a snapshot payload."""
|
|
522
704
|
|
|
523
705
|
func_name = "Scheduler._initial_results_from_snapshot"
|
|
@@ -541,9 +723,7 @@ class Scheduler:
|
|
|
541
723
|
log_variable_change(func_name, "results_final", results)
|
|
542
724
|
return results
|
|
543
725
|
|
|
544
|
-
def _persist_snapshot(
|
|
545
|
-
self, execution_state: ExecutionState, graph_id: str
|
|
546
|
-
) -> None:
|
|
726
|
+
def _persist_snapshot(self, execution_state: ExecutionState, graph_id: str) -> None:
|
|
547
727
|
"""Persist execution state snapshot if a store is configured."""
|
|
548
728
|
|
|
549
729
|
func_name = "Scheduler._persist_snapshot"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: loopgraph
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.3.0
|
|
4
4
|
Summary: Event-driven graph workflow engine with native loop support.
|
|
5
5
|
Author: LoopGraph Team
|
|
6
6
|
License: MIT
|
|
@@ -131,6 +131,30 @@ registry.register("my_node", make_handler(bus, my_agent))
|
|
|
131
131
|
- Reset clears upstream-completion tracking and preserves cumulative `visit_count`
|
|
132
132
|
- Overlapping loops sharing any node are rejected at graph construction time
|
|
133
133
|
|
|
134
|
+
## Scheduler Semantics
|
|
135
|
+
|
|
136
|
+
- The scheduler seeds its internal pending set from graph entry nodes only. A
|
|
137
|
+
node enters pending later only when an upstream edge actually activates it.
|
|
138
|
+
- Unselected `SWITCH` branches never enter pending, so leaf branches that were
|
|
139
|
+
not chosen cannot deadlock the workflow.
|
|
140
|
+
- A graph with nodes but no entry nodes now fails fast with `ValueError`
|
|
141
|
+
instead of entering a deadlocked run loop.
|
|
142
|
+
- If a `SWITCH` returns a route that matches no downstream edge and no
|
|
143
|
+
`exit` fallback edge exists, the scheduler raises `ValueError`.
|
|
144
|
+
- `NodeKind.TERMINAL` keeps the same runtime scheduling semantics as `TASK`.
|
|
145
|
+
|
|
146
|
+
## Recovery Boundaries
|
|
147
|
+
|
|
148
|
+
- Persisted scheduler snapshots now include `snapshot_format_version`.
|
|
149
|
+
- Resume is supported only for snapshots with the current supported snapshot
|
|
150
|
+
format version.
|
|
151
|
+
- If a snapshot is missing `snapshot_format_version` or carries an unsupported
|
|
152
|
+
version, resume fails fast with a `ValueError` that reports the actual
|
|
153
|
+
version, the supported version, and discard-or-migrate guidance.
|
|
154
|
+
- On resume, pending is rebuilt from uncompleted entry nodes plus nodes already
|
|
155
|
+
persisted as `PENDING` or `RUNNING`. Persisted `RUNNING` nodes are reset to
|
|
156
|
+
`PENDING` before scheduling.
|
|
157
|
+
|
|
134
158
|
---
|
|
135
159
|
|
|
136
160
|
## Installation
|
|
@@ -60,6 +60,7 @@ async def test_emit_with_on_error_invokes_handler() -> None:
|
|
|
60
60
|
@pytest.mark.asyncio
|
|
61
61
|
async def test_emit_on_error_can_raise() -> None:
|
|
62
62
|
"""If on_error raises, the exception propagates to caller."""
|
|
63
|
+
|
|
63
64
|
async def raising_error_handler(exc: Exception, event: Event) -> None:
|
|
64
65
|
raise RuntimeError(f"critical error handling {exc}")
|
|
65
66
|
|