AbstractRuntime 0.2.0-py3-none-any.whl → 0.4.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. abstractruntime/__init__.py +7 -2
  2. abstractruntime/core/config.py +14 -1
  3. abstractruntime/core/event_keys.py +62 -0
  4. abstractruntime/core/models.py +12 -1
  5. abstractruntime/core/runtime.py +2444 -14
  6. abstractruntime/core/vars.py +95 -0
  7. abstractruntime/evidence/__init__.py +10 -0
  8. abstractruntime/evidence/recorder.py +325 -0
  9. abstractruntime/integrations/abstractcore/__init__.py +3 -0
  10. abstractruntime/integrations/abstractcore/constants.py +19 -0
  11. abstractruntime/integrations/abstractcore/default_tools.py +134 -0
  12. abstractruntime/integrations/abstractcore/effect_handlers.py +255 -6
  13. abstractruntime/integrations/abstractcore/factory.py +95 -10
  14. abstractruntime/integrations/abstractcore/llm_client.py +456 -52
  15. abstractruntime/integrations/abstractcore/mcp_worker.py +586 -0
  16. abstractruntime/integrations/abstractcore/observability.py +80 -0
  17. abstractruntime/integrations/abstractcore/summarizer.py +154 -0
  18. abstractruntime/integrations/abstractcore/tool_executor.py +481 -24
  19. abstractruntime/memory/__init__.py +21 -0
  20. abstractruntime/memory/active_context.py +746 -0
  21. abstractruntime/memory/active_memory.py +452 -0
  22. abstractruntime/memory/compaction.py +105 -0
  23. abstractruntime/rendering/__init__.py +17 -0
  24. abstractruntime/rendering/agent_trace_report.py +256 -0
  25. abstractruntime/rendering/json_stringify.py +136 -0
  26. abstractruntime/scheduler/scheduler.py +93 -2
  27. abstractruntime/storage/__init__.py +3 -1
  28. abstractruntime/storage/artifacts.py +20 -5
  29. abstractruntime/storage/json_files.py +15 -2
  30. abstractruntime/storage/observable.py +99 -0
  31. {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.0.dist-info}/METADATA +5 -1
  32. abstractruntime-0.4.0.dist-info/RECORD +49 -0
  33. abstractruntime-0.4.0.dist-info/entry_points.txt +2 -0
  34. abstractruntime-0.2.0.dist-info/RECORD +0 -32
  35. {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.0.dist-info}/WHEEL +0 -0
  36. {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.0.dist-info}/licenses/LICENSE +0 -0
@@ -20,8 +20,12 @@ from __future__ import annotations
 
  from dataclasses import dataclass
  from datetime import datetime, timezone
- from typing import Any, Callable, Dict, Optional
+ from typing import Any, Callable, Dict, Optional, List
+ import copy
  import inspect
+ import json
+ import os
+ import re
 
  from .config import RuntimeConfig
  from .models import (
@@ -38,13 +42,164 @@ from .models import (
  )
  from .spec import WorkflowSpec
  from .policy import DefaultEffectPolicy, EffectPolicy
- from ..storage.base import LedgerStore, RunStore
+ from ..storage.base import LedgerStore, RunStore, QueryableRunStore
+ from .event_keys import build_event_wait_key
 
 
  def utc_now_iso() -> str:
      return datetime.now(timezone.utc).isoformat()
 
 
+ _DEFAULT_GLOBAL_MEMORY_RUN_ID = "global_memory"
+ _SAFE_RUN_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$")
+
+
+ def _ensure_runtime_namespace(vars: Dict[str, Any]) -> Dict[str, Any]:
+     runtime_ns = vars.get("_runtime")
+     if not isinstance(runtime_ns, dict):
+         runtime_ns = {}
+         vars["_runtime"] = runtime_ns
+     return runtime_ns
+
+
+ def _ensure_control_namespace(vars: Dict[str, Any]) -> Dict[str, Any]:
+     runtime_ns = _ensure_runtime_namespace(vars)
+     control = runtime_ns.get("control")
+     if not isinstance(control, dict):
+         control = {}
+         runtime_ns["control"] = control
+     return control
+
+
+ def _is_paused_run_vars(vars: Any) -> bool:
+     if not isinstance(vars, dict):
+         return False
+     runtime_ns = vars.get("_runtime")
+     if not isinstance(runtime_ns, dict):
+         return False
+     control = runtime_ns.get("control")
+     if not isinstance(control, dict):
+         return False
+     return bool(control.get("paused") is True)
+
+
+ def _is_pause_wait(waiting: Any, *, run_id: str) -> bool:
+     if waiting is None:
+         return False
+     try:
+         reason = getattr(waiting, "reason", None)
+         reason_value = reason.value if hasattr(reason, "value") else str(reason) if reason else None
+     except Exception:
+         reason_value = None
+     if reason_value != WaitReason.USER.value:
+         return False
+     try:
+         wait_key = getattr(waiting, "wait_key", None)
+         if isinstance(wait_key, str) and wait_key == f"pause:{run_id}":
+             return True
+     except Exception:
+         pass
+     try:
+         details = getattr(waiting, "details", None)
+         if isinstance(details, dict) and details.get("kind") == "pause":
+             return True
+     except Exception:
+         pass
+     return False
+
+
+ def _record_node_trace(
+     *,
+     run: RunState,
+     node_id: str,
+     effect: Effect,
+     outcome: "EffectOutcome",
+     idempotency_key: Optional[str],
+     reused_prior_result: bool,
+     duration_ms: Optional[float] = None,
+     max_entries_per_node: int = 100,
+ ) -> None:
+     """Record a JSON-safe per-node execution trace in run.vars["_runtime"].
+
+     This trace is runtime-owned and durable (stored in RunStore checkpoints).
+     It exists to support higher-level hosts (AbstractFlow, AbstractCode, etc.)
+     that need structured "scratchpad"/debug information without inventing
+     host-specific persistence formats.
+     """
+
+     runtime_ns = _ensure_runtime_namespace(run.vars)
+     traces = runtime_ns.get("node_traces")
+     if not isinstance(traces, dict):
+         traces = {}
+         runtime_ns["node_traces"] = traces
+
+     node_trace = traces.get(node_id)
+     if not isinstance(node_trace, dict):
+         node_trace = {"node_id": node_id, "steps": []}
+         traces[node_id] = node_trace
+
+     steps = node_trace.get("steps")
+     if not isinstance(steps, list):
+         steps = []
+         node_trace["steps"] = steps
+
+     wait_dict: Optional[Dict[str, Any]] = None
+     if outcome.status == "waiting" and outcome.wait is not None:
+         w = outcome.wait
+         wait_dict = {
+             "reason": w.reason.value if hasattr(w.reason, "value") else str(w.reason),
+             "wait_key": w.wait_key,
+             "until": w.until,
+             "resume_to_node": w.resume_to_node,
+             "result_key": w.result_key,
+             "prompt": w.prompt,
+             "choices": w.choices,
+             "allow_free_text": w.allow_free_text,
+             "details": w.details,
+         }
+
+     entry: Dict[str, Any] = {
+         "ts": utc_now_iso(),
+         "node_id": node_id,
+         "status": outcome.status,
+         "idempotency_key": idempotency_key,
+         "reused_prior_result": reused_prior_result,
+         "effect": {
+             "type": effect.type.value,
+             "payload": effect.payload,
+             "result_key": effect.result_key,
+         },
+     }
+     if isinstance(duration_ms, (int, float)) and duration_ms >= 0:
+         # UI/UX consumers use this for per-step timing badges (kept JSON-safe).
+         entry["duration_ms"] = float(duration_ms)
+     if outcome.status == "completed":
+         entry["result"] = outcome.result
+     elif outcome.status == "failed":
+         entry["error"] = outcome.error
+     elif wait_dict is not None:
+         entry["wait"] = wait_dict
+
+     # Ensure the trace remains JSON-safe even if a handler violates the contract.
+     try:
+         json.dumps(entry)
+     except TypeError:
+         entry = {
+             "ts": entry.get("ts"),
+             "node_id": node_id,
+             "status": outcome.status,
+             "idempotency_key": idempotency_key,
+             "reused_prior_result": reused_prior_result,
+             "effect": {"type": effect.type.value, "result_key": effect.result_key},
+             "error": "non_json_safe_trace_entry",
+         }
+
+     steps.append(entry)
+     if max_entries_per_node > 0 and len(steps) > max_entries_per_node:
+         del steps[: max(0, len(steps) - max_entries_per_node)]
+     node_trace["updated_at"] = utc_now_iso()
+
+
  @dataclass
  class DefaultRunContext:
      def now_iso(self) -> str:
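For orientation, one entry in the `steps` list that `_record_node_trace` appends looks roughly like this (a sketch: field names follow the code above, all values are illustrative, and the effect-type string is hypothetical):

    entry = {
        "ts": "2025-01-01T12:00:00+00:00",     # utc_now_iso()
        "node_id": "call_llm",                 # illustrative node id
        "status": "completed",                 # "completed" | "failed" | "waiting"
        "idempotency_key": "abc123",           # hypothetical key
        "reused_prior_result": False,
        "duration_ms": 1234.5,                 # present only when measured
        "effect": {
            "type": "llm_call",                # effect.type.value (hypothetical value)
            "payload": {"prompt": "hello"},
            "result_key": "context.reply",
        },
        "result": {"text": "hi"},              # because status == "completed"
    }

Per node, the list is trimmed to `max_entries_per_node` (default 100), so the durable trace stays bounded.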
@@ -95,6 +250,7 @@ class Runtime:
          artifact_store: Optional[Any] = None,
          effect_policy: Optional[EffectPolicy] = None,
          config: Optional[RuntimeConfig] = None,
+         chat_summarizer: Optional[Any] = None,
      ):
          self._run_store = run_store
          self._ledger_store = ledger_store
@@ -103,6 +259,7 @@ class Runtime:
          self._artifact_store = artifact_store
          self._effect_policy: EffectPolicy = effect_policy or DefaultEffectPolicy()
          self._config: RuntimeConfig = config or RuntimeConfig()
+         self._chat_summarizer = chat_summarizer
 
          self._handlers: Dict[EffectType, EffectHandler] = {}
          self._register_builtin_handlers()
@@ -169,6 +326,43 @@ class Runtime:
          if "_limits" not in vars:
              vars["_limits"] = self._config.to_limits_dict()
 
+         # Ensure a durable `_runtime` namespace exists and seed default provider/model metadata
+         # from the Runtime config (best-effort).
+         #
+         # Rationale:
+         # - The Runtime is the orchestration authority (ADR-0001/0014), and `start()` is the
+         #   choke point where durable run state is initialized.
+         # - Agents/workflows should not have to guess/duplicate routing metadata to make prompt
+         #   composition decisions (e.g. native-tools => omit Tools(session) prompt catalogs).
+         runtime_ns = vars.get("_runtime")
+         if not isinstance(runtime_ns, dict):
+             runtime_ns = {}
+             vars["_runtime"] = runtime_ns
+         try:
+             provider_id = getattr(self._config, "provider", None)
+             model_id = getattr(self._config, "model", None)
+             if isinstance(provider_id, str) and provider_id.strip():
+                 runtime_ns.setdefault("provider", provider_id.strip())
+             if isinstance(model_id, str) and model_id.strip():
+                 runtime_ns.setdefault("model", model_id.strip())
+         except Exception:
+             pass
+
+         # Seed tool-support metadata from model capabilities (best-effort).
+         #
+         # This makes the native-vs-prompted tools decision explicit and durable in run state,
+         # so adapters/UI helpers don't have to guess or re-run AbstractCore detection logic.
+         try:
+             caps = getattr(self._config, "model_capabilities", None)
+             if isinstance(caps, dict):
+                 tool_support = caps.get("tool_support")
+                 if isinstance(tool_support, str) and tool_support.strip():
+                     ts = tool_support.strip()
+                     runtime_ns.setdefault("tool_support", ts)
+                     runtime_ns.setdefault("supports_native_tools", ts == "native")
+         except Exception:
+             pass
+
          run = RunState.new(
              workflow_id=workflow.workflow_id,
              entry_node=workflow.entry_node,
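Assuming `RuntimeConfig` exposes `provider`, `model`, and `model_capabilities` as constructor arguments (only the attribute reads above are confirmed by this diff), a host could expect durable metadata like this after `start()`:

    config = RuntimeConfig(
        provider="example-provider",
        model="example-model",
        model_capabilities={"tool_support": "native"},
    )
    # After runtime.start(...), run.vars["_runtime"] would contain:
    # {"provider": "example-provider", "model": "example-model",
    #  "tool_support": "native", "supports_native_tools": True}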
@@ -205,6 +399,87 @@ class Runtime:
          run.status = RunStatus.CANCELLED
          run.error = reason or "Cancelled"
          run.waiting = None
+         try:
+             control = _ensure_control_namespace(run.vars)
+             control.pop("paused", None)
+         except Exception:
+             pass
+         run.updated_at = utc_now_iso()
+         self._run_store.save(run)
+         return run
+
+     def pause_run(self, run_id: str, *, reason: Optional[str] = None) -> RunState:
+         """Pause a run (durably) until it is explicitly resumed.
+
+         Semantics:
+         - Pausing a RUNNING run transitions it to WAITING with a synthetic USER wait.
+         - Pausing a WAITING run (non-USER waits such as UNTIL/EVENT/SUBWORKFLOW) sets a
+           runtime-owned `paused` flag so schedulers/event emitters can skip it.
+         - Pausing an ASK_USER wait is a no-op (already blocked by user input).
+         """
+         run = self.get_state(run_id)
+
+         if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
+             return run
+
+         # If already paused, keep as-is.
+         if _is_paused_run_vars(run.vars):
+             return run
+
+         # Don't interfere with real user prompts (ASK_USER).
+         if run.status == RunStatus.WAITING and run.waiting is not None:
+             if getattr(run.waiting, "reason", None) == WaitReason.USER and not _is_pause_wait(run.waiting, run_id=run_id):
+                 return run
+
+         control = _ensure_control_namespace(run.vars)
+         control["paused"] = True
+         control["paused_at"] = utc_now_iso()
+         if isinstance(reason, str) and reason.strip():
+             control["pause_reason"] = reason.strip()
+
+         if run.status == RunStatus.RUNNING:
+             run.status = RunStatus.WAITING
+             run.waiting = WaitState(
+                 reason=WaitReason.USER,
+                 wait_key=f"pause:{run.run_id}",
+                 resume_to_node=run.current_node,
+                 prompt="Paused",
+                 choices=None,
+                 allow_free_text=False,
+                 details={"kind": "pause"},
+             )
+
+         run.updated_at = utc_now_iso()
+         self._run_store.save(run)
+         return run
+
+     def resume_run(self, run_id: str) -> RunState:
+         """Resume a previously paused run (durably).
+
+         If the run was paused while RUNNING, this clears the synthetic pause wait
+         and returns the run to RUNNING. If the run was paused while WAITING
+         (UNTIL/EVENT/SUBWORKFLOW), this only clears the paused flag.
+         """
+         run = self.get_state(run_id)
+
+         if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
+             return run
+
+         if not _is_paused_run_vars(run.vars):
+             return run
+
+         try:
+             control = _ensure_control_namespace(run.vars)
+             control.pop("paused", None)
+             control.pop("pause_reason", None)
+             control["resumed_at"] = utc_now_iso()
+         except Exception:
+             pass
+
+         if run.status == RunStatus.WAITING and _is_pause_wait(run.waiting, run_id=run_id):
+             resume_to = getattr(run.waiting, "resume_to_node", None)
+             self._apply_resume_payload(run, payload={}, override_node=resume_to)
+
          run.updated_at = utc_now_iso()
          self._run_store.save(run)
          return run
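Host-side usage is symmetric (a minimal sketch; `runtime`, `workflow`, and `run_id` come from the host):

    state = runtime.pause_run(run_id, reason="operator hold")
    # While paused: tick() returns immediately, resume() raises "Run is paused",
    # and emit_event skips the run when delivering events.

    state = runtime.resume_run(run_id)
    # Clears the paused flag (and the synthetic pause wait, if one was created);
    # the host can then drive the run again via runtime.tick(workflow=workflow, run_id=run_id).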
@@ -218,6 +493,83 @@ class Runtime:
      def get_ledger(self, run_id: str) -> list[dict[str, Any]]:
          return self._ledger_store.list(run_id)
 
+     def subscribe_ledger(
+         self,
+         callback: Callable[[Dict[str, Any]], None],
+         *,
+         run_id: Optional[str] = None,
+     ) -> Callable[[], None]:
+         """Subscribe to ledger append events (in-process only).
+
+         This is an optional capability: not all LedgerStore implementations
+         support subscriptions. When unavailable, wrap the configured store with
+         `abstractruntime.storage.observable.ObservableLedgerStore`.
+         """
+         subscribe = getattr(self._ledger_store, "subscribe", None)
+         if not callable(subscribe):
+             raise RuntimeError(
+                 "Configured LedgerStore does not support subscriptions. "
+                 "Wrap it with ObservableLedgerStore to enable `subscribe_ledger()`."
+             )
+         return subscribe(callback, run_id=run_id)
+
+     # ---------------------------------------------------------------------
+     # Trace Helpers (Runtime-Owned)
+     # ---------------------------------------------------------------------
+
+     def get_node_traces(self, run_id: str) -> Dict[str, Any]:
+         """Return runtime-owned per-node traces for a run.
+
+         Traces are stored in `RunState.vars["_runtime"]["node_traces"]`.
+         Returns a deep copy so callers can safely inspect without mutating the run.
+         """
+         run = self.get_state(run_id)
+         runtime_ns = run.vars.get("_runtime")
+         traces = runtime_ns.get("node_traces") if isinstance(runtime_ns, dict) else None
+         return copy.deepcopy(traces) if isinstance(traces, dict) else {}
+
+     def get_node_trace(self, run_id: str, node_id: str) -> Dict[str, Any]:
+         """Return a single node trace object for a run.
+
+         Returns an empty `{node_id, steps: []}` object when missing.
+         """
+         traces = self.get_node_traces(run_id)
+         trace = traces.get(node_id)
+         if isinstance(trace, dict):
+             return trace
+         return {"node_id": node_id, "steps": []}
+
+     # ---------------------------------------------------------------------
+     # Evidence Helpers (Runtime-Owned)
+     # ---------------------------------------------------------------------
+
+     def list_evidence(self, run_id: str) -> list[dict[str, Any]]:
+         """List evidence records for a run (index entries only).
+
+         Evidence is indexed as `kind="evidence"` items inside `vars["_runtime"]["memory_spans"]`.
+         """
+         run = self.get_state(run_id)
+         runtime_ns = run.vars.get("_runtime")
+         spans = runtime_ns.get("memory_spans") if isinstance(runtime_ns, dict) else None
+         if not isinstance(spans, list):
+             return []
+         out: list[dict[str, Any]] = []
+         for s in spans:
+             if not isinstance(s, dict):
+                 continue
+             if s.get("kind") != "evidence":
+                 continue
+             out.append(copy.deepcopy(s))
+         return out
+
+     def load_evidence(self, evidence_id: str) -> Optional[dict[str, Any]]:
+         """Load an evidence record payload from ArtifactStore by id."""
+         artifact_store = self._artifact_store
+         if artifact_store is None:
+             raise RuntimeError("Evidence requires an ArtifactStore; configure runtime.set_artifact_store(...)")
+         payload = artifact_store.load_json(str(evidence_id))
+         return payload if isinstance(payload, dict) else None
+
      # ---------------------------------------------------------------------
      # Limit Management
      # ---------------------------------------------------------------------
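For ledger stores without native subscription support, the wrapper named in the docstring enables it (a sketch; `base_ledger_store` and `run_store` stand in for whatever stores the host already configures):

    from abstractruntime.storage.observable import ObservableLedgerStore

    ledger_store = ObservableLedgerStore(base_ledger_store)
    runtime = Runtime(run_store=run_store, ledger_store=ledger_store)

    def on_append(record: dict) -> None:
        # Called in-process for every appended ledger record.
        print(record.get("run_id"), record.get("type"))

    unsubscribe = runtime.subscribe_ledger(on_append)  # pass run_id to filter one run
    unsubscribe()  # detach when done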
@@ -340,7 +692,10 @@ class Runtime:
 
      def tick(self, *, workflow: WorkflowSpec, run_id: str, max_steps: int = 100) -> RunState:
          run = self.get_state(run_id)
-         if run.status in (RunStatus.COMPLETED, RunStatus.FAILED):
+         # Terminal runs never progress.
+         if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
+             return run
+         if _is_paused_run_vars(run.vars):
              return run
          if run.status == RunStatus.WAITING:
              # For WAIT_UNTIL we can auto-unblock if time passed
@@ -352,15 +707,40 @@ class Runtime:
              else:
                  return run
 
+         # IMPORTANT (Web hosts / concurrency):
+         # A run may be paused/cancelled by an external control plane (e.g. AbstractFlow Web UI)
+         # while we're blocked inside a long-running effect (LLM/tool execution).
+         #
+         # We make `tick()` resilient to that by re-loading the persisted RunState before
+         # committing any updates. If an external pause/cancel is observed, we stop without
+         # overwriting it.
+         def _abort_if_externally_controlled() -> Optional[RunState]:
+             try:
+                 latest = self.get_state(run_id)
+             except Exception:
+                 return None
+             if latest.status == RunStatus.CANCELLED:
+                 return latest
+             if _is_paused_run_vars(latest.vars):
+                 return latest
+             return None
+
          steps = 0
          while steps < max_steps:
              steps += 1
 
+             controlled = _abort_if_externally_controlled()
+             if controlled is not None:
+                 return controlled
+
              handler = workflow.get_node(run.current_node)
              plan = handler(run, self._ctx)
 
              # Completion
              if plan.complete_output is not None:
+                 controlled = _abort_if_externally_controlled()
+                 if controlled is not None:
+                     return controlled
                  run.status = RunStatus.COMPLETED
                  run.output = plan.complete_output
                  run.updated_at = utc_now_iso()
@@ -377,6 +757,9 @@ class Runtime:
              if plan.effect is None:
                  if not plan.next_node:
                      raise ValueError(f"Node '{plan.node_id}' returned no effect and no next_node")
+                 controlled = _abort_if_externally_controlled()
+                 if controlled is not None:
+                     return controlled
                  run.current_node = plan.next_node
                  run.updated_at = utc_now_iso()
                  self._run_store.save(run)
@@ -387,6 +770,13 @@ class Runtime:
                  run=run, node_id=plan.node_id, effect=plan.effect
              )
              prior_result = self._find_prior_completed_result(run.run_id, idempotency_key)
+             reused_prior_result = prior_result is not None
+
+             # Measure effect execution duration (wall-clock). This is used for
+             # host-side UX (badges, throughput estimates) and is stored in the
+             # runtime-owned node trace (JSON-safe).
+             import time
+             t0 = time.perf_counter()
 
              if prior_result is not None:
                  # Reuse prior result - skip re-execution
@@ -401,7 +791,42 @@ class Runtime:
                  default_next_node=plan.next_node,
              )
 
+             duration_ms = float((time.perf_counter() - t0) * 1000.0)
+
+             # Evidence capture (runtime-owned, durable):
+             # After tool execution completes, record provenance-first evidence for a small set of
+             # external-boundary tools (web_search/fetch_url/execute_command). This must happen
+             # BEFORE we persist node traces / result_key outputs so run state remains bounded.
+             try:
+                 if (
+                     not reused_prior_result
+                     and plan.effect.type == EffectType.TOOL_CALLS
+                     and outcome.status == "completed"
+                 ):
+                     self._maybe_record_tool_evidence(
+                         run=run,
+                         node_id=plan.node_id,
+                         effect=plan.effect,
+                         tool_results=outcome.result,
+                     )
+             except Exception:
+                 # Evidence capture should never crash the run; failures are recorded in run vars.
+                 pass
+
+             _record_node_trace(
+                 run=run,
+                 node_id=plan.node_id,
+                 effect=plan.effect,
+                 outcome=outcome,
+                 idempotency_key=idempotency_key,
+                 reused_prior_result=reused_prior_result,
+                 duration_ms=duration_ms,
+             )
+
              if outcome.status == "failed":
+                 controlled = _abort_if_externally_controlled()
+                 if controlled is not None:
+                     return controlled
                  run.status = RunStatus.FAILED
                  run.error = outcome.error or "unknown error"
                  run.updated_at = utc_now_iso()
@@ -410,6 +835,9 @@ class Runtime:
 
              if outcome.status == "waiting":
                  assert outcome.wait is not None
+                 controlled = _abort_if_externally_controlled()
+                 if controlled is not None:
+                     return controlled
                  run.status = RunStatus.WAITING
                  run.waiting = outcome.wait
                  run.updated_at = utc_now_iso()
@@ -420,16 +848,85 @@ class Runtime:
              if plan.effect.result_key and outcome.result is not None:
                  _set_nested(run.vars, plan.effect.result_key, outcome.result)
 
+             # Terminal effect node: treat missing next_node as completion.
+             #
+             # Rationale: StepPlan.complete_output is evaluated *before* effects
+             # execute, so an effectful node cannot both execute an effect and
+             # complete the run in a single StepPlan. Allowing next_node=None
+             # makes "end on an effect node" valid (Blueprint-style UX).
              if not plan.next_node:
-                 raise ValueError(f"Node '{plan.node_id}' executed effect but did not specify next_node")
+                 controlled = _abort_if_externally_controlled()
+                 if controlled is not None:
+                     return controlled
+                 run.status = RunStatus.COMPLETED
+                 run.output = {"success": True, "result": outcome.result}
+                 run.updated_at = utc_now_iso()
+                 self._run_store.save(run)
+                 return run
+             controlled = _abort_if_externally_controlled()
+             if controlled is not None:
+                 return controlled
              run.current_node = plan.next_node
              run.updated_at = utc_now_iso()
              self._run_store.save(run)
 
          return run
 
-     def resume(self, *, workflow: WorkflowSpec, run_id: str, wait_key: Optional[str], payload: Dict[str, Any]) -> RunState:
+     def _maybe_record_tool_evidence(
+         self,
+         *,
+         run: RunState,
+         node_id: str,
+         effect: Effect,
+         tool_results: Optional[Dict[str, Any]],
+     ) -> None:
+         """Best-effort evidence capture for TOOL_CALLS.
+
+         This is intentionally non-fatal: evidence capture must not crash the run,
+         but failures should be visible in durable run state for debugging.
+         """
+         if effect.type != EffectType.TOOL_CALLS:
+             return
+         if not isinstance(tool_results, dict):
+             return
+         payload = effect.payload if isinstance(effect.payload, dict) else {}
+         tool_calls = payload.get("tool_calls")
+         if not isinstance(tool_calls, list) or not tool_calls:
+             return
+
+         artifact_store = self._artifact_store
+         if artifact_store is None:
+             return
+
+         try:
+             from ..evidence import EvidenceRecorder
+
+             EvidenceRecorder(artifact_store=artifact_store).record_tool_calls(
+                 run=run,
+                 node_id=str(node_id or ""),
+                 tool_calls=list(tool_calls),
+                 tool_results=tool_results,
+             )
+         except Exception as e:
+             runtime_ns = _ensure_runtime_namespace(run.vars)
+             warnings = runtime_ns.get("evidence_warnings")
+             if not isinstance(warnings, list):
+                 warnings = []
+                 runtime_ns["evidence_warnings"] = warnings
+             warnings.append({"ts": utc_now_iso(), "node_id": str(node_id or ""), "error": str(e)})
+
+     def resume(
+         self,
+         *,
+         workflow: WorkflowSpec,
+         run_id: str,
+         wait_key: Optional[str],
+         payload: Dict[str, Any],
+         max_steps: int = 100,
+     ) -> RunState:
          run = self.get_state(run_id)
+         if _is_paused_run_vars(run.vars):
+             raise ValueError("Run is paused")
          if run.status != RunStatus.WAITING or run.waiting is None:
              raise ValueError("Run is not waiting")
 
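With the terminal-effect semantics above, a workflow can now end on an effect node (a sketch; `StepPlan`/`Effect` constructor signatures and import paths are inferred from this diff and may differ):

    from abstractruntime.core.models import Effect, EffectType, StepPlan  # assumed path

    def farewell(run, ctx):
        return StepPlan(
            node_id="farewell",
            effect=Effect(type=EffectType.ANSWER_USER, payload={"message": "All done."}),
            next_node=None,  # 0.2.0 raised ValueError here; 0.4.0 completes the run
        )
    # run.output == {"success": True, "result": {"message": "All done."}}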
@@ -440,14 +937,101 @@ class Runtime:
          resume_to = run.waiting.resume_to_node
          result_key = run.waiting.result_key
 
+         # Keep track of what we actually persisted for this resume (tool resumes may
+         # merge blocked-by-allowlist entries back into the payload).
+         stored_payload: Dict[str, Any] = payload
+
          if result_key:
-             _set_nested(run.vars, result_key, payload)
+             # Tool waits may carry blocked-by-allowlist metadata. External hosts typically only execute
+             # the filtered subset of tool calls and resume with results for those calls. To keep agent
+             # semantics correct (and evidence indices aligned), merge blocked entries back into the
+             # resumed payload deterministically.
+             merged_payload: Dict[str, Any] = payload
+             try:
+                 details = run.waiting.details if run.waiting is not None else None
+                 if isinstance(details, dict):
+                     blocked = details.get("blocked_by_index")
+                     original_count = details.get("original_call_count")
+                     results = payload.get("results") if isinstance(payload, dict) else None
+                     if (
+                         isinstance(blocked, dict)
+                         and isinstance(original_count, int)
+                         and original_count > 0
+                         and isinstance(results, list)
+                         and len(results) != original_count
+                     ):
+                         merged_results: list[Any] = []
+                         executed_iter = iter(results)
+
+                         for idx in range(original_count):
+                             blocked_entry = blocked.get(str(idx))
+                             if isinstance(blocked_entry, dict):
+                                 merged_results.append(blocked_entry)
+                                 continue
+                             try:
+                                 merged_results.append(next(executed_iter))
+                             except StopIteration:
+                                 merged_results.append(
+                                     {
+                                         "call_id": "",
+                                         "name": "",
+                                         "success": False,
+                                         "output": None,
+                                         "error": "Missing tool result",
+                                     }
+                                 )
+
+                         merged_payload = dict(payload)
+                         merged_payload["results"] = merged_results
+                         merged_payload.setdefault("mode", "executed")
+             except Exception:
+                 merged_payload = payload
+
+             _set_nested(run.vars, result_key, merged_payload)
+             stored_payload = merged_payload
+             # Passthrough tool execution: the host resumes with tool results. We still want
+             # evidence capture and payload-bounding (store large parts as artifacts) before
+             # the run continues.
+             try:
+                 details = run.waiting.details if run.waiting is not None else None
+                 tool_calls_for_evidence = None
+                 if isinstance(details, dict):
+                     tool_calls_for_evidence = details.get("tool_calls_for_evidence")
+                     if not isinstance(tool_calls_for_evidence, list):
+                         tool_calls_for_evidence = details.get("tool_calls")
+
+                 if isinstance(tool_calls_for_evidence, list):
+                     from ..evidence import EvidenceRecorder
+
+                     artifact_store = self._artifact_store
+                     if artifact_store is not None and isinstance(payload, dict):
+                         EvidenceRecorder(artifact_store=artifact_store).record_tool_calls(
+                             run=run,
+                             node_id=str(run.current_node or ""),
+                             tool_calls=list(tool_calls_for_evidence or []),
+                             tool_results=merged_payload,
+                         )
+             except Exception:
+                 pass
+
+         # Terminal waiting node: if there is no resume target, treat the resume payload as
+         # the final output instead of re-executing the waiting node again (which would
+         # otherwise create an infinite wait/resume loop).
+         if resume_to is None:
+             run.status = RunStatus.COMPLETED
+             run.waiting = None
+             run.output = {"success": True, "result": stored_payload}
+             run.updated_at = utc_now_iso()
+             self._run_store.save(run)
+             return run
 
          self._apply_resume_payload(run, payload=payload, override_node=resume_to)
          run.updated_at = utc_now_iso()
          self._run_store.save(run)
 
-         return self.tick(workflow=workflow, run_id=run_id)
+         if max_steps <= 0:
+             return run
+         return self.tick(workflow=workflow, run_id=run_id, max_steps=max_steps)
 
      # ---------------------------------------------------------------------
      # Internals
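To make the allowlist merge concrete: suppose the wait recorded three original calls, index 1 was blocked, and the host resumes with the two results it actually executed (an illustration; the dict shapes follow the merge code above):

    details = {
        "original_call_count": 3,
        "blocked_by_index": {
            "1": {"call_id": "c1", "name": "execute_command", "success": False,
                  "output": None, "error": "blocked by allowlist"},
        },
    }
    r0 = {"call_id": "c0", "name": "web_search", "success": True, "output": "ok", "error": None}
    r2 = {"call_id": "c2", "name": "fetch_url", "success": True, "output": "ok", "error": None}
    payload = {"results": [r0, r2]}
    # merged_payload["results"] == [r0, details["blocked_by_index"]["1"], r2]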
@@ -457,8 +1041,88 @@ class Runtime:
          self._handlers[EffectType.WAIT_EVENT] = self._handle_wait_event
          self._handlers[EffectType.WAIT_UNTIL] = self._handle_wait_until
          self._handlers[EffectType.ASK_USER] = self._handle_ask_user
+         self._handlers[EffectType.ANSWER_USER] = self._handle_answer_user
+         self._handlers[EffectType.EMIT_EVENT] = self._handle_emit_event
+         self._handlers[EffectType.MEMORY_QUERY] = self._handle_memory_query
+         self._handlers[EffectType.MEMORY_TAG] = self._handle_memory_tag
+         self._handlers[EffectType.MEMORY_COMPACT] = self._handle_memory_compact
+         self._handlers[EffectType.MEMORY_NOTE] = self._handle_memory_note
+         self._handlers[EffectType.MEMORY_REHYDRATE] = self._handle_memory_rehydrate
+         self._handlers[EffectType.VARS_QUERY] = self._handle_vars_query
          self._handlers[EffectType.START_SUBWORKFLOW] = self._handle_start_subworkflow
 
+     # Built-in memory helpers ------------------------------------------------
+
+     def _global_memory_run_id(self) -> str:
+         """Return the global memory run id (stable).
+
+         Hosts can override via `ABSTRACTRUNTIME_GLOBAL_MEMORY_RUN_ID`.
+         """
+         rid = os.environ.get("ABSTRACTRUNTIME_GLOBAL_MEMORY_RUN_ID")
+         rid = str(rid or "").strip()
+         if rid and _SAFE_RUN_ID_PATTERN.match(rid):
+             return rid
+         return _DEFAULT_GLOBAL_MEMORY_RUN_ID
+
+     def _ensure_global_memory_run(self) -> RunState:
+         """Load or create the global memory run used as the owner for `scope="global"` spans."""
+         rid = self._global_memory_run_id()
+         existing = self._run_store.load(rid)
+         if existing is not None:
+             return existing
+
+         run = RunState(
+             run_id=rid,
+             workflow_id="__global_memory__",
+             status=RunStatus.COMPLETED,
+             current_node="done",
+             vars={
+                 "context": {"task": "", "messages": []},
+                 "scratchpad": {},
+                 "_runtime": {"memory_spans": []},
+                 "_temp": {},
+                 "_limits": {},
+             },
+             waiting=None,
+             output={"messages": []},
+             error=None,
+             created_at=utc_now_iso(),
+             updated_at=utc_now_iso(),
+             actor_id=None,
+             session_id=None,
+             parent_run_id=None,
+         )
+         self._run_store.save(run)
+         return run
+
+     def _resolve_session_root_run(self, run: RunState) -> RunState:
+         """Resolve the root run of the current run-tree (walk `parent_run_id`)."""
+         cur = run
+         seen: set[str] = set()
+         while True:
+             parent_id = getattr(cur, "parent_run_id", None)
+             if not isinstance(parent_id, str) or not parent_id.strip():
+                 return cur
+             pid = parent_id.strip()
+             if pid in seen:
+                 # Defensive: break cycles.
+                 return cur
+             seen.add(pid)
+             parent = self._run_store.load(pid)
+             if parent is None:
+                 return cur
+             cur = parent
+
+     def _resolve_scope_owner_run(self, base_run: RunState, *, scope: str) -> RunState:
+         s = str(scope or "").strip().lower() or "run"
+         if s == "run":
+             return base_run
+         if s == "session":
+             return self._resolve_session_root_run(base_run)
+         if s == "global":
+             return self._ensure_global_memory_run()
+         raise ValueError(f"Unknown memory scope: {scope}")
+
      def _find_prior_completed_result(
          self, run_id: str, idempotency_key: str
      ) -> Optional[Dict[str, Any]]:
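Scope resolution in one place (a sketch; the environment override is validated against `_SAFE_RUN_ID_PATTERN` above):

    import os

    os.environ["ABSTRACTRUNTIME_GLOBAL_MEMORY_RUN_ID"] = "team_shared_memory"
    # scope="run"     -> the run itself
    # scope="session" -> the root of the run-tree (walks parent_run_id, cycle-safe)
    # scope="global"  -> the "team_shared_memory" run, created on demand as a
    #                    COMPLETED placeholder owning vars["_runtime"]["memory_spans"]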
@@ -579,16 +1243,226 @@ class Runtime:
      def _handle_wait_event(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
          wait_key = effect.payload.get("wait_key")
          if not wait_key:
-             return EffectOutcome.failed("wait_event requires payload.wait_key")
+             # Allow structured payloads (scope/name) so hosts can compute stable keys.
+             scope = effect.payload.get("scope", "session")
+             name = effect.payload.get("name") or effect.payload.get("event_name")
+             if not isinstance(name, str) or not name.strip():
+                 return EffectOutcome.failed("wait_event requires payload.wait_key or payload.name")
+
+             session_id = effect.payload.get("session_id") or run.session_id or run.run_id
+             try:
+                 wait_key = build_event_wait_key(
+                     scope=str(scope or "session"),
+                     name=str(name),
+                     session_id=str(session_id) if session_id is not None else None,
+                     workflow_id=run.workflow_id,
+                     run_id=run.run_id,
+                 )
+             except Exception as e:
+                 return EffectOutcome.failed(f"wait_event invalid payload: {e}")
          resume_to = effect.payload.get("resume_to_node") or default_next_node
+         # Optional UX metadata for hosts:
+         # - "prompt"/"choices"/"allow_free_text" enable durable human-in-the-loop
+         #   waits using EVENT as the wakeup mechanism (useful for thin clients).
+         prompt: Optional[str] = None
+         try:
+             p = effect.payload.get("prompt")
+             if isinstance(p, str) and p.strip():
+                 prompt = p
+         except Exception:
+             prompt = None
+
+         choices: Optional[List[str]] = None
+         try:
+             raw_choices = effect.payload.get("choices")
+             if isinstance(raw_choices, list):
+                 normalized: List[str] = []
+                 for c in raw_choices:
+                     if isinstance(c, str) and c.strip():
+                         normalized.append(c.strip())
+                 choices = normalized
+         except Exception:
+             choices = None
+
+         allow_free_text = True
+         try:
+             aft = effect.payload.get("allow_free_text")
+             if aft is None:
+                 aft = effect.payload.get("allowFreeText")
+             if aft is not None:
+                 allow_free_text = bool(aft)
+         except Exception:
+             allow_free_text = True
+
+         details = None
+         try:
+             d = effect.payload.get("details")
+             if isinstance(d, dict):
+                 details = dict(d)
+         except Exception:
+             details = None
          wait = WaitState(
              reason=WaitReason.EVENT,
              wait_key=str(wait_key),
              resume_to_node=resume_to,
              result_key=effect.result_key,
+             prompt=prompt,
+             choices=choices,
+             allow_free_text=allow_free_text,
+             details=details,
          )
          return EffectOutcome.waiting(wait)
 
+     def _handle_emit_event(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
+         """Emit a durable event and resume matching WAIT_EVENT runs.
+
+         Payload:
+         - name: str (required) event name
+         - scope: str (optional, default "session") "session" | "workflow" | "run" | "global"
+         - session_id: str (optional) target session id (for cross-workflow targeted delivery)
+         - payload: dict (optional) event payload delivered to listeners
+         - max_steps: int (optional, default 100) tick budget per resumed run
+
+         Notes:
+         - This is durable because it resumes WAIT_EVENT runs via Runtime.resume(), which
+           checkpoints run state and appends ledger records for subsequent steps.
+         - Delivery is best-effort and at-least-once; listeners should be idempotent if needed.
+         """
+         name = effect.payload.get("name") or effect.payload.get("event_name")
+         if not isinstance(name, str) or not name.strip():
+             return EffectOutcome.failed("emit_event requires payload.name")
+
+         scope = effect.payload.get("scope", "session")
+         target_session_id = effect.payload.get("session_id")
+         payload = effect.payload.get("payload") or {}
+         if not isinstance(payload, dict):
+             payload = {"value": payload}
+
+         # NOTE: we intentionally resume listeners with max_steps=0 (no execution).
+         # Hosts (web backend, workers, schedulers) should drive RUNNING runs and
+         # stream their StepRecords deterministically (better observability and UX).
+         try:
+             max_steps = int(effect.payload.get("max_steps", 0) or 0)
+         except Exception:
+             max_steps = 0
+         if max_steps < 0:
+             max_steps = 0
+
+         # Determine target scope id (default: current session/run).
+         session_id = target_session_id
+         if session_id is None and str(scope or "session").strip().lower() == "session":
+             session_id = run.session_id or run.run_id
+
+         try:
+             wait_key = build_event_wait_key(
+                 scope=str(scope or "session"),
+                 name=str(name),
+                 session_id=str(session_id) if session_id is not None else None,
+                 workflow_id=run.workflow_id,
+                 run_id=run.run_id,
+             )
+         except Exception as e:
+             return EffectOutcome.failed(f"emit_event invalid payload: {e}")
+
+         # Wildcard listeners ("*") receive all events within the same scope_id.
+         wildcard_wait_key: Optional[str] = None
+         try:
+             wildcard_wait_key = build_event_wait_key(
+                 scope=str(scope or "session"),
+                 name="*",
+                 session_id=str(session_id) if session_id is not None else None,
+                 workflow_id=run.workflow_id,
+                 run_id=run.run_id,
+             )
+         except Exception:
+             wildcard_wait_key = None
+
+         if self._workflow_registry is None:
+             return EffectOutcome.failed(
+                 "emit_event requires a workflow_registry to resume target runs. "
+                 "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)."
+             )
+
+         if not isinstance(self._run_store, QueryableRunStore):
+             return EffectOutcome.failed(
+                 "emit_event requires a QueryableRunStore to find waiting runs. "
+                 "Use InMemoryRunStore/JsonFileRunStore or provide a queryable store."
+             )
+
+         # Find all runs waiting for this event key.
+         candidates = self._run_store.list_runs(
+             status=RunStatus.WAITING,
+             wait_reason=WaitReason.EVENT,
+             limit=10_000,
+         )
+
+         delivered_to: list[str] = []
+         resumed: list[Dict[str, Any]] = []
+         envelope = {
+             "event_id": effect.payload.get("event_id") or None,
+             "name": str(name),
+             "scope": str(scope or "session"),
+             "session_id": str(session_id) if session_id is not None else None,
+             "payload": dict(payload),
+             "emitted_at": utc_now_iso(),
+             "emitter": {
+                 "run_id": run.run_id,
+                 "workflow_id": run.workflow_id,
+                 "node_id": run.current_node,
+             },
+         }
+
+         available_in_session: list[str] = []
+         prefix = f"evt:session:{session_id}:"
+
+         for r in candidates:
+             if _is_paused_run_vars(getattr(r, "vars", None)):
+                 continue
+             w = getattr(r, "waiting", None)
+             if w is None:
+                 continue
+             wk = getattr(w, "wait_key", None)
+             if isinstance(wk, str) and wk.startswith(prefix):
+                 # Help users debug name mismatches (best-effort).
+                 suffix = wk[len(prefix) :]
+                 if suffix and suffix not in available_in_session and len(available_in_session) < 15:
+                     available_in_session.append(suffix)
+             if wk != wait_key and (wildcard_wait_key is None or wk != wildcard_wait_key):
+                 continue
+
+             wf = self._workflow_registry.get(r.workflow_id)
+             if wf is None:
+                 # Can't resume without the spec; skip but include diagnostic in result.
+                 resumed.append({"run_id": r.run_id, "status": "skipped", "error": "workflow_not_registered"})
+                 continue
+
+             try:
+                 # Resume using the run's own wait_key (supports wildcard listeners).
+                 resume_key = wk if isinstance(wk, str) and wk else None
+                 new_state = self.resume(
+                     workflow=wf,
+                     run_id=r.run_id,
+                     wait_key=resume_key,
+                     payload=envelope,
+                     max_steps=max_steps,
+                 )
+                 delivered_to.append(r.run_id)
+                 resumed.append({"run_id": r.run_id, "status": new_state.status.value})
+             except Exception as e:
+                 resumed.append({"run_id": r.run_id, "status": "error", "error": str(e)})
+
+         out: Dict[str, Any] = {
+             "wait_key": wait_key,
+             "name": str(name),
+             "scope": str(scope or "session"),
+             "delivered": len(delivered_to),
+             "delivered_to": delivered_to,
+             "resumed": resumed,
+         }
+         if not delivered_to and available_in_session:
+             out["available_listeners_in_session"] = available_in_session
+         return EffectOutcome.completed(out)
+
      def _handle_wait_until(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
          until = effect.payload.get("until")
          if not until:
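A matching listener/emitter pair then looks like this (a sketch; `Effect` construction and import paths are inferred from this diff):

    listener = Effect(
        type=EffectType.WAIT_EVENT,
        payload={"scope": "session", "name": "order_ready",
                 "prompt": "Waiting for the kitchen", "allow_free_text": False},
        result_key="context.event",
    )
    emitter = Effect(
        type=EffectType.EMIT_EVENT,
        payload={"name": "order_ready", "payload": {"order_id": 42}},
    )
    # Each resumed listener receives the envelope under its result_key: name, scope,
    # session_id, payload, emitted_at, plus an "emitter" descriptor
    # (run_id/workflow_id/node_id). Wildcard listeners register with name="*".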
@@ -628,6 +1502,22 @@ class Runtime:
          )
          return EffectOutcome.waiting(wait)
 
+     def _handle_answer_user(
+         self, run: RunState, effect: Effect, default_next_node: Optional[str]
+     ) -> EffectOutcome:
+         """Handle ANSWER_USER effect.
+
+         This effect is intentionally non-blocking: it completes immediately and
+         returns the message payload so the host UI can render it.
+         """
+         message = effect.payload.get("message")
+         if message is None:
+             # Backward/compat convenience aliases.
+             message = effect.payload.get("text") or effect.payload.get("content")
+         if message is None:
+             return EffectOutcome.failed("answer_user requires payload.message")
+         return EffectOutcome.completed({"message": str(message)})
+
      def _handle_start_subworkflow(
          self, run: RunState, effect: Effect, default_next_node: Optional[str]
      ) -> EffectOutcome:
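Usage note: because the handler accepts `text` and `content` as aliases, these payloads are equivalent (illustrative):

    Effect(type=EffectType.ANSWER_USER, payload={"message": "Done."})
    Effect(type=EffectType.ANSWER_USER, payload={"text": "Done."})
    Effect(type=EffectType.ANSWER_USER, payload={"content": "Done."})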
@@ -664,6 +1554,8 @@ class Runtime:
 
          sub_vars = effect.payload.get("vars") or {}
          is_async = bool(effect.payload.get("async", False))
+         wait_for_completion = bool(effect.payload.get("wait", False))
+         include_traces = bool(effect.payload.get("include_traces", False))
          resume_to = effect.payload.get("resume_to_node") or default_next_node
 
          # Start the subworkflow with parent tracking
@@ -676,8 +1568,29 @@ class Runtime:
          )
 
          if is_async:
-             # Async mode: return immediately with sub_run_id
-             # The child is started but not ticked - caller is responsible for driving it
+             # Async mode: start the child and return immediately.
+             #
+             # If `wait=True`, we *also* transition the parent into a durable WAITING state
+             # so a host (e.g. AbstractFlow WebSocket runner loop) can:
+             #   - tick the child run incrementally (and stream node traces in real time)
+             #   - resume the parent once the child completes (by calling runtime.resume(...))
+             #
+             # Without `wait=True`, this remains fire-and-forget.
+             if wait_for_completion:
+                 wait = WaitState(
+                     reason=WaitReason.SUBWORKFLOW,
+                     wait_key=f"subworkflow:{sub_run_id}",
+                     resume_to_node=resume_to,
+                     result_key=effect.result_key,
+                     details={
+                         "sub_run_id": sub_run_id,
+                         "sub_workflow_id": workflow_id,
+                         "async": True,
+                     },
+                 )
+                 return EffectOutcome.waiting(wait)
+
+             # Fire-and-forget: caller is responsible for driving/observing the child.
              return EffectOutcome.completed({"sub_run_id": sub_run_id, "async": True})
 
          # Sync mode: run the subworkflow until completion or waiting
@@ -689,10 +1602,10 @@ class Runtime:
 
          if sub_state.status == RunStatus.COMPLETED:
              # Subworkflow completed - return its output
-             return EffectOutcome.completed({
-                 "sub_run_id": sub_run_id,
-                 "output": sub_state.output,
-             })
+             result: Dict[str, Any] = {"sub_run_id": sub_run_id, "output": sub_state.output}
+             if include_traces:
+                 result["node_traces"] = self.get_node_traces(sub_run_id)
+             return EffectOutcome.completed(result)
 
          if sub_state.status == RunStatus.FAILED:
              # Subworkflow failed - propagate error
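Taken together, the subworkflow payload now distinguishes three modes (a sketch; the `workflow_id` key is an assumption, the other keys appear in these hunks):

    sync_child   = {"workflow_id": "child", "vars": {}, "include_traces": True}
    fire_forget  = {"workflow_id": "child", "vars": {}, "async": True}
    async_waited = {"workflow_id": "child", "vars": {}, "async": True, "wait": True}
    # async+wait parks the parent in a durable SUBWORKFLOW wait
    # (wait_key="subworkflow:<sub_run_id>") so a host can tick the child and
    # resume the parent when the child completes; include_traces attaches the
    # child's node_traces to the sync-mode result.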
@@ -721,6 +1634,1523 @@ class Runtime:
          # Unexpected status
          return EffectOutcome.failed(f"Unexpected subworkflow status: {sub_state.status.value}")
 
+     # Built-in memory handlers ---------------------------------------------
+
+     def _handle_memory_query(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
+         """Handle MEMORY_QUERY.
+
+         This effect supports provenance-first recall over archived memory spans stored in ArtifactStore.
+         It is intentionally metadata-first and embedding-free (semantic retrieval belongs in AbstractMemory).
+
+         Payload (all optional unless otherwise stated):
+         - span_id: str | int | list[str|int] (artifact_id or 1-based index into _runtime.memory_spans)
+         - query: str (keyword substring match)
+         - since: str (ISO8601, span intersection filter)
+         - until: str (ISO8601, span intersection filter)
+         - tags: dict[str, str|list[str]] (span tag filter; values can be multi-valued)
+         - tags_mode: "all"|"any" (default "all"; AND/OR across tag keys)
+         - authors: list[str] (alias: usernames; matches span.created_by case-insensitively)
+         - locations: list[str] (matches span.location case-insensitively)
+         - limit_spans: int (default 5)
+         - deep: bool (default True when query is set; scans archived messages)
+         - deep_limit_spans: int (default 50)
+         - deep_limit_messages_per_span: int (default 400)
+         - connected: bool (include connected spans via time adjacency + shared tags)
+         - neighbor_hops: int (default 1 when connected=True)
+         - connect_by: list[str] (default ["topic","person"])
+         - max_messages: int (default 80; total messages rendered across all spans)
+         - tool_name: str (default "recall_memory"; for formatting)
+         - call_id: str (tool-call id passthrough)
+         """
+         from .vars import ensure_namespaces, parse_vars_path, resolve_vars_path
+
+         ensure_namespaces(run.vars)
+         runtime_ns = run.vars.get("_runtime")
+         if not isinstance(runtime_ns, dict):
+             runtime_ns = {}
+             run.vars["_runtime"] = runtime_ns
+
+         artifact_store = self._artifact_store
+         if artifact_store is None:
+             return EffectOutcome.failed(
+                 "MEMORY_QUERY requires an ArtifactStore; configure runtime.set_artifact_store(...)"
+             )
+
+         payload = dict(effect.payload or {})
+         tool_name = str(payload.get("tool_name") or "recall_memory")
+         call_id = str(payload.get("call_id") or "memory")
+
+         # Scope routing (run-tree/global). Scope affects which run owns the span index queried.
+         scope = str(payload.get("scope") or "run").strip().lower() or "run"
+         if scope not in {"run", "session", "global", "all"}:
+             return EffectOutcome.failed(f"Unknown memory_query scope: {scope}")
+
+         # Return mode controls whether we include structured meta in the tool result.
+         return_mode = str(payload.get("return") or payload.get("return_mode") or "rendered").strip().lower() or "rendered"
+         if return_mode not in {"rendered", "meta", "both"}:
+             return EffectOutcome.failed(f"Unknown memory_query return mode: {return_mode}")
+
+         query = payload.get("query")
+         query_text = str(query or "").strip()
+         since = payload.get("since")
+         until = payload.get("until")
+         tags = payload.get("tags")
+         tags_dict: Optional[Dict[str, Any]] = None
+         if isinstance(tags, dict):
+             # Accept str or list[str] values. Ignore reserved key "kind".
+             out_tags: Dict[str, Any] = {}
+             for k, v in tags.items():
+                 if not isinstance(k, str) or not k.strip():
+                     continue
+                 if k == "kind":
+                     continue
+                 if isinstance(v, str) and v.strip():
+                     out_tags[k.strip()] = v.strip()
+                 elif isinstance(v, (list, tuple)):
+                     vals = [str(x).strip() for x in v if isinstance(x, str) and str(x).strip()]
+                     if vals:
+                         out_tags[k.strip()] = vals
+             tags_dict = out_tags or None
+
+         tags_mode_raw = payload.get("tags_mode")
+         if tags_mode_raw is None:
+             tags_mode_raw = payload.get("tagsMode")
+         if tags_mode_raw is None:
+             tags_mode_raw = payload.get("tag_mode")
+         tags_mode = str(tags_mode_raw or "all").strip().lower() or "all"
+         if tags_mode in {"and"}:
+             tags_mode = "all"
+         if tags_mode in {"or"}:
+             tags_mode = "any"
+         if tags_mode not in {"all", "any"}:
+             tags_mode = "all"
+
+         def _norm_str_list(value: Any) -> list[str]:
+             if value is None:
+                 return []
+             if isinstance(value, str):
+                 v = value.strip()
+                 return [v] if v else []
+             if not isinstance(value, list):
+                 return []
+             out: list[str] = []
+             for x in value:
+                 if isinstance(x, str) and x.strip():
+                     out.append(x.strip())
+             # preserve order but dedup (case-insensitive)
+             seen: set[str] = set()
+             deduped: list[str] = []
+             for s in out:
+                 key = s.lower()
+                 if key in seen:
+                     continue
+                 seen.add(key)
+                 deduped.append(s)
+             return deduped
+
+         authors = _norm_str_list(payload.get("authors") if "authors" in payload else payload.get("usernames"))
+         if not authors:
+             authors = _norm_str_list(payload.get("users"))
+         locations = _norm_str_list(payload.get("locations") if "locations" in payload else payload.get("location"))
+
+         try:
+             limit_spans = int(payload.get("limit_spans", 5) or 5)
+         except Exception:
+             limit_spans = 5
+         if limit_spans < 1:
+             limit_spans = 1
+
+         deep = payload.get("deep")
+         if deep is None:
+             deep_enabled = bool(query_text)
+         else:
+             deep_enabled = bool(deep)
+
+         try:
+             deep_limit_spans = int(payload.get("deep_limit_spans", 50) or 50)
+         except Exception:
+             deep_limit_spans = 50
+         if deep_limit_spans < 1:
+             deep_limit_spans = 1
+
+         try:
+             deep_limit_messages_per_span = int(payload.get("deep_limit_messages_per_span", 400) or 400)
+         except Exception:
+             deep_limit_messages_per_span = 400
+         if deep_limit_messages_per_span < 1:
+             deep_limit_messages_per_span = 1
+
+         connected = bool(payload.get("connected", False))
+         try:
+             neighbor_hops = int(payload.get("neighbor_hops", 1) or 1)
+         except Exception:
+             neighbor_hops = 1
+         if neighbor_hops < 0:
+             neighbor_hops = 0
+
+         connect_by = payload.get("connect_by")
+         if isinstance(connect_by, list):
+             connect_keys = [str(x) for x in connect_by if isinstance(x, (str, int, float)) and str(x).strip()]
+         else:
+             connect_keys = ["topic", "person"]
+
+         try:
+             max_messages = int(payload.get("max_messages", -1) or -1)
+         except Exception:
+             max_messages = -1
+         # `-1` means "no truncation" for rendered messages.
+         if max_messages < -1:
+             max_messages = -1
+         if max_messages != -1 and max_messages < 1:
+             max_messages = 1
+
+         from ..memory.active_context import ActiveContextPolicy, TimeRange
+
+         # Select run(s) to query.
+         runs_to_query: list[RunState] = []
+         if scope == "run":
+             runs_to_query = [run]
+         elif scope == "session":
+             runs_to_query = [self._resolve_scope_owner_run(run, scope="session")]
+         elif scope == "global":
+             runs_to_query = [self._resolve_scope_owner_run(run, scope="global")]
+         else:  # all
+             # Deterministic order; dedup by run_id.
+             root = self._resolve_scope_owner_run(run, scope="session")
+             global_run = self._resolve_scope_owner_run(run, scope="global")
+             seen_ids: set[str] = set()
+             for r in (run, root, global_run):
+                 if r.run_id in seen_ids:
+                     continue
+                 seen_ids.add(r.run_id)
+                 runs_to_query.append(r)
+
+         # Collect per-run span indexes (metadata) and summary maps for rendering.
+         spans_by_run_id: dict[str, list[dict[str, Any]]] = {}
+         all_spans: list[dict[str, Any]] = []
+         all_summary_by_artifact: dict[str, str] = {}
+         for target in runs_to_query:
+             spans = ActiveContextPolicy.list_memory_spans_from_run(target)
+             # `memory_spans` is a general span-like index (conversation spans, notes, evidence, etc).
+             # MEMORY_QUERY is specifically for provenance-first *memory recall*, not evidence retrieval.
+             spans = [s for s in spans if not (isinstance(s, dict) and str(s.get("kind") or "") == "evidence")]
+             spans_by_run_id[target.run_id] = spans
+             all_spans.extend([dict(s) for s in spans if isinstance(s, dict)])
+             all_summary_by_artifact.update(ActiveContextPolicy.summary_text_by_artifact_id_from_run(target))
+
+         # Resolve explicit span ids if provided.
+         span_id_payload = payload.get("span_id")
+         span_ids_payload = payload.get("span_ids")
+         explicit_ids = span_ids_payload if isinstance(span_ids_payload, list) else span_id_payload
+
+         all_selected: list[str] = []
+
+         if explicit_ids is not None:
+             explicit_list = list(explicit_ids) if isinstance(explicit_ids, list) else [explicit_ids]
+
+             # Indices are inherently scoped to a single run's span list; for `scope="all"`,
+             # require stable artifact ids to avoid ambiguity.
+             if scope == "all":
+                 for x in explicit_list:
+                     if isinstance(x, int):
+                         return EffectOutcome.failed("memory_query scope='all' requires explicit span_ids as artifact ids (no indices)")
+                     if isinstance(x, str) and x.strip().isdigit():
+                         return EffectOutcome.failed("memory_query scope='all' requires explicit span_ids as artifact ids (no indices)")
+                 # Treat as artifact ids.
+                 all_selected = _dedup_preserve_order([str(x).strip() for x in explicit_list if str(x).strip()])
+             else:
+                 # Single-run resolution for indices.
+                 target = runs_to_query[0]
+                 spans = spans_by_run_id.get(target.run_id, [])
+                 all_selected = ActiveContextPolicy.resolve_span_ids_from_spans(explicit_list, spans)
+         else:
+             # Filter spans per target and union.
+             time_range = None
+             if since or until:
+                 time_range = TimeRange(
+                     start=str(since) if since else None,
+                     end=str(until) if until else None,
+                 )
+
+             for target in runs_to_query:
+                 spans = spans_by_run_id.get(target.run_id, [])
+                 matches = ActiveContextPolicy.filter_spans_from_run(
+                     target,
+                     artifact_store=artifact_store,
+                     time_range=time_range,
+                     tags=tags_dict,
+                     tags_mode=tags_mode,
+                     authors=authors or None,
+                     locations=locations or None,
+                     query=query_text or None,
+                     limit=limit_spans,
+                 )
+                 selected = [str(s.get("artifact_id") or "") for s in matches if isinstance(s, dict) and s.get("artifact_id")]
+
+                 if deep_enabled and query_text:
+                     # Deep scan is bounded and should respect metadata filters (tags/authors/locations/time).
+                     deep_candidates = ActiveContextPolicy.filter_spans_from_run(
+                         target,
+                         artifact_store=artifact_store,
+                         time_range=time_range,
+                         tags=tags_dict,
+                         tags_mode=tags_mode,
+                         authors=authors or None,
+                         locations=locations or None,
+                         query=None,
+                         limit=deep_limit_spans,
+                     )
+                     selected = _dedup_preserve_order(
+                         selected
+                         + _deep_scan_span_ids(
+                             spans=deep_candidates,
+                             artifact_store=artifact_store,
+                             query=query_text,
+                             limit_spans=deep_limit_spans,
+                             limit_messages_per_span=deep_limit_messages_per_span,
+                         )
+                     )
+
+                 if connected and selected:
+                     connect_candidates = ActiveContextPolicy.filter_spans_from_run(
+                         target,
+                         artifact_store=artifact_store,
+                         time_range=time_range,
+                         tags=tags_dict,
+                         tags_mode=tags_mode,
+                         authors=authors or None,
+                         locations=locations or None,
+                         query=None,
+                         limit=max(1000, len(spans)),
+                     )
+                     selected = _dedup_preserve_order(
+                         _expand_connected_span_ids(
+                             spans=connect_candidates,
+                             seed_artifact_ids=selected,
+                             connect_keys=connect_keys,
+                             neighbor_hops=neighbor_hops,
+                             limit=max(limit_spans, len(selected)),
+                         )
+                     )
+
+                 all_selected = _dedup_preserve_order(all_selected + selected)
+
+         rendered_text = ""
+         if return_mode in {"rendered", "both"}:
+             # Render output (provenance + messages). Note: this may load artifacts.
+             rendered_text = _render_memory_query_output(
+                 spans=all_spans,
+                 artifact_store=artifact_store,
+                 selected_artifact_ids=all_selected,
+                 summary_by_artifact=all_summary_by_artifact,
+                 max_messages=max_messages,
+             )
+
+         # Structured meta output (for deterministic workflows).
+         meta: dict[str, Any] = {}
+         if return_mode in {"meta", "both"}:
+             # Index span record by artifact id (first match wins deterministically).
+             by_artifact: dict[str, dict[str, Any]] = {}
+             for s in all_spans:
+                 try:
+                     aid = str(s.get("artifact_id") or "").strip()
+                 except Exception:
+                     aid = ""
+                 if not aid or aid in by_artifact:
+                     continue
+                 by_artifact[aid] = s
+
+             matches: list[dict[str, Any]] = []
+             for aid in all_selected:
+                 span = by_artifact.get(aid)
+                 if not isinstance(span, dict):
+                     continue
+                 m: dict[str, Any] = {
+                     "span_id": aid,
+                     "kind": span.get("kind"),
+                     "created_at": span.get("created_at"),
+                     "from_timestamp": span.get("from_timestamp"),
+                     "to_timestamp": span.get("to_timestamp"),
+                     "tags": span.get("tags") if isinstance(span.get("tags"), dict) else {},
+                 }
+                 for k in ("created_by", "location"):
+                     if k in span:
+                         m[k] = span.get(k)
+                 # Include known preview fields without enforcing a global schema.
+                 for k in ("note_preview", "message_count", "summary_message_id"):
+                     if k in span:
+                         m[k] = span.get(k)
+                 matches.append(m)
+
+             meta = {"matches": matches, "span_ids": list(all_selected)}
+
+         result = {
+             "mode": "executed",
+             "results": [
+                 {
+                     "call_id": call_id,
+                     "name": tool_name,
+                     "success": True,
+                     "output": rendered_text if return_mode in {"rendered", "both"} else "",
+                     "error": None,
+                     "meta": meta if meta else None,
1997
+ }
1998
+ ],
1999
+ }
2000
+ return EffectOutcome.completed(result=result)
2001
+
2002
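The handler above resolves scope, filters span metadata, optionally deep-scans archived messages, and answers with a tool-style envelope. A minimal host-side sketch follows; the EffectType.MEMORY_QUERY member and the exact payload key names are assumptions inferred from the locals parsed in the handler, not confirmed by this diff:

# Hedged sketch: EffectType.MEMORY_QUERY and the payload keys below are assumed;
# only the result envelope shape is confirmed by the code above.
from abstractruntime.core.models import Effect, EffectType  # path follows the package layout

effect = Effect(
    type=EffectType.MEMORY_QUERY,  # assumed member name
    payload={
        "scope": "all",              # "run" | "session" | "global" | "all"
        "query": "database schema",  # keyword filter; also drives the deep scan
        "tags": {"topic": "db"},
        "return_mode": "both",       # "rendered" | "meta" | "both"
    },
)
# -> {"mode": "executed", "results": [{"call_id": ..., "name": ..., "success": True,
#     "output": "<rendered text>", "error": None,
#     "meta": {"matches": [...], "span_ids": [...]}}]}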
+    def _handle_vars_query(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
+        """Handle VARS_QUERY.
+
+        This is a JSON-safe, runtime-owned introspection primitive intended for:
+        - progressive recall/debugging (e.g., inspect `scratchpad`)
+        - host tooling parity (schema-only tools that map to runtime effects)
+
+        Payload (all optional unless stated):
+        - path: str (default "scratchpad"; supports dot path or JSON pointer "/a/b/0")
+        - keys_only: bool (default False; when True, return keys/length instead of the full value)
+        - target_run_id: str (optional; inspect another run's state)
+        - tool_name: str (default "inspect_vars"; for tool-style output)
+        - call_id: str (tool-call id passthrough)
+        """
+        import json
+
+        from .vars import ensure_namespaces, parse_vars_path, resolve_vars_path
+
+        payload = dict(effect.payload or {})
+        tool_name = str(payload.get("tool_name") or "inspect_vars")
+        call_id = str(payload.get("call_id") or "vars")
+
+        target_run_id = payload.get("target_run_id")
+        if target_run_id is not None:
+            target_run_id = str(target_run_id).strip() or None
+
+        path = payload.get("path")
+        if path is None:
+            path = payload.get("var_path")
+        path_text = str(path or "").strip() or "scratchpad"
+
+        keys_only = bool(payload.get("keys_only", False))
+
+        target_run = run
+        if target_run_id and target_run_id != run.run_id:
+            loaded = self._run_store.load(target_run_id)
+            if loaded is None:
+                return EffectOutcome.completed(
+                    result={
+                        "mode": "executed",
+                        "results": [
+                            {
+                                "call_id": call_id,
+                                "name": tool_name,
+                                "success": False,
+                                "output": None,
+                                "error": f"Unknown target_run_id: {target_run_id}",
+                            }
+                        ],
+                    }
+                )
+            target_run = loaded
+
+        ensure_namespaces(target_run.vars)
+
+        try:
+            tokens = parse_vars_path(path_text)
+            value = resolve_vars_path(target_run.vars, tokens)
+        except Exception as e:
+            return EffectOutcome.completed(
+                result={
+                    "mode": "executed",
+                    "results": [
+                        {
+                            "call_id": call_id,
+                            "name": tool_name,
+                            "success": False,
+                            "output": None,
+                            "error": str(e),
+                        }
+                    ],
+                }
+            )
+
+        out: Dict[str, Any] = {"path": path_text, "type": type(value).__name__}
+        if keys_only:
+            if isinstance(value, dict):
+                out["keys"] = sorted([str(k) for k in value.keys()])
+            elif isinstance(value, list):
+                out["length"] = len(value)
+            else:
+                out["value"] = value
+        else:
+            out["value"] = value
+
+        text = json.dumps(out, ensure_ascii=False, indent=2, sort_keys=True, default=str)
+
+        return EffectOutcome.completed(
+            result={
+                "mode": "executed",
+                "results": [
+                    {
+                        "call_id": call_id,
+                        "name": tool_name,
+                        "success": True,
+                        "output": text,
+                        "error": None,
+                    }
+                ],
+            }
+        )
+
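Since the payload fields are documented in the docstring above, a host-side call is easy to sketch (EffectType.VARS_QUERY is the assumed member name for this handler):

# Minimal sketch; EffectType.VARS_QUERY is assumed from the handler name,
# the payload fields come from the docstring above.
effect = Effect(type=EffectType.VARS_QUERY, payload={"path": "scratchpad.plan", "keys_only": True})
# Success output is deterministic, pretty-printed JSON (sort_keys=True), e.g.:
# {
#   "keys": ["steps", "title"],
#   "path": "scratchpad.plan",
#   "type": "dict"
# }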
+    def _handle_memory_tag(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
+        """Handle MEMORY_TAG.
+
+        Payload (required unless stated):
+        - span_id: str | int (artifact_id or 1-based index into `_runtime.memory_spans`)
+        - tags: dict[str,str] (merged into span["tags"] by default)
+        - merge: bool (optional, default True; when False, replaces span["tags"])
+        - tool_name: str (optional; for tool-style output, default "remember")
+        - call_id: str (optional; passthrough for tool-style output)
+
+        Notes:
+        - This mutates the in-run span index (`_runtime.memory_spans`) only; it does not change artifacts.
+        - Tagging is intentionally JSON-safe (string->string).
+        """
+        import json
+
+        from .vars import ensure_namespaces
+
+        ensure_namespaces(run.vars)
+        runtime_ns = run.vars.get("_runtime")
+        if not isinstance(runtime_ns, dict):
+            runtime_ns = {}
+            run.vars["_runtime"] = runtime_ns
+
+        spans = runtime_ns.get("memory_spans")
+        if not isinstance(spans, list):
+            return EffectOutcome.failed("MEMORY_TAG requires _runtime.memory_spans to be a list")
+
+        payload = dict(effect.payload or {})
+        tool_name = str(payload.get("tool_name") or "remember")
+        call_id = str(payload.get("call_id") or "memory")
+
+        span_id = payload.get("span_id")
+        tags = payload.get("tags")
+        if span_id is None:
+            return EffectOutcome.failed("MEMORY_TAG requires payload.span_id")
+        if not isinstance(tags, dict) or not tags:
+            return EffectOutcome.failed("MEMORY_TAG requires payload.tags as a non-empty dict[str,str]")
+
+        merge = bool(payload.get("merge", True))
+
+        clean_tags: Dict[str, str] = {}
+        for k, v in tags.items():
+            if isinstance(k, str) and isinstance(v, str) and k and v:
+                clean_tags[k] = v
+        if not clean_tags:
+            return EffectOutcome.failed("MEMORY_TAG requires at least one non-empty string tag")
+
+        artifact_id: Optional[str] = None
+        target_index: Optional[int] = None
+
+        if isinstance(span_id, int):
+            idx = span_id - 1
+            if idx < 0 or idx >= len(spans):
+                return EffectOutcome.failed(f"Unknown span index: {span_id}")
+            span = spans[idx]
+            if not isinstance(span, dict):
+                return EffectOutcome.failed(f"Invalid span record at index {span_id}")
+            artifact_id = str(span.get("artifact_id") or "").strip() or None
+            target_index = idx
+        elif isinstance(span_id, str):
+            s = span_id.strip()
+            if not s:
+                return EffectOutcome.failed("MEMORY_TAG requires a non-empty span_id")
+            if s.isdigit():
+                idx = int(s) - 1
+                if idx < 0 or idx >= len(spans):
+                    return EffectOutcome.failed(f"Unknown span index: {s}")
+                span = spans[idx]
+                if not isinstance(span, dict):
+                    return EffectOutcome.failed(f"Invalid span record at index {s}")
+                artifact_id = str(span.get("artifact_id") or "").strip() or None
+                target_index = idx
+            else:
+                artifact_id = s
+        else:
+            return EffectOutcome.failed("MEMORY_TAG requires span_id as str or int")
+
+        if not artifact_id:
+            return EffectOutcome.failed("Could not resolve span_id to an artifact_id")
+
+        if target_index is None:
+            for i, span in enumerate(spans):
+                if not isinstance(span, dict):
+                    continue
+                if str(span.get("artifact_id") or "") == artifact_id:
+                    target_index = i
+                    break
+
+        if target_index is None:
+            return EffectOutcome.failed(f"Unknown span_id: {artifact_id}")
+
+        target = spans[target_index]
+        if not isinstance(target, dict):
+            return EffectOutcome.failed(f"Invalid span record at index {target_index + 1}")
+
+        existing_tags = target.get("tags")
+        if not isinstance(existing_tags, dict):
+            existing_tags = {}
+
+        if merge:
+            merged_tags = dict(existing_tags)
+            merged_tags.update(clean_tags)
+        else:
+            merged_tags = dict(clean_tags)
+
+        target["tags"] = merged_tags
+        target["tagged_at"] = utc_now_iso()
+        if run.actor_id:
+            target["tagged_by"] = str(run.actor_id)
+
+        rendered_tags = json.dumps(merged_tags, ensure_ascii=False, sort_keys=True)
+        text = f"Tagged span_id={artifact_id} tags={rendered_tags}"
+
+        result = {
+            "mode": "executed",
+            "results": [
+                {
+                    "call_id": call_id,
+                    "name": tool_name,
+                    "success": True,
+                    "output": text,
+                    "error": None,
+                }
+            ],
+        }
+        return EffectOutcome.completed(result=result)
+
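For example, tagging the second entry of `_runtime.memory_spans` by its 1-based index (EffectType.MEMORY_TAG is the assumed member name; the payload fields are from the docstring above):

# Assumed EffectType member; payload fields are documented above.
effect = Effect(
    type=EffectType.MEMORY_TAG,
    payload={"span_id": 2, "tags": {"topic": "auth", "status": "resolved"}},
)
# Output (tags rendered with sort_keys=True):
# Tagged span_id=<artifact_id> tags={"status": "resolved", "topic": "auth"}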
+    def _handle_memory_compact(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
+        """Handle MEMORY_COMPACT.
+
+        This is a runtime-owned compaction of a run's active context:
+        - archives the compacted messages to ArtifactStore (provenance preserved)
+        - inserts a system summary message that includes `span_id=...` (LLM-visible handle)
+        - updates the `_runtime.memory_spans` index with metadata/tags
+
+        Payload (optional unless stated):
+        - preserve_recent: int (default 6; preserves the N most recent non-system messages)
+        - compression_mode: str ("light"|"standard"|"heavy", default "standard")
+        - focus: str (optional; topic to prioritize)
+        - target_run_id: str (optional; defaults to the current run)
+        - tool_name: str (optional; for tool-style output, default "compact_memory")
+        - call_id: str (optional)
+        """
+        import json
+        from uuid import uuid4
+
+        from .vars import ensure_namespaces
+        from ..memory.compaction import normalize_messages, split_for_compaction, span_metadata_from_messages
+
+        ensure_namespaces(run.vars)
+
+        artifact_store = self._artifact_store
+        if artifact_store is None:
+            return EffectOutcome.failed(
+                "MEMORY_COMPACT requires an ArtifactStore; configure runtime.set_artifact_store(...)"
+            )
+
+        payload = dict(effect.payload or {})
+        tool_name = str(payload.get("tool_name") or "compact_memory")
+        call_id = str(payload.get("call_id") or "memory")
+
+        target_run_id = payload.get("target_run_id")
+        if target_run_id is not None:
+            target_run_id = str(target_run_id).strip() or None
+
+        try:
+            preserve_recent = int(payload.get("preserve_recent", 6) or 6)
+        except Exception:
+            preserve_recent = 6
+        if preserve_recent < 0:
+            preserve_recent = 0
+
+        compression_mode = str(payload.get("compression_mode") or "standard").strip().lower()
+        if compression_mode not in ("light", "standard", "heavy"):
+            compression_mode = "standard"
+
+        focus = payload.get("focus")
+        focus_text = str(focus).strip() if isinstance(focus, str) else ""
+        focus_text = focus_text or None
+
+        # Resolve which run is being compacted.
+        target_run = run
+        if target_run_id and target_run_id != run.run_id:
+            loaded = self._run_store.load(target_run_id)
+            if loaded is None:
+                return EffectOutcome.failed(f"Unknown target_run_id: {target_run_id}")
+            target_run = loaded
+        ensure_namespaces(target_run.vars)
+
+        ctx = target_run.vars.get("context")
+        if not isinstance(ctx, dict):
+            return EffectOutcome.failed("MEMORY_COMPACT requires vars.context to be a dict")
+        messages_raw = ctx.get("messages")
+        if not isinstance(messages_raw, list) or not messages_raw:
+            return EffectOutcome.completed(
+                result={
+                    "mode": "executed",
+                    "results": [
+                        {
+                            "call_id": call_id,
+                            "name": tool_name,
+                            "success": True,
+                            "output": "No messages to compact.",
+                            "error": None,
+                        }
+                    ],
+                }
+            )
+
+        now_iso = utc_now_iso
+        messages = normalize_messages(messages_raw, now_iso=now_iso)
+        split = split_for_compaction(messages, preserve_recent=preserve_recent)
+
+        if not split.older_messages:
+            return EffectOutcome.completed(
+                result={
+                    "mode": "executed",
+                    "results": [
+                        {
+                            "call_id": call_id,
+                            "name": tool_name,
+                            "success": True,
+                            "output": f"Nothing to compact (non-system messages <= preserve_recent={preserve_recent}).",
+                            "error": None,
+                        }
+                    ],
+                }
+            )
+
+        # ------------------------------------------------------------------
+        # 1) LLM summary - use the integration-layer summarizer if available
+        # ------------------------------------------------------------------
+        #
+        # When chat_summarizer is injected (from the AbstractCore integration layer),
+        # use it for adaptive chunking based on max_tokens. This handles cases
+        # where the environment can't use the model's full context window
+        # (e.g., GPU memory constraints).
+        #
+        # When max_tokens == -1 (AUTO): uses the model's full capability.
+        # When max_tokens > 0: chunks messages if they exceed the limit.
+
+        sub_run_id: Optional[str] = None  # Track for provenance if using the fallback
+
+        if self._chat_summarizer is not None:
+            # Use AbstractCore's BasicSummarizer with adaptive chunking.
+            try:
+                summarizer_result = self._chat_summarizer.summarize_chat_history(
+                    messages=split.older_messages,
+                    preserve_recent=0,  # Already split; don't preserve again
+                    focus=focus_text,
+                    compression_mode=compression_mode,
+                )
+                summary_text_out = summarizer_result.get("summary", "(summary unavailable)")
+                key_points = list(summarizer_result.get("key_points") or [])
+                confidence = summarizer_result.get("confidence")
+            except Exception as e:
+                return EffectOutcome.failed(f"Summarizer failed: {e}")
+        else:
+            # Fallback: original prompt-based approach (for non-AbstractCore runtimes).
+            older_text = "\n".join([f"{m.get('role')}: {m.get('content')}" for m in split.older_messages])
+            focus_line = f"Focus: {focus_text}\n" if focus_text else ""
+            mode_line = f"Compression mode: {compression_mode}\n"
+
+            prompt = (
+                "You are compressing older conversation context for an agent runtime.\n"
+                "Write a faithful, compact summary that preserves decisions, constraints, names, file paths, commands, and open questions.\n"
+                "Do NOT invent details. If something is unknown, say so.\n"
+                f"{mode_line}"
+                f"{focus_line}"
+                "Return STRICT JSON with keys: summary (string), key_points (array of strings), confidence (number 0..1).\n\n"
+                "OLDER MESSAGES (to be archived):\n"
+                f"{older_text}\n"
+            )
+
+            # Best-effort output budget for the summary itself.
+            limits = target_run.vars.get("_limits") if isinstance(target_run.vars.get("_limits"), dict) else {}
+            max_out = limits.get("max_output_tokens")
+            try:
+                max_out_tokens = int(max_out) if max_out is not None else None
+            except Exception:
+                max_out_tokens = None
+
+            llm_payload: Dict[str, Any] = {"prompt": prompt}
+            if max_out_tokens is not None:
+                llm_payload["params"] = {"max_tokens": max_out_tokens}
+
+            def llm_node(sub_run: RunState, sub_ctx) -> StepPlan:
+                return StepPlan(
+                    node_id="llm",
+                    effect=Effect(type=EffectType.LLM_CALL, payload=llm_payload, result_key="_temp.llm"),
+                    next_node="done",
+                )
+
+            def done_node(sub_run: RunState, sub_ctx) -> StepPlan:
+                temp = sub_run.vars.get("_temp") if isinstance(sub_run.vars.get("_temp"), dict) else {}
+                return StepPlan(node_id="done", complete_output={"response": temp.get("llm")})
+
+            wf = WorkflowSpec(workflow_id="wf_memory_compact_llm", entry_node="llm", nodes={"llm": llm_node, "done": done_node})
+
+            sub_run_id = self.start(
+                workflow=wf,
+                vars={"context": {"prompt": prompt}, "scratchpad": {}, "_runtime": {}, "_temp": {}, "_limits": dict(limits)},
+                actor_id=run.actor_id,
+                session_id=getattr(run, "session_id", None),
+                parent_run_id=run.run_id,
+            )
+
+            sub_state = self.tick(workflow=wf, run_id=sub_run_id)
+            if sub_state.status == RunStatus.WAITING:
+                return EffectOutcome.failed("MEMORY_COMPACT does not support waiting subworkflows yet")
+            if sub_state.status == RunStatus.FAILED:
+                return EffectOutcome.failed(sub_state.error or "Compaction LLM subworkflow failed")
+            response = (sub_state.output or {}).get("response")
+            if not isinstance(response, dict):
+                response = {}
+
+            content = response.get("content")
+            content_text = "" if content is None else str(content).strip()
+            lowered = content_text.lower()
+            if any(
+                keyword in lowered
+                for keyword in (
+                    "operation not permitted",
+                    "failed to connect",
+                    "connection refused",
+                    "timed out",
+                    "timeout",
+                    "not running",
+                    "model not found",
+                )
+            ):
+                return EffectOutcome.failed(f"Compaction LLM unavailable: {content_text}")
+
+            summary_text_out = content_text
+            key_points: list[str] = []
+            confidence: Optional[float] = None
+
+            # Parse JSON if present (support fenced output).
+            if content_text:
+                candidate = content_text
+                if "```" in candidate:
+                    # Extract the first JSON-ish block.
+                    start = candidate.find("{")
+                    end = candidate.rfind("}")
+                    if 0 <= start < end:
+                        candidate = candidate[start : end + 1]
+                try:
+                    parsed = json.loads(candidate)
+                    if isinstance(parsed, dict):
+                        if parsed.get("summary") is not None:
+                            summary_text_out = str(parsed.get("summary") or "").strip() or summary_text_out
+                        kp = parsed.get("key_points")
+                        if isinstance(kp, list):
+                            key_points = [str(x) for x in kp if isinstance(x, (str, int, float))][:20]
+                        conf = parsed.get("confidence")
+                        if isinstance(conf, (int, float)):
+                            confidence = float(conf)
+                except Exception:
+                    pass
+
+        summary_text_out = summary_text_out.strip()
+        if not summary_text_out:
+            summary_text_out = "(summary unavailable)"
+
+        # ------------------------------------------------------------------
+        # 2) Archive older messages + update run state with summary
+        # ------------------------------------------------------------------
+
+        span_meta = span_metadata_from_messages(split.older_messages)
+        artifact_payload = {
+            "messages": split.older_messages,
+            "span": span_meta,
+            "created_at": now_iso(),
+        }
+        artifact_tags: Dict[str, str] = {
+            "kind": "conversation_span",
+            "compression_mode": compression_mode,
+            "preserve_recent": str(preserve_recent),
+        }
+        if focus_text:
+            artifact_tags["focus"] = focus_text
+
+        meta = artifact_store.store_json(artifact_payload, run_id=target_run.run_id, tags=artifact_tags)
+        archived_ref = meta.artifact_id
+
+        summary_message_id = f"msg_{uuid4().hex}"
+        summary_prefix = f"[CONVERSATION HISTORY SUMMARY span_id={archived_ref}]"
+        summary_metadata: Dict[str, Any] = {
+            "message_id": summary_message_id,
+            "kind": "memory_summary",
+            "compression_mode": compression_mode,
+            "preserve_recent": preserve_recent,
+            "source_artifact_id": archived_ref,
+            "source_message_count": int(span_meta.get("message_count") or 0),
+            "source_from_timestamp": span_meta.get("from_timestamp"),
+            "source_to_timestamp": span_meta.get("to_timestamp"),
+            "source_from_message_id": span_meta.get("from_message_id"),
+            "source_to_message_id": span_meta.get("to_message_id"),
+        }
+        if focus_text:
+            summary_metadata["focus"] = focus_text
+
+        summary_message = {
+            "role": "system",
+            "content": f"{summary_prefix}: {summary_text_out}",
+            "timestamp": now_iso(),
+            "metadata": summary_metadata,
+        }
+
+        new_messages = list(split.system_messages) + [summary_message] + list(split.recent_messages)
+        ctx["messages"] = new_messages
+        if isinstance(getattr(target_run, "output", None), dict):
+            target_run.output["messages"] = new_messages
+
+        runtime_ns = target_run.vars.get("_runtime")
+        if not isinstance(runtime_ns, dict):
+            runtime_ns = {}
+            target_run.vars["_runtime"] = runtime_ns
+        spans = runtime_ns.get("memory_spans")
+        if not isinstance(spans, list):
+            spans = []
+            runtime_ns["memory_spans"] = spans
+        span_record: Dict[str, Any] = {
+            "kind": "conversation_span",
+            "artifact_id": archived_ref,
+            "created_at": now_iso(),
+            "summary_message_id": summary_message_id,
+            "from_timestamp": span_meta.get("from_timestamp"),
+            "to_timestamp": span_meta.get("to_timestamp"),
+            "from_message_id": span_meta.get("from_message_id"),
+            "to_message_id": span_meta.get("to_message_id"),
+            "message_count": int(span_meta.get("message_count") or 0),
+            "compression_mode": compression_mode,
+            "focus": focus_text,
+        }
+        if run.actor_id:
+            span_record["created_by"] = str(run.actor_id)
+        spans.append(span_record)
+
+        if target_run is not run:
+            target_run.updated_at = now_iso()
+            self._run_store.save(target_run)
+
+        out = {
+            "llm_run_id": sub_run_id,
+            "span_id": archived_ref,
+            "summary_message_id": summary_message_id,
+            "preserve_recent": preserve_recent,
+            "compression_mode": compression_mode,
+            "focus": focus_text,
+            "key_points": key_points,
+            "confidence": confidence,
+        }
+        text = f"Compacted {len(split.older_messages)} messages into span_id={archived_ref}."
+        result = {
+            "mode": "executed",
+            "results": [
+                {
+                    "call_id": call_id,
+                    "name": tool_name,
+                    "success": True,
+                    "output": text,
+                    "error": None,
+                    "meta": out,
+                }
+            ],
+        }
+        return EffectOutcome.completed(result=result)
+
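A compaction round-trip, sketched under the same assumption about the EffectType member name:

# Assumed EffectType member; payload fields are documented in the docstring above.
effect = Effect(
    type=EffectType.MEMORY_COMPACT,
    payload={"preserve_recent": 4, "compression_mode": "heavy", "focus": "open bugs"},
)
# On success, context.messages becomes:
#   [system messages] + [one system summary containing "span_id=<artifact_id>"] + [4 recent messages]
# and the archived span stays recallable via MEMORY_QUERY or MEMORY_REHYDRATE.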
+    def _handle_memory_note(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
+        """Handle MEMORY_NOTE.
+
+        Store a small, durable memory note (key insight/decision) with tags and provenance sources.
+
+        Payload:
+        - note: str (required)
+        - tags: dict[str,str] (optional)
+        - location: str (optional)
+        - scope: str (optional; default "run"; resolved via the scope owner run)
+        - sources: dict (optional):
+            - run_id: str (optional; defaults to the current run_id)
+            - span_ids: list[str] (optional; referenced span ids)
+            - message_ids: list[str] (optional; referenced message ids)
+        - keep_in_context: bool (optional; default False; rehydrate the note into context.messages immediately)
+        - target_run_id: str (optional; defaults to the current run_id)
+        - tool_name: str (optional; default "remember_note")
+        - call_id: str (optional; passthrough)
+        """
+        import json
+
+        from .vars import ensure_namespaces
+
+        ensure_namespaces(run.vars)
+        runtime_ns = run.vars.get("_runtime")
+        if not isinstance(runtime_ns, dict):
+            runtime_ns = {}
+            run.vars["_runtime"] = runtime_ns
+
+        artifact_store = self._artifact_store
+        if artifact_store is None:
+            return EffectOutcome.failed(
+                "MEMORY_NOTE requires an ArtifactStore; configure runtime.set_artifact_store(...)"
+            )
+
+        payload = dict(effect.payload or {})
+        tool_name = str(payload.get("tool_name") or "remember_note")
+        call_id = str(payload.get("call_id") or "memory")
+
+        base_run_id = str(payload.get("target_run_id") or run.run_id).strip() or run.run_id
+        base_run = run
+        if base_run_id != run.run_id:
+            loaded = self._run_store.load(base_run_id)
+            if loaded is None:
+                return EffectOutcome.failed(f"Unknown target_run_id: {base_run_id}")
+            base_run = loaded
+        ensure_namespaces(base_run.vars)
+
+        scope = str(payload.get("scope") or "run").strip().lower() or "run"
+        try:
+            target_run = self._resolve_scope_owner_run(base_run, scope=scope)
+        except Exception as e:
+            return EffectOutcome.failed(str(e))
+        ensure_namespaces(target_run.vars)
+
+        target_runtime_ns = target_run.vars.get("_runtime")
+        if not isinstance(target_runtime_ns, dict):
+            target_runtime_ns = {}
+            target_run.vars["_runtime"] = target_runtime_ns
+        spans = target_runtime_ns.get("memory_spans")
+        if not isinstance(spans, list):
+            spans = []
+            target_runtime_ns["memory_spans"] = spans
+
+        note = payload.get("note")
+        note_text = str(note or "").strip()
+        if not note_text:
+            return EffectOutcome.failed("MEMORY_NOTE requires payload.note (non-empty string)")
+
+        location_raw = payload.get("location")
+        location = str(location_raw).strip() if isinstance(location_raw, str) else ""
+
+        tags = payload.get("tags")
+        clean_tags: Dict[str, str] = {}
+        if isinstance(tags, dict):
+            for k, v in tags.items():
+                if isinstance(k, str) and isinstance(v, str) and k and v:
+                    if k == "kind":
+                        continue
+                    clean_tags[k] = v
+
+        sources = payload.get("sources")
+        sources_dict = dict(sources) if isinstance(sources, dict) else {}
+
+        def _norm_list(value: Any) -> list[str]:
+            if not isinstance(value, list):
+                return []
+            out: list[str] = []
+            for item in value:
+                if isinstance(item, str):
+                    s = item.strip()
+                    if s:
+                        out.append(s)
+                elif isinstance(item, int):
+                    out.append(str(item))
+            # Preserve order but dedup.
+            seen: set[str] = set()
+            deduped: list[str] = []
+            for s in out:
+                if s in seen:
+                    continue
+                seen.add(s)
+                deduped.append(s)
+            return deduped
+
+        # Provenance default: the run that emitted this effect (not the scope owner).
+        source_run_id = str(sources_dict.get("run_id") or run.run_id).strip() or run.run_id
+        span_ids = _norm_list(sources_dict.get("span_ids"))
+        message_ids = _norm_list(sources_dict.get("message_ids"))
+
+        created_at = utc_now_iso()
+        artifact_payload: Dict[str, Any] = {
+            "note": note_text,
+            "sources": {"run_id": source_run_id, "span_ids": span_ids, "message_ids": message_ids},
+            "created_at": created_at,
+        }
+        if location:
+            artifact_payload["location"] = location
+        if run.actor_id:
+            artifact_payload["actor_id"] = str(run.actor_id)
+        session_id = getattr(target_run, "session_id", None) or getattr(run, "session_id", None)
+        if session_id:
+            artifact_payload["session_id"] = str(session_id)
+
+        artifact_tags: Dict[str, str] = {"kind": "memory_note"}
+        artifact_tags.update(clean_tags)
+        meta = artifact_store.store_json(artifact_payload, run_id=target_run.run_id, tags=artifact_tags)
+        artifact_id = meta.artifact_id
+
+        preview = note_text
+        if len(preview) > 160:
+            preview = preview[:157] + "…"
+
+        span_record: Dict[str, Any] = {
+            "kind": "memory_note",
+            "artifact_id": artifact_id,
+            "created_at": created_at,
+            # Treat notes as point-in-time spans for time-range filtering.
+            "from_timestamp": created_at,
+            "to_timestamp": created_at,
+            "message_count": 0,
+            "note_preview": preview,
+        }
+        if location:
+            span_record["location"] = location
+        if clean_tags:
+            span_record["tags"] = dict(clean_tags)
+        if span_ids or message_ids:
+            span_record["sources"] = {"run_id": source_run_id, "span_ids": span_ids, "message_ids": message_ids}
+        if run.actor_id:
+            span_record["created_by"] = str(run.actor_id)
+
+        spans.append(span_record)
+
+        def _coerce_bool(value: Any) -> bool:
+            if isinstance(value, bool):
+                return bool(value)
+            if isinstance(value, (int, float)) and not isinstance(value, bool):
+                try:
+                    return float(value) != 0.0
+                except Exception:
+                    return False
+            if isinstance(value, str):
+                s = value.strip().lower()
+                if not s:
+                    return False
+                if s in {"false", "0", "no", "off"}:
+                    return False
+                if s in {"true", "1", "yes", "on"}:
+                    return True
+            return False
+
+        # Optional UX convenience: keep the stored note immediately visible to downstream LLM calls by
+        # rehydrating it into `base_run.context.messages` as a synthetic system message.
+        keep_raw = payload.get("keep_in_context")
+        if keep_raw is None:
+            keep_raw = payload.get("keepInContext")
+        keep_in_context = _coerce_bool(keep_raw)
+        kept: Optional[Dict[str, Any]] = None
+        if keep_in_context:
+            try:
+                from ..memory.active_context import ActiveContextPolicy
+
+                policy = ActiveContextPolicy(run_store=self._run_store, artifact_store=artifact_store)
+                out = policy.rehydrate_into_context_from_run(
+                    base_run,
+                    span_ids=[artifact_id],
+                    placement="end",
+                    dedup_by="message_id",
+                    max_messages=1,
+                )
+                kept = {"inserted": out.get("inserted", 0), "skipped": out.get("skipped", 0)}
+
+                # Persist when mutating a different run than the currently executing one.
+                if base_run is not run:
+                    base_run.updated_at = utc_now_iso()
+                    self._run_store.save(base_run)
+            except Exception as e:
+                kept = {"inserted": 0, "skipped": 0, "error": str(e)}
+
+        if target_run is not run:
+            target_run.updated_at = utc_now_iso()
+            self._run_store.save(target_run)
+
+        rendered_tags = json.dumps(clean_tags, ensure_ascii=False, sort_keys=True) if clean_tags else "{}"
+        text = f"Stored memory_note span_id={artifact_id} tags={rendered_tags}"
+        meta_out: Dict[str, Any] = {"span_id": artifact_id, "created_at": created_at, "note_preview": preview}
+        if isinstance(kept, dict):
+            meta_out["kept_in_context"] = kept
+
+        result = {
+            "mode": "executed",
+            "results": [
+                {
+                    "call_id": call_id,
+                    "name": tool_name,
+                    "success": True,
+                    "output": text,
+                    "error": None,
+                    "meta": meta_out,
+                }
+            ],
+        }
+        return EffectOutcome.completed(result=result)
+
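Storing a note with provenance, as a sketch (assumed EffectType member; payload fields come from the docstring above, and the angle-bracket ids are placeholders):

# Assumed EffectType member; placeholder ids, not real artifacts.
effect = Effect(
    type=EffectType.MEMORY_NOTE,
    payload={
        "note": "Decision: vars paths accept both dot notation and JSON pointers.",
        "tags": {"topic": "vars"},
        "sources": {"span_ids": ["<artifact_id>"], "message_ids": ["<message_id>"]},
        "keep_in_context": True,  # also rehydrates the note into context.messages
    },
)
# Output: Stored memory_note span_id=<artifact_id> tags={"topic": "vars"}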
+    def _handle_memory_rehydrate(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
+        """Handle MEMORY_REHYDRATE.
+
+        This is a runtime-owned, deterministic mutation of `context.messages`:
+        - loads archived conversation span artifacts from ArtifactStore
+        - inserts them into `context.messages` with dedup
+        - persists the mutated run (RunStore checkpoint)
+
+        Payload (required unless stated):
+        - span_ids: list[str|int] (required; artifact ids preferred; indices allowed)
+        - placement: str ("after_summary"|"after_system"|"end", default "after_summary")
+        - dedup_by: str (default "message_id")
+        - max_messages: int (optional; max inserted messages)
+        - target_run_id: str (optional; defaults to the current run)
+        """
+        from .vars import ensure_namespaces
+
+        ensure_namespaces(run.vars)
+        artifact_store = self._artifact_store
+        if artifact_store is None:
+            return EffectOutcome.failed(
+                "MEMORY_REHYDRATE requires an ArtifactStore; configure runtime.set_artifact_store(...)"
+            )
+
+        payload = dict(effect.payload or {})
+        target_run_id = str(payload.get("target_run_id") or run.run_id).strip() or run.run_id
+
+        # Normalize span_ids (accept legacy `span_id` too).
+        raw_span_ids = payload.get("span_ids")
+        if raw_span_ids is None:
+            raw_span_ids = payload.get("span_id")
+        span_ids: list[Any] = []
+        if isinstance(raw_span_ids, list):
+            span_ids = list(raw_span_ids)
+        elif raw_span_ids is not None:
+            span_ids = [raw_span_ids]
+        if not span_ids:
+            return EffectOutcome.failed("MEMORY_REHYDRATE requires payload.span_ids (non-empty list)")
+
+        placement = str(payload.get("placement") or "after_summary").strip() or "after_summary"
+        dedup_by = str(payload.get("dedup_by") or "message_id").strip() or "message_id"
+        max_messages = payload.get("max_messages")
+
+        # Load the target run (may be different from the current one).
+        target_run = run
+        if target_run_id != run.run_id:
+            loaded = self._run_store.load(target_run_id)
+            if loaded is None:
+                return EffectOutcome.failed(f"Unknown target_run_id: {target_run_id}")
+            target_run = loaded
+        ensure_namespaces(target_run.vars)
+
+        # Best-effort: rehydrate only span kinds that are meaningful to inject into
+        # `context.messages` for downstream LLM calls.
+        #
+        # Rationale:
+        # - conversation_span: archived chat messages
+        # - memory_note: durable notes (rehydrated as a synthetic message by ActiveContextPolicy)
+        #
+        # Evidence and other span kinds are intentionally skipped by default.
+        from ..memory.active_context import ActiveContextPolicy
+
+        spans = ActiveContextPolicy.list_memory_spans_from_run(target_run)
+        resolved = ActiveContextPolicy.resolve_span_ids_from_spans(span_ids, spans)
+        if not resolved:
+            return EffectOutcome.completed(result={"inserted": 0, "skipped": 0, "artifacts": []})
+
+        kind_by_artifact: dict[str, str] = {}
+        for s in spans:
+            if not isinstance(s, dict):
+                continue
+            aid = str(s.get("artifact_id") or "").strip()
+            if not aid or aid in kind_by_artifact:
+                continue
+            kind_by_artifact[aid] = str(s.get("kind") or "").strip()
+
+        to_rehydrate: list[str] = []
+        skipped_artifacts: list[dict[str, Any]] = []
+        allowed_kinds = {"conversation_span", "memory_note"}
+        for aid in resolved:
+            kind = kind_by_artifact.get(aid, "")
+            if kind and kind not in allowed_kinds:
+                skipped_artifacts.append(
+                    {"span_id": aid, "inserted": 0, "skipped": 0, "error": None, "kind": kind}
+                )
+                continue
+            to_rehydrate.append(aid)
+
+        # Reuse the canonical policy implementation (no duplicated logic).
+        # Mutate the in-memory RunState to keep runtime tick semantics consistent.
+        policy = ActiveContextPolicy(run_store=self._run_store, artifact_store=artifact_store)
+        out = policy.rehydrate_into_context_from_run(
+            target_run,
+            span_ids=to_rehydrate,
+            placement=placement,
+            dedup_by=dedup_by,
+            max_messages=max_messages,
+        )
+
+        # Persist when mutating a different run than the currently executing one.
+        if target_run is not run:
+            target_run.updated_at = utc_now_iso()
+            self._run_store.save(target_run)
+
+        # Normalize output shape to match backlog expectations (`span_id` field, optional kind).
+        artifacts_out: list[dict[str, Any]] = []
+        artifacts = out.get("artifacts")
+        if isinstance(artifacts, list):
+            for a in artifacts:
+                if not isinstance(a, dict):
+                    continue
+                aid = str(a.get("artifact_id") or "").strip()
+                artifacts_out.append(
+                    {
+                        "span_id": aid,
+                        "inserted": a.get("inserted"),
+                        "skipped": a.get("skipped"),
+                        "error": a.get("error"),
+                        "kind": kind_by_artifact.get(aid) or None,
+                        "preview": a.get("preview"),
+                    }
+                )
+        artifacts_out.extend(skipped_artifacts)
+
+        return EffectOutcome.completed(
+            result={
+                "inserted": out.get("inserted", 0),
+                "skipped": out.get("skipped", 0),
+                "artifacts": artifacts_out,
+            }
+        )
+
+
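Pulling two archived spans back into context, as a sketch (assumed EffectType member; placeholder ids):

# Assumed EffectType member; payload fields are documented above.
effect = Effect(
    type=EffectType.MEMORY_REHYDRATE,
    payload={"span_ids": ["<artifact_id_1>", "<artifact_id_2>"], "max_messages": 50},
)
# Result shape (confirmed by the handler above):
# {"inserted": <n>, "skipped": <n>,
#  "artifacts": [{"span_id": ..., "inserted": ..., "skipped": ..., "error": ..., "kind": ..., "preview": ...}]}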
+def _dedup_preserve_order(values: list[str]) -> list[str]:
+    seen: set[str] = set()
+    out: list[str] = []
+    for v in values:
+        s = str(v or "").strip()
+        if not s or s in seen:
+            continue
+        seen.add(s)
+        out.append(s)
+    return out
+
+
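Behavior sketch for the helper above: entries are stripped, empties are dropped, and the first occurrence wins.

# Within this module:
assert _dedup_preserve_order(["a", "b", " a ", "", "c"]) == ["a", "b", "c"]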
+def _span_sort_key(span: dict) -> tuple[str, str]:
+    """Sort key for span adjacency. Prefer from_timestamp, then created_at."""
+    from_ts = str(span.get("from_timestamp") or "")
+    created = str(span.get("created_at") or "")
+    return (from_ts or created, created)
+
+
+def _expand_connected_span_ids(
+    *,
+    spans: list[dict[str, Any]],
+    seed_artifact_ids: list[str],
+    connect_keys: list[str],
+    neighbor_hops: int,
+    limit: int,
+) -> list[str]:
+    """Expand seed spans to include deterministic neighbors (time + shared tags)."""
+    if not spans or not seed_artifact_ids:
+        return list(seed_artifact_ids)
+
+    ordered = [s for s in spans if isinstance(s, dict) and s.get("artifact_id")]
+    ordered.sort(key=_span_sort_key)
+    idx_by_artifact: dict[str, int] = {str(s["artifact_id"]): i for i, s in enumerate(ordered)}
+
+    # Build a tag index for the requested keys.
+    tag_index: dict[tuple[str, str], list[str]] = {}
+    for s in ordered:
+        tags = s.get("tags") if isinstance(s.get("tags"), dict) else {}
+        for k in connect_keys:
+            v = tags.get(k)
+            if isinstance(v, str) and v:
+                tag_index.setdefault((k, v), []).append(str(s["artifact_id"]))
+
+    out: list[str] = []
+    for aid in seed_artifact_ids:
+        if len(out) >= limit:
+            break
+        out.append(aid)
+
+        idx = idx_by_artifact.get(aid)
+        if idx is not None and neighbor_hops > 0:
+            for delta in range(1, neighbor_hops + 1):
+                for j in (idx - delta, idx + delta):
+                    if 0 <= j < len(ordered):
+                        out.append(str(ordered[j]["artifact_id"]))
+
+        if connect_keys:
+            s = ordered[idx] if idx is not None and 0 <= idx < len(ordered) else None
+            if isinstance(s, dict):
+                tags = s.get("tags") if isinstance(s.get("tags"), dict) else {}
+                for k in connect_keys:
+                    v = tags.get(k)
+                    if isinstance(v, str) and v:
+                        out.extend(tag_index.get((k, v), []))
+
+    return _dedup_preserve_order(out)[:limit]
+
+
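Toy illustration of the expansion above: with three time-ordered spans and one neighbor hop, seeding on the middle span pulls in both adjacent spans; shared-tag expansion would add any span sharing a value under a connect key.

# Within this module, with synthetic span records:
spans = [
    {"artifact_id": "s1", "from_timestamp": "2025-01-01", "tags": {"topic": "db"}},
    {"artifact_id": "s2", "from_timestamp": "2025-01-02", "tags": {}},
    {"artifact_id": "s3", "from_timestamp": "2025-01-03", "tags": {"topic": "db"}},
]
ids = _expand_connected_span_ids(
    spans=spans, seed_artifact_ids=["s2"], connect_keys=["topic"], neighbor_hops=1, limit=10
)
assert ids == ["s2", "s1", "s3"]  # seed first, then deterministic neighbors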
+def _deep_scan_span_ids(
+    *,
+    spans: list[dict[str, Any]],
+    artifact_store: Any,
+    query: str,
+    limit_spans: int,
+    limit_messages_per_span: int,
+) -> list[str]:
+    """Fallback keyword scan over archived messages when metadata/summary matching is insufficient."""
+    q = str(query or "").strip().lower()
+    if not q:
+        return []
+
+    scanned = 0
+    matches: list[str] = []
+    for s in spans:
+        if scanned >= limit_spans:
+            break
+        if not isinstance(s, dict):
+            continue
+        artifact_id = s.get("artifact_id")
+        if not isinstance(artifact_id, str) or not artifact_id:
+            continue
+        scanned += 1
+
+        payload = artifact_store.load_json(artifact_id)
+        if not isinstance(payload, dict):
+            continue
+        messages = payload.get("messages")
+        if not isinstance(messages, list) or not messages:
+            continue
+
+        for m in messages[:limit_messages_per_span]:
+            if not isinstance(m, dict):
+                continue
+            content = m.get("content")
+            if not content:
+                continue
+            if q in str(content).lower():
+                matches.append(artifact_id)
+                break
+
+    return _dedup_preserve_order(matches)
+
+
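A toy illustration with an in-memory stand-in for the artifact store (the real store exposes load_json(artifact_id) the same way, as used above; the stub class is hypothetical):

# Hypothetical stub store, only for illustration.
class _FakeStore:
    def __init__(self, payloads):
        self._payloads = payloads

    def load_json(self, artifact_id):
        return self._payloads.get(artifact_id)

store = _FakeStore({"s1": {"messages": [{"role": "user", "content": "reset the DB password"}]}})
ids = _deep_scan_span_ids(
    spans=[{"artifact_id": "s1"}], artifact_store=store, query="password",
    limit_spans=10, limit_messages_per_span=5,
)
assert ids == ["s1"]  # case-insensitive substring match on archived content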
+def _render_memory_query_output(
+    *,
+    spans: list[dict[str, Any]],
+    artifact_store: Any,
+    selected_artifact_ids: list[str],
+    summary_by_artifact: dict[str, str],
+    max_messages: int,
+) -> str:
+    if not selected_artifact_ids:
+        return "No matching memory spans."
+
+    span_by_id: dict[str, dict[str, Any]] = {
+        str(s.get("artifact_id")): s for s in spans if isinstance(s, dict) and s.get("artifact_id")
+    }
+
+    lines: list[str] = []
+    lines.append("Recalled memory spans (provenance-preserving):")
+
+    remaining: Optional[int] = None if int(max_messages) == -1 else int(max_messages)
+    for i, aid in enumerate(selected_artifact_ids, start=1):
+        span = span_by_id.get(aid, {})
+        kind = span.get("kind") or "span"
+        created = span.get("created_at") or ""
+        from_ts = span.get("from_timestamp") or ""
+        to_ts = span.get("to_timestamp") or ""
+        count = span.get("message_count") or ""
+        created_by = span.get("created_by") or ""
+        location = span.get("location") or ""
+        tags = span.get("tags") if isinstance(span.get("tags"), dict) else {}
+        tags_txt = ", ".join([f"{k}={v}" for k, v in sorted(tags.items()) if isinstance(v, str) and v])
+
+        lines.append("")
+        lines.append(f"[{i}] span_id={aid} kind={kind} msgs={count} created_at={created}")
+        if from_ts or to_ts:
+            lines.append(f" time_range: {from_ts} .. {to_ts}")
+        if isinstance(created_by, str) and str(created_by).strip():
+            lines.append(f" created_by: {str(created_by).strip()}")
+        if isinstance(location, str) and str(location).strip():
+            lines.append(f" location: {str(location).strip()}")
+        if tags_txt:
+            lines.append(f" tags: {tags_txt}")
+
+        summary = summary_by_artifact.get(aid)
+        if summary:
+            lines.append(f" summary: {str(summary).strip()}")
+
+        if remaining is not None and remaining <= 0:
+            continue
+
+        payload = artifact_store.load_json(aid)
+        if not isinstance(payload, dict):
+            lines.append(" (artifact payload unavailable)")
+            continue
+        if kind == "memory_note" or "note" in payload:
+            note = str(payload.get("note") or "").strip()
+            if note:
+                lines.append(" note: " + note)
+            else:
+                lines.append(" (note payload missing note text)")
+
+            if not (isinstance(location, str) and location.strip()):
+                loc = payload.get("location")
+                if isinstance(loc, str) and loc.strip():
+                    lines.append(f" location: {loc.strip()}")
+
+            sources = payload.get("sources")
+            if isinstance(sources, dict):
+                src_run = sources.get("run_id")
+                span_ids = sources.get("span_ids")
+                msg_ids = sources.get("message_ids")
+                if isinstance(src_run, str) and src_run:
+                    lines.append(f" sources.run_id: {src_run}")
+                if isinstance(span_ids, list) and span_ids:
+                    cleaned = [str(x) for x in span_ids if isinstance(x, (str, int))]
+                    if cleaned:
+                        lines.append(f" sources.span_ids: {', '.join(cleaned[:12])}")
+                if isinstance(msg_ids, list) and msg_ids:
+                    cleaned = [str(x) for x in msg_ids if isinstance(x, (str, int))]
+                    if cleaned:
+                        lines.append(f" sources.message_ids: {', '.join(cleaned[:12])}")
+            continue
+
+        messages = payload.get("messages")
+        if not isinstance(messages, list):
+            lines.append(" (artifact missing messages)")
+            continue
+
+        # Render messages with a global cap.
+        rendered = 0
+        for m in messages:
+            if remaining is not None and remaining <= 0:
+                break
+            if not isinstance(m, dict):
+                continue
+            role = str(m.get("role") or "unknown")
+            content = str(m.get("content") or "")
+            ts = str(m.get("timestamp") or "")
+            prefix = f" - {role}: "
+            if ts:
+                prefix = f" - {ts} {role}: "
+            lines.append(prefix + content)
+            rendered += 1
+            if remaining is not None:
+                remaining -= 1
+
+        total = sum(1 for m in messages if isinstance(m, dict))
+        if remaining is not None and rendered < total:
+            lines.append(f" (remaining {total - rendered} messages omitted by max_messages={int(max_messages)})")
+
+    return "\n".join(lines)
+
 
  def _set_nested(target: Dict[str, Any], dotted_key: str, value: Any) -> None:
  """Set nested dict value using dot notation."""