AbstractRuntime 0.2.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. abstractruntime/__init__.py +83 -3
  2. abstractruntime/core/config.py +82 -2
  3. abstractruntime/core/event_keys.py +62 -0
  4. abstractruntime/core/models.py +17 -1
  5. abstractruntime/core/policy.py +74 -3
  6. abstractruntime/core/runtime.py +3334 -28
  7. abstractruntime/core/vars.py +103 -2
  8. abstractruntime/evidence/__init__.py +10 -0
  9. abstractruntime/evidence/recorder.py +325 -0
  10. abstractruntime/history_bundle.py +772 -0
  11. abstractruntime/integrations/abstractcore/__init__.py +6 -0
  12. abstractruntime/integrations/abstractcore/constants.py +19 -0
  13. abstractruntime/integrations/abstractcore/default_tools.py +258 -0
  14. abstractruntime/integrations/abstractcore/effect_handlers.py +2622 -32
  15. abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
  16. abstractruntime/integrations/abstractcore/factory.py +149 -16
  17. abstractruntime/integrations/abstractcore/llm_client.py +891 -55
  18. abstractruntime/integrations/abstractcore/mcp_worker.py +587 -0
  19. abstractruntime/integrations/abstractcore/observability.py +80 -0
  20. abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
  21. abstractruntime/integrations/abstractcore/summarizer.py +154 -0
  22. abstractruntime/integrations/abstractcore/tool_executor.py +509 -31
  23. abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
  24. abstractruntime/integrations/abstractmemory/__init__.py +3 -0
  25. abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
  26. abstractruntime/memory/__init__.py +21 -0
  27. abstractruntime/memory/active_context.py +751 -0
  28. abstractruntime/memory/active_memory.py +452 -0
  29. abstractruntime/memory/compaction.py +105 -0
  30. abstractruntime/memory/kg_packets.py +164 -0
  31. abstractruntime/memory/memact_composer.py +175 -0
  32. abstractruntime/memory/recall_levels.py +163 -0
  33. abstractruntime/memory/token_budget.py +86 -0
  34. abstractruntime/rendering/__init__.py +17 -0
  35. abstractruntime/rendering/agent_trace_report.py +256 -0
  36. abstractruntime/rendering/json_stringify.py +136 -0
  37. abstractruntime/scheduler/scheduler.py +93 -2
  38. abstractruntime/storage/__init__.py +7 -2
  39. abstractruntime/storage/artifacts.py +175 -32
  40. abstractruntime/storage/base.py +17 -1
  41. abstractruntime/storage/commands.py +339 -0
  42. abstractruntime/storage/in_memory.py +41 -1
  43. abstractruntime/storage/json_files.py +210 -14
  44. abstractruntime/storage/observable.py +136 -0
  45. abstractruntime/storage/offloading.py +433 -0
  46. abstractruntime/storage/sqlite.py +836 -0
  47. abstractruntime/visualflow_compiler/__init__.py +29 -0
  48. abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
  49. abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
  50. abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
  51. abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
  52. abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
  53. abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
  54. abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
  55. abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
  56. abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
  57. abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
  58. abstractruntime/visualflow_compiler/compiler.py +3832 -0
  59. abstractruntime/visualflow_compiler/flow.py +247 -0
  60. abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
  61. abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
  62. abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
  63. abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
  64. abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
  65. abstractruntime/visualflow_compiler/visual/models.py +211 -0
  66. abstractruntime/workflow_bundle/__init__.py +52 -0
  67. abstractruntime/workflow_bundle/models.py +236 -0
  68. abstractruntime/workflow_bundle/packer.py +317 -0
  69. abstractruntime/workflow_bundle/reader.py +87 -0
  70. abstractruntime/workflow_bundle/registry.py +587 -0
  71. abstractruntime-0.4.1.dist-info/METADATA +177 -0
  72. abstractruntime-0.4.1.dist-info/RECORD +86 -0
  73. abstractruntime-0.4.1.dist-info/entry_points.txt +2 -0
  74. abstractruntime-0.2.0.dist-info/METADATA +0 -163
  75. abstractruntime-0.2.0.dist-info/RECORD +0 -32
  76. {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
  77. {abstractruntime-0.2.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -18,10 +18,15 @@ We keep the design explicitly modular:
18
18
 
19
19
  from __future__ import annotations
20
20
 
21
- from dataclasses import dataclass
21
+ from dataclasses import dataclass, asdict, is_dataclass
22
22
  from datetime import datetime, timezone
23
- from typing import Any, Callable, Dict, Optional
23
+ from typing import Any, Callable, Dict, Optional, List
24
+ import copy
25
+ import hashlib
24
26
  import inspect
27
+ import json
28
+ import os
29
+ import re
25
30
 
26
31
  from .config import RuntimeConfig
27
32
  from .models import (
@@ -38,13 +43,356 @@ from .models import (
38
43
  )
39
44
  from .spec import WorkflowSpec
40
45
  from .policy import DefaultEffectPolicy, EffectPolicy
41
- from ..storage.base import LedgerStore, RunStore
46
+ from ..storage.base import LedgerStore, RunStore, QueryableRunStore
47
+ from .event_keys import build_event_wait_key
42
48
 
43
49
 
44
50
  def utc_now_iso() -> str:
45
51
  return datetime.now(timezone.utc).isoformat()
46
52
 
47
53
 
54
+ def _jsonable(value: Any, *, _path: Optional[set[int]] = None, _depth: int = 0) -> Any:
55
+ """Best-effort conversion to JSON-safe objects.
56
+
57
+ The ledger is persisted as JSON. Any value stored in StepRecord.result must be JSON-safe.
58
+ """
59
+ if _path is None:
60
+ _path = set()
61
+ # Avoid pathological recursion and cyclic structures.
62
+ if _depth > 200:
63
+ return "<max_depth>"
64
+ if value is None:
65
+ return None
66
+ if isinstance(value, (str, int, float, bool)):
67
+ return value
68
+ if isinstance(value, dict):
69
+ vid = id(value)
70
+ if vid in _path:
71
+ return "<cycle>"
72
+ _path.add(vid)
73
+ try:
74
+ return {str(k): _jsonable(v, _path=_path, _depth=_depth + 1) for k, v in value.items()}
75
+ finally:
76
+ _path.discard(vid)
77
+ if isinstance(value, list):
78
+ vid = id(value)
79
+ if vid in _path:
80
+ return "<cycle>"
81
+ _path.add(vid)
82
+ try:
83
+ return [_jsonable(v, _path=_path, _depth=_depth + 1) for v in value]
84
+ finally:
85
+ _path.discard(vid)
86
+ try:
87
+ if is_dataclass(value):
88
+ vid = id(value)
89
+ if vid in _path:
90
+ return "<cycle>"
91
+ _path.add(vid)
92
+ try:
93
+ return _jsonable(asdict(value), _path=_path, _depth=_depth + 1)
94
+ finally:
95
+ _path.discard(vid)
96
+ except Exception:
97
+ pass
98
+ try:
99
+ md = getattr(value, "model_dump", None)
100
+ if callable(md):
101
+ vid = id(value)
102
+ if vid in _path:
103
+ return "<cycle>"
104
+ _path.add(vid)
105
+ try:
106
+ return _jsonable(md(), _path=_path, _depth=_depth + 1)
107
+ finally:
108
+ _path.discard(vid)
109
+ except Exception:
110
+ pass
111
+ try:
112
+ td = getattr(value, "to_dict", None)
113
+ if callable(td):
114
+ vid = id(value)
115
+ if vid in _path:
116
+ return "<cycle>"
117
+ _path.add(vid)
118
+ try:
119
+ return _jsonable(td(), _path=_path, _depth=_depth + 1)
120
+ finally:
121
+ _path.discard(vid)
122
+ except Exception:
123
+ pass
124
+ try:
125
+ json.dumps(value)
126
+ return value
127
+ except Exception:
128
+ return str(value)
129
+
130
+
131
+ _DEFAULT_GLOBAL_MEMORY_RUN_ID = "global_memory"
132
+ _DEFAULT_SESSION_MEMORY_RUN_PREFIX = "session_memory_"
133
+ _SAFE_RUN_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$")
134
+
135
+ _RUNTIME_TOOL_CALL_ID_PREFIX = "rtcall_"
136
+
137
+
138
+ def _ensure_tool_calls_have_runtime_ids(
139
+ *,
140
+ effect: Effect,
141
+ idempotency_key: str,
142
+ ) -> Effect:
143
+ """Attach stable runtime-owned IDs to tool calls without mutating semantics.
144
+
145
+ - Preserves provider/model `call_id` when present (used for OpenAI transcripts).
146
+ - Adds `runtime_call_id` derived from the effect idempotency key + call index.
147
+ - Ensures each tool call has a non-empty `call_id` (falls back to runtime id).
148
+ - Canonicalizes allowlist ordering (`allowed_tools`) for deterministic payloads.
149
+ """
150
+
151
+ if effect.type != EffectType.TOOL_CALLS:
152
+ return effect
153
+ if not isinstance(effect.payload, dict):
154
+ return effect
155
+
156
+ payload = dict(effect.payload)
157
+ raw_tool_calls = payload.get("tool_calls")
158
+ if not isinstance(raw_tool_calls, list):
159
+ return effect
160
+
161
+ tool_calls: list[Any] = []
162
+ for idx, tc in enumerate(raw_tool_calls):
163
+ if not isinstance(tc, dict):
164
+ tool_calls.append(tc)
165
+ continue
166
+
167
+ tc2 = dict(tc)
168
+ runtime_call_id = tc2.get("runtime_call_id")
169
+ runtime_call_id_str = str(runtime_call_id).strip() if runtime_call_id is not None else ""
170
+ if not runtime_call_id_str:
171
+ runtime_call_id_str = f"{_RUNTIME_TOOL_CALL_ID_PREFIX}{idempotency_key}_{idx+1}"
172
+ tc2["runtime_call_id"] = runtime_call_id_str
173
+
174
+ call_id = tc2.get("call_id")
175
+ if call_id is None:
176
+ call_id = tc2.get("id")
177
+ call_id_str = str(call_id).strip() if call_id is not None else ""
178
+ if call_id_str:
179
+ tc2["call_id"] = call_id_str
180
+ else:
181
+ # When the model/provider didn't emit a call id (or the caller omitted it),
182
+ # fall back to a runtime-owned stable id so result correlation still works.
183
+ tc2["call_id"] = runtime_call_id_str
184
+
185
+ name = tc2.get("name")
186
+ if isinstance(name, str):
187
+ tc2["name"] = name.strip()
188
+
189
+ tool_calls.append(tc2)
190
+
191
+ payload["tool_calls"] = tool_calls
192
+
193
+ allowed_tools = payload.get("allowed_tools")
194
+ if isinstance(allowed_tools, list):
195
+ uniq = {
196
+ str(t).strip()
197
+ for t in allowed_tools
198
+ if isinstance(t, str) and t.strip()
199
+ }
200
+ payload["allowed_tools"] = sorted(uniq)
201
+
202
+ return Effect(type=effect.type, payload=payload, result_key=effect.result_key)
203
+
204
+ def _maybe_inject_llm_call_grounding_for_ledger(*, effect: Effect) -> Effect:
205
+ """Inject per-call time/location grounding into LLM_CALL payloads for auditability.
206
+
207
+ Why:
208
+ - The ledger is the replay/source-of-truth for thin clients.
209
+ - Grounding is injected at the integration boundary (AbstractCore LLM client) so the
210
+ model always knows "when/where" it is.
211
+ - But that injection historically happened *after* the runtime recorded the LLM_CALL
212
+ payload, making it appear missing in ledger UIs.
213
+
214
+ Contract:
215
+ - Only mutates the effect payload (never the durable run context/messages).
216
+ - Must not influence idempotency keys; callers should compute idempotency before calling this.
217
+ """
218
+
219
+ if effect.type != EffectType.LLM_CALL:
220
+ return effect
221
+ if not isinstance(effect.payload, dict):
222
+ return effect
223
+
224
+ payload = dict(effect.payload)
225
+ prompt = payload.get("prompt")
226
+ messages = payload.get("messages")
227
+ prompt_str = str(prompt or "")
228
+ messages_list = messages if isinstance(messages, list) else None
229
+
230
+ try:
231
+ from abstractruntime.integrations.abstractcore.llm_client import _inject_turn_grounding
232
+ except Exception:
233
+ return effect
234
+
235
+ updated_prompt, updated_messages = _inject_turn_grounding(prompt=prompt_str, messages=messages_list)
236
+
237
+ changed = False
238
+ if updated_prompt != prompt_str:
239
+ payload["prompt"] = updated_prompt
240
+ changed = True
241
+
242
+ if messages_list is not None:
243
+ if updated_messages != messages_list:
244
+ payload["messages"] = updated_messages
245
+ changed = True
246
+
247
+ return Effect(type=effect.type, payload=payload, result_key=effect.result_key) if changed else effect
248
+
249
+
250
+ def _ensure_runtime_namespace(vars: Dict[str, Any]) -> Dict[str, Any]:
251
+ runtime_ns = vars.get("_runtime")
252
+ if not isinstance(runtime_ns, dict):
253
+ runtime_ns = {}
254
+ vars["_runtime"] = runtime_ns
255
+ return runtime_ns
256
+
257
+
258
+ def _ensure_control_namespace(vars: Dict[str, Any]) -> Dict[str, Any]:
259
+ runtime_ns = _ensure_runtime_namespace(vars)
260
+ control = runtime_ns.get("control")
261
+ if not isinstance(control, dict):
262
+ control = {}
263
+ runtime_ns["control"] = control
264
+ return control
265
+
266
+
267
+ def _is_paused_run_vars(vars: Any) -> bool:
268
+ if not isinstance(vars, dict):
269
+ return False
270
+ runtime_ns = vars.get("_runtime")
271
+ if not isinstance(runtime_ns, dict):
272
+ return False
273
+ control = runtime_ns.get("control")
274
+ if not isinstance(control, dict):
275
+ return False
276
+ return bool(control.get("paused") is True)
277
+
278
+
279
+ def _is_pause_wait(waiting: Any, *, run_id: str) -> bool:
280
+ if waiting is None:
281
+ return False
282
+ try:
283
+ reason = getattr(waiting, "reason", None)
284
+ reason_value = reason.value if hasattr(reason, "value") else str(reason) if reason else None
285
+ except Exception:
286
+ reason_value = None
287
+ if reason_value != WaitReason.USER.value:
288
+ return False
289
+ try:
290
+ wait_key = getattr(waiting, "wait_key", None)
291
+ if isinstance(wait_key, str) and wait_key == f"pause:{run_id}":
292
+ return True
293
+ except Exception:
294
+ pass
295
+ try:
296
+ details = getattr(waiting, "details", None)
297
+ if isinstance(details, dict) and details.get("kind") == "pause":
298
+ return True
299
+ except Exception:
300
+ pass
301
+ return False
302
+
303
+
304
+ def _record_node_trace(
305
+ *,
306
+ run: RunState,
307
+ node_id: str,
308
+ effect: Effect,
309
+ outcome: "EffectOutcome",
310
+ idempotency_key: Optional[str],
311
+ reused_prior_result: bool,
312
+ duration_ms: Optional[float] = None,
313
+ max_entries_per_node: int = 100,
314
+ ) -> None:
315
+ """Record a JSON-safe per-node execution trace in run.vars["_runtime"].
316
+
317
+ This trace is runtime-owned and durable (stored in RunStore checkpoints).
318
+ It exists to support higher-level hosts (AbstractFlow, AbstractCode, etc.)
319
+ that need structured "scratchpad"/debug information without inventing
320
+ host-specific persistence formats.
321
+ """
322
+
323
+ runtime_ns = _ensure_runtime_namespace(run.vars)
324
+ traces = runtime_ns.get("node_traces")
325
+ if not isinstance(traces, dict):
326
+ traces = {}
327
+ runtime_ns["node_traces"] = traces
328
+
329
+ node_trace = traces.get(node_id)
330
+ if not isinstance(node_trace, dict):
331
+ node_trace = {"node_id": node_id, "steps": []}
332
+ traces[node_id] = node_trace
333
+
334
+ steps = node_trace.get("steps")
335
+ if not isinstance(steps, list):
336
+ steps = []
337
+ node_trace["steps"] = steps
338
+
339
+ wait_dict: Optional[Dict[str, Any]] = None
340
+ if outcome.status == "waiting" and outcome.wait is not None:
341
+ w = outcome.wait
342
+ wait_dict = {
343
+ "reason": w.reason.value if hasattr(w.reason, "value") else str(w.reason),
344
+ "wait_key": w.wait_key,
345
+ "until": w.until,
346
+ "resume_to_node": w.resume_to_node,
347
+ "result_key": w.result_key,
348
+ "prompt": w.prompt,
349
+ "choices": w.choices,
350
+ "allow_free_text": w.allow_free_text,
351
+ "details": w.details,
352
+ }
353
+
354
+ entry: Dict[str, Any] = {
355
+ "ts": utc_now_iso(),
356
+ "node_id": node_id,
357
+ "status": outcome.status,
358
+ "idempotency_key": idempotency_key,
359
+ "reused_prior_result": reused_prior_result,
360
+ "effect": {
361
+ "type": effect.type.value,
362
+ "payload": effect.payload,
363
+ "result_key": effect.result_key,
364
+ },
365
+ }
366
+ if isinstance(duration_ms, (int, float)) and duration_ms >= 0:
367
+ # UI/UX consumers use this for per-step timing badges (kept JSON-safe).
368
+ entry["duration_ms"] = float(duration_ms)
369
+ if outcome.status == "completed":
370
+ entry["result"] = outcome.result
371
+ elif outcome.status == "failed":
372
+ entry["error"] = outcome.error
373
+ elif wait_dict is not None:
374
+ entry["wait"] = wait_dict
375
+
376
+ # Ensure the trace remains JSON-safe even if a handler violates the contract.
377
+ try:
378
+ json.dumps(entry)
379
+ except TypeError:
380
+ entry = {
381
+ "ts": entry.get("ts"),
382
+ "node_id": node_id,
383
+ "status": outcome.status,
384
+ "idempotency_key": idempotency_key,
385
+ "reused_prior_result": reused_prior_result,
386
+ "effect": {"type": effect.type.value, "result_key": effect.result_key},
387
+ "error": "non_json_safe_trace_entry",
388
+ }
389
+
390
+ steps.append(entry)
391
+ if max_entries_per_node > 0 and len(steps) > max_entries_per_node:
392
+ del steps[: max(0, len(steps) - max_entries_per_node)]
393
+ node_trace["updated_at"] = utc_now_iso()
394
+
395
+
48
396
  @dataclass
49
397
  class DefaultRunContext:
50
398
  def now_iso(self) -> str:
@@ -95,6 +443,7 @@ class Runtime:
95
443
  artifact_store: Optional[Any] = None,
96
444
  effect_policy: Optional[EffectPolicy] = None,
97
445
  config: Optional[RuntimeConfig] = None,
446
+ chat_summarizer: Optional[Any] = None,
98
447
  ):
99
448
  self._run_store = run_store
100
449
  self._ledger_store = ledger_store
@@ -103,6 +452,7 @@ class Runtime:
103
452
  self._artifact_store = artifact_store
104
453
  self._effect_policy: EffectPolicy = effect_policy or DefaultEffectPolicy()
105
454
  self._config: RuntimeConfig = config or RuntimeConfig()
455
+ self._chat_summarizer = chat_summarizer
106
456
 
107
457
  self._handlers: Dict[EffectType, EffectHandler] = {}
108
458
  self._register_builtin_handlers()
@@ -169,6 +519,43 @@ class Runtime:
169
519
  if "_limits" not in vars:
170
520
  vars["_limits"] = self._config.to_limits_dict()
171
521
 
522
+ # Ensure a durable `_runtime` namespace exists and seed default provider/model metadata
523
+ # from the Runtime config (best-effort).
524
+ #
525
+ # Rationale:
526
+ # - The Runtime is the orchestration authority (ADR-0001/0014), and `start()` is the
527
+ # choke point where durable run state is initialized.
528
+ # - Agents/workflows should not have to guess/duplicate routing metadata to make prompt
529
+ # composition decisions (e.g. native-tools => omit Tools(session) prompt catalogs).
530
+ runtime_ns = vars.get("_runtime")
531
+ if not isinstance(runtime_ns, dict):
532
+ runtime_ns = {}
533
+ vars["_runtime"] = runtime_ns
534
+ try:
535
+ provider_id = getattr(self._config, "provider", None)
536
+ model_id = getattr(self._config, "model", None)
537
+ if isinstance(provider_id, str) and provider_id.strip():
538
+ runtime_ns.setdefault("provider", provider_id.strip())
539
+ if isinstance(model_id, str) and model_id.strip():
540
+ runtime_ns.setdefault("model", model_id.strip())
541
+ except Exception:
542
+ pass
543
+
544
+ # Seed tool-support metadata from model capabilities (best-effort).
545
+ #
546
+ # This makes the native-vs-prompted tools decision explicit and durable in run state,
547
+ # so adapters/UI helpers don't have to guess or re-run AbstractCore detection logic.
548
+ try:
549
+ caps = getattr(self._config, "model_capabilities", None)
550
+ if isinstance(caps, dict):
551
+ tool_support = caps.get("tool_support")
552
+ if isinstance(tool_support, str) and tool_support.strip():
553
+ ts = tool_support.strip()
554
+ runtime_ns.setdefault("tool_support", ts)
555
+ runtime_ns.setdefault("supports_native_tools", ts == "native")
556
+ except Exception:
557
+ pass
558
+
172
559
  run = RunState.new(
173
560
  workflow_id=workflow.workflow_id,
174
561
  entry_node=workflow.entry_node,
@@ -205,6 +592,88 @@ class Runtime:
205
592
  run.status = RunStatus.CANCELLED
206
593
  run.error = reason or "Cancelled"
207
594
  run.waiting = None
595
+ try:
596
+ control = _ensure_control_namespace(run.vars)
597
+ control.pop("paused", None)
598
+ except Exception:
599
+ pass
600
+ run.updated_at = utc_now_iso()
601
+ self._run_store.save(run)
602
+ self._append_terminal_status_event(run)
603
+ return run
604
+
605
+ def pause_run(self, run_id: str, *, reason: Optional[str] = None) -> RunState:
606
+ """Pause a run (durably) until it is explicitly resumed.
607
+
608
+ Semantics:
609
+ - Pausing a RUNNING run transitions it to WAITING with a synthetic USER wait.
610
+ - Pausing a WAITING run (non-USER waits such as UNTIL/EVENT/SUBWORKFLOW) sets a
611
+ runtime-owned `paused` flag so schedulers/event emitters can skip it.
612
+ - Pausing an ASK_USER wait is a no-op (already blocked by user input).
613
+ """
614
+ run = self.get_state(run_id)
615
+
616
+ if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
617
+ return run
618
+
619
+ # If already paused, keep as-is.
620
+ if _is_paused_run_vars(run.vars):
621
+ return run
622
+
623
+ # Don't interfere with real user prompts (ASK_USER).
624
+ if run.status == RunStatus.WAITING and run.waiting is not None:
625
+ if getattr(run.waiting, "reason", None) == WaitReason.USER and not _is_pause_wait(run.waiting, run_id=run_id):
626
+ return run
627
+
628
+ control = _ensure_control_namespace(run.vars)
629
+ control["paused"] = True
630
+ control["paused_at"] = utc_now_iso()
631
+ if isinstance(reason, str) and reason.strip():
632
+ control["pause_reason"] = reason.strip()
633
+
634
+ if run.status == RunStatus.RUNNING:
635
+ run.status = RunStatus.WAITING
636
+ run.waiting = WaitState(
637
+ reason=WaitReason.USER,
638
+ wait_key=f"pause:{run.run_id}",
639
+ resume_to_node=run.current_node,
640
+ prompt="Paused",
641
+ choices=None,
642
+ allow_free_text=False,
643
+ details={"kind": "pause"},
644
+ )
645
+
646
+ run.updated_at = utc_now_iso()
647
+ self._run_store.save(run)
648
+ return run
649
+
650
+ def resume_run(self, run_id: str) -> RunState:
651
+ """Resume a previously paused run (durably).
652
+
653
+ If the run was paused while RUNNING, this clears the synthetic pause wait
654
+ and returns the run to RUNNING. If the run was paused while WAITING
655
+ (UNTIL/EVENT/SUBWORKFLOW), this only clears the paused flag.
656
+ """
657
+ run = self.get_state(run_id)
658
+
659
+ if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
660
+ return run
661
+
662
+ if not _is_paused_run_vars(run.vars):
663
+ return run
664
+
665
+ try:
666
+ control = _ensure_control_namespace(run.vars)
667
+ control.pop("paused", None)
668
+ control.pop("pause_reason", None)
669
+ control["resumed_at"] = utc_now_iso()
670
+ except Exception:
671
+ pass
672
+
673
+ if run.status == RunStatus.WAITING and _is_pause_wait(run.waiting, run_id=run_id):
674
+ resume_to = getattr(run.waiting, "resume_to_node", None)
675
+ self._apply_resume_payload(run, payload={}, override_node=resume_to)
676
+
208
677
  run.updated_at = utc_now_iso()
209
678
  self._run_store.save(run)
210
679
  return run
@@ -218,6 +687,83 @@ class Runtime:
218
687
  def get_ledger(self, run_id: str) -> list[dict[str, Any]]:
219
688
  return self._ledger_store.list(run_id)
220
689
 
690
+ def subscribe_ledger(
691
+ self,
692
+ callback: Callable[[Dict[str, Any]], None],
693
+ *,
694
+ run_id: Optional[str] = None,
695
+ ) -> Callable[[], None]:
696
+ """Subscribe to ledger append events (in-process only).
697
+
698
+ This is an optional capability: not all LedgerStore implementations
699
+ support subscriptions. When unavailable, wrap the configured store with
700
+ `abstractruntime.storage.observable.ObservableLedgerStore`.
701
+ """
702
+ subscribe = getattr(self._ledger_store, "subscribe", None)
703
+ if not callable(subscribe):
704
+ raise RuntimeError(
705
+ "Configured LedgerStore does not support subscriptions. "
706
+ "Wrap it with ObservableLedgerStore to enable `subscribe_ledger()`."
707
+ )
708
+ return subscribe(callback, run_id=run_id)
709
+
710
+ # ---------------------------------------------------------------------
711
+ # Trace Helpers (Runtime-Owned)
712
+ # ---------------------------------------------------------------------
713
+
714
+ def get_node_traces(self, run_id: str) -> Dict[str, Any]:
715
+ """Return runtime-owned per-node traces for a run.
716
+
717
+ Traces are stored in `RunState.vars["_runtime"]["node_traces"]`.
718
+ Returns a deep copy so callers can safely inspect without mutating the run.
719
+ """
720
+ run = self.get_state(run_id)
721
+ runtime_ns = run.vars.get("_runtime")
722
+ traces = runtime_ns.get("node_traces") if isinstance(runtime_ns, dict) else None
723
+ return copy.deepcopy(traces) if isinstance(traces, dict) else {}
724
+
725
+ def get_node_trace(self, run_id: str, node_id: str) -> Dict[str, Any]:
726
+ """Return a single node trace object for a run.
727
+
728
+ Returns an empty `{node_id, steps: []}` object when missing.
729
+ """
730
+ traces = self.get_node_traces(run_id)
731
+ trace = traces.get(node_id)
732
+ if isinstance(trace, dict):
733
+ return trace
734
+ return {"node_id": node_id, "steps": []}
735
+
736
+ # ---------------------------------------------------------------------
737
+ # Evidence Helpers (Runtime-Owned)
738
+ # ---------------------------------------------------------------------
739
+
740
+ def list_evidence(self, run_id: str) -> list[dict[str, Any]]:
741
+ """List evidence records for a run (index entries only).
742
+
743
+ Evidence is indexed as `kind="evidence"` items inside `vars["_runtime"]["memory_spans"]`.
744
+ """
745
+ run = self.get_state(run_id)
746
+ runtime_ns = run.vars.get("_runtime")
747
+ spans = runtime_ns.get("memory_spans") if isinstance(runtime_ns, dict) else None
748
+ if not isinstance(spans, list):
749
+ return []
750
+ out: list[dict[str, Any]] = []
751
+ for s in spans:
752
+ if not isinstance(s, dict):
753
+ continue
754
+ if s.get("kind") != "evidence":
755
+ continue
756
+ out.append(copy.deepcopy(s))
757
+ return out
758
+
759
+ def load_evidence(self, evidence_id: str) -> Optional[dict[str, Any]]:
760
+ """Load an evidence record payload from ArtifactStore by id."""
761
+ artifact_store = self._artifact_store
762
+ if artifact_store is None:
763
+ raise RuntimeError("Evidence requires an ArtifactStore; configure runtime.set_artifact_store(...)")
764
+ payload = artifact_store.load_json(str(evidence_id))
765
+ return payload if isinstance(payload, dict) else None
766
+
221
767
  # ---------------------------------------------------------------------
222
768
  # Limit Management
223
769
  # ---------------------------------------------------------------------
@@ -243,10 +789,14 @@ class Runtime:
243
789
  def pct(current: int, maximum: int) -> float:
244
790
  return round(current / maximum * 100, 1) if maximum > 0 else 0
245
791
 
792
+ from .vars import DEFAULT_MAX_TOKENS
793
+
246
794
  current_iter = int(limits.get("current_iteration", 0) or 0)
247
795
  max_iter = int(limits.get("max_iterations", 25) or 25)
248
796
  tokens_used = int(limits.get("estimated_tokens_used", 0) or 0)
249
- max_tokens = int(limits.get("max_tokens", 32768) or 32768)
797
+ max_tokens = int(limits.get("max_tokens", DEFAULT_MAX_TOKENS) or DEFAULT_MAX_TOKENS)
798
+ max_input_tokens = limits.get("max_input_tokens")
799
+ max_output_tokens = limits.get("max_output_tokens")
250
800
 
251
801
  return {
252
802
  "iterations": {
@@ -258,6 +808,8 @@ class Runtime:
258
808
  "tokens": {
259
809
  "estimated_used": tokens_used,
260
810
  "max": max_tokens,
811
+ "max_input_tokens": max_input_tokens,
812
+ "max_output_tokens": max_output_tokens,
261
813
  "pct": pct(tokens_used, max_tokens),
262
814
  "warning": pct(tokens_used, max_tokens) >= limits.get("warn_tokens_pct", 80),
263
815
  },
@@ -294,7 +846,9 @@ class Runtime:
294
846
 
295
847
  # Check tokens
296
848
  tokens_used = int(limits.get("estimated_tokens_used", 0) or 0)
297
- max_tokens = int(limits.get("max_tokens", 32768) or 32768)
849
+ from .vars import DEFAULT_MAX_TOKENS
850
+
851
+ max_tokens = int(limits.get("max_tokens", DEFAULT_MAX_TOKENS) or DEFAULT_MAX_TOKENS)
298
852
  warn_tokens_pct = int(limits.get("warn_tokens_pct", 80) or 80)
299
853
 
300
854
  if max_tokens > 0 and tokens_used > 0:
@@ -325,6 +879,7 @@ class Runtime:
325
879
  "max_iterations",
326
880
  "max_tokens",
327
881
  "max_output_tokens",
882
+ "max_input_tokens",
328
883
  "max_history_messages",
329
884
  "warn_iterations_pct",
330
885
  "warn_tokens_pct",
@@ -338,9 +893,41 @@ class Runtime:
338
893
 
339
894
  self._run_store.save(run)
340
895
 
896
+ def _append_terminal_status_event(self, run: RunState) -> None:
897
+ """Best-effort: append a durable `abstract.status` event on terminal runs.
898
+
899
+ This exists for UI clients that rely on `emit_event` records (e.g. status bars)
900
+ and should not be required for correctness. Failures must be non-fatal.
901
+ """
902
+ try:
903
+ status = getattr(getattr(run, "status", None), "value", None) or str(getattr(run, "status", "") or "")
904
+ status_str = str(status or "").strip().lower()
905
+ if status_str not in {RunStatus.COMPLETED.value, RunStatus.FAILED.value, RunStatus.CANCELLED.value}:
906
+ return
907
+
908
+ node_id = str(getattr(run, "current_node", None) or "").strip() or "runtime"
909
+ eff = Effect(
910
+ type=EffectType.EMIT_EVENT,
911
+ payload={"name": "abstract.status", "scope": "session", "payload": {"text": status_str}},
912
+ )
913
+ rec = StepRecord.start(
914
+ run=run,
915
+ node_id=node_id,
916
+ effect=eff,
917
+ idempotency_key=f"system:terminal_status:{status_str}",
918
+ )
919
+ rec.finish_success({"emitted": True, "name": "abstract.status", "payload": {"text": status_str}})
920
+ self._ledger_store.append(rec)
921
+ except Exception:
922
+ # Observability must never compromise durability/execution.
923
+ return
924
+
341
925
  def tick(self, *, workflow: WorkflowSpec, run_id: str, max_steps: int = 100) -> RunState:
342
926
  run = self.get_state(run_id)
343
- if run.status in (RunStatus.COMPLETED, RunStatus.FAILED):
927
+ # Terminal runs never progress.
928
+ if run.status in (RunStatus.COMPLETED, RunStatus.FAILED, RunStatus.CANCELLED):
929
+ return run
930
+ if _is_paused_run_vars(run.vars):
344
931
  return run
345
932
  if run.status == RunStatus.WAITING:
346
933
  # For WAIT_UNTIL we can auto-unblock if time passed
@@ -352,15 +939,40 @@ class Runtime:
352
939
  else:
353
940
  return run
354
941
 
942
+ # IMPORTANT (Web hosts / concurrency):
943
+ # A run may be paused/cancelled by an external control plane (e.g. AbstractFlow Web UI)
944
+ # while we're blocked inside a long-running effect (LLM/tool execution).
945
+ #
946
+ # We make `tick()` resilient to that by re-loading the persisted RunState before
947
+ # committing any updates. If an external pause/cancel is observed, we stop without
948
+ # overwriting it.
949
+ def _abort_if_externally_controlled() -> Optional[RunState]:
950
+ try:
951
+ latest = self.get_state(run_id)
952
+ except Exception:
953
+ return None
954
+ if latest.status == RunStatus.CANCELLED:
955
+ return latest
956
+ if _is_paused_run_vars(latest.vars):
957
+ return latest
958
+ return None
959
+
355
960
  steps = 0
356
961
  while steps < max_steps:
357
962
  steps += 1
358
963
 
964
+ controlled = _abort_if_externally_controlled()
965
+ if controlled is not None:
966
+ return controlled
967
+
359
968
  handler = workflow.get_node(run.current_node)
360
969
  plan = handler(run, self._ctx)
361
970
 
362
971
  # Completion
363
972
  if plan.complete_output is not None:
973
+ controlled = _abort_if_externally_controlled()
974
+ if controlled is not None:
975
+ return controlled
364
976
  run.status = RunStatus.COMPLETED
365
977
  run.output = plan.complete_output
366
978
  run.updated_at = utc_now_iso()
@@ -368,48 +980,127 @@ class Runtime:
368
980
  # ledger: completion record (no effect)
369
981
  rec = StepRecord.start(run=run, node_id=plan.node_id, effect=None)
370
982
  rec.status = StepStatus.COMPLETED
371
- rec.result = {"completed": True}
983
+ rec.result = {"completed": True, "output": _jsonable(run.output)}
372
984
  rec.ended_at = utc_now_iso()
373
985
  self._ledger_store.append(rec)
986
+ self._append_terminal_status_event(run)
374
987
  return run
375
988
 
376
989
  # Pure transition
377
990
  if plan.effect is None:
378
991
  if not plan.next_node:
379
992
  raise ValueError(f"Node '{plan.node_id}' returned no effect and no next_node")
993
+ controlled = _abort_if_externally_controlled()
994
+ if controlled is not None:
995
+ return controlled
380
996
  run.current_node = plan.next_node
381
997
  run.updated_at = utc_now_iso()
382
998
  self._run_store.save(run)
383
999
  continue
384
1000
 
385
1001
  # Effectful step - check for prior completed result (idempotency)
1002
+ effect = plan.effect
386
1003
  idempotency_key = self._effect_policy.idempotency_key(
387
- run=run, node_id=plan.node_id, effect=plan.effect
1004
+ run=run, node_id=plan.node_id, effect=effect
388
1005
  )
1006
+ effect = _ensure_tool_calls_have_runtime_ids(effect=effect, idempotency_key=idempotency_key)
389
1007
  prior_result = self._find_prior_completed_result(run.run_id, idempotency_key)
1008
+ reused_prior_result = prior_result is not None
1009
+
1010
+ # Measure effect execution duration (wall-clock). This is used for
1011
+ # host-side UX (badges, throughput estimates) and is stored in the
1012
+ # runtime-owned node trace (JSON-safe).
1013
+ import time
1014
+ t0 = time.perf_counter()
390
1015
 
391
1016
  if prior_result is not None:
392
1017
  # Reuse prior result - skip re-execution
393
1018
  outcome = EffectOutcome.completed(prior_result)
394
1019
  else:
1020
+ # For LLM calls, inject runtime grounding into the effect payload so ledger consumers
1021
+ # can see exactly what the model was sent (timestamp + country), without mutating the
1022
+ # durable run context.
1023
+ effect = _maybe_inject_llm_call_grounding_for_ledger(effect=effect)
395
1024
  # Execute with retry logic
396
1025
  outcome = self._execute_effect_with_retry(
397
1026
  run=run,
398
1027
  node_id=plan.node_id,
399
- effect=plan.effect,
1028
+ effect=effect,
400
1029
  idempotency_key=idempotency_key,
401
1030
  default_next_node=plan.next_node,
402
1031
  )
403
1032
 
1033
+ duration_ms = float((time.perf_counter() - t0) * 1000.0)
1034
+
1035
+ # Evidence capture (runtime-owned, durable):
1036
+ # After tool execution completes, record provenance-first evidence for a small set of
1037
+ # external-boundary tools (web_search/fetch_url/execute_command). This must happen
1038
+ # BEFORE we persist node traces / result_key outputs so run state remains bounded.
1039
+ try:
1040
+ if (
1041
+ not reused_prior_result
1042
+ and effect.type == EffectType.TOOL_CALLS
1043
+ and outcome.status == "completed"
1044
+ ):
1045
+ self._maybe_record_tool_evidence(
1046
+ run=run,
1047
+ node_id=plan.node_id,
1048
+ effect=effect,
1049
+ tool_results=outcome.result,
1050
+ )
1051
+ except Exception:
1052
+ # Evidence capture should never crash the run; failures are recorded in run vars.
1053
+ pass
1054
+
1055
+ _record_node_trace(
1056
+ run=run,
1057
+ node_id=plan.node_id,
1058
+ effect=effect,
1059
+ outcome=outcome,
1060
+ idempotency_key=idempotency_key,
1061
+ reused_prior_result=reused_prior_result,
1062
+ duration_ms=duration_ms,
1063
+ )
1064
+
1065
+ # Best-effort token observability: surface last-known input token usage in `_limits`.
1066
+ #
1067
+ # AbstractCore responses generally populate `usage` (prompt/input/output/total tokens).
1068
+ # We store the input-side usage as `estimated_tokens_used` so host UIs and workflows
1069
+ # can reason about compaction budgets without re-tokenizing.
1070
+ try:
1071
+ if effect.type == EffectType.LLM_CALL and outcome.status == "completed" and isinstance(outcome.result, dict):
1072
+ usage = outcome.result.get("usage")
1073
+ if isinstance(usage, dict):
1074
+ raw_in = usage.get("input_tokens")
1075
+ if raw_in is None:
1076
+ raw_in = usage.get("prompt_tokens")
1077
+ if raw_in is None:
1078
+ raw_in = usage.get("total_tokens")
1079
+ if raw_in is not None and not isinstance(raw_in, bool):
1080
+ limits = run.vars.get("_limits")
1081
+ if not isinstance(limits, dict):
1082
+ limits = {}
1083
+ run.vars["_limits"] = limits
1084
+ limits["estimated_tokens_used"] = int(raw_in)
1085
+ except Exception:
1086
+ pass
1087
+
404
1088
  if outcome.status == "failed":
1089
+ controlled = _abort_if_externally_controlled()
1090
+ if controlled is not None:
1091
+ return controlled
405
1092
  run.status = RunStatus.FAILED
406
1093
  run.error = outcome.error or "unknown error"
407
1094
  run.updated_at = utc_now_iso()
408
1095
  self._run_store.save(run)
1096
+ self._append_terminal_status_event(run)
409
1097
  return run
410
1098
 
411
1099
  if outcome.status == "waiting":
412
1100
  assert outcome.wait is not None
1101
+ controlled = _abort_if_externally_controlled()
1102
+ if controlled is not None:
1103
+ return controlled
413
1104
  run.status = RunStatus.WAITING
414
1105
  run.waiting = outcome.wait
415
1106
  run.updated_at = utc_now_iso()
@@ -417,19 +1108,89 @@ class Runtime:
417
1108
  return run
418
1109
 
419
1110
  # completed
420
- if plan.effect.result_key and outcome.result is not None:
421
- _set_nested(run.vars, plan.effect.result_key, outcome.result)
422
-
1111
+ if effect.result_key and outcome.result is not None:
1112
+ _set_nested(run.vars, effect.result_key, outcome.result)
1113
+
1114
+ # Terminal effect node: treat missing next_node as completion.
1115
+ #
1116
+ # Rationale: StepPlan.complete_output is evaluated *before* effects
1117
+ # execute, so an effectful node cannot both execute an effect and
1118
+ # complete the run in a single StepPlan. Allowing next_node=None
1119
+ # makes "end on an effect node" valid (Blueprint-style UX).
423
1120
  if not plan.next_node:
424
- raise ValueError(f"Node '{plan.node_id}' executed effect but did not specify next_node")
1121
+ controlled = _abort_if_externally_controlled()
1122
+ if controlled is not None:
1123
+ return controlled
1124
+ run.status = RunStatus.COMPLETED
1125
+ run.output = {"success": True, "result": outcome.result}
1126
+ run.updated_at = utc_now_iso()
1127
+ self._run_store.save(run)
1128
+ self._append_terminal_status_event(run)
1129
+ return run
1130
+ controlled = _abort_if_externally_controlled()
1131
+ if controlled is not None:
1132
+ return controlled
425
1133
  run.current_node = plan.next_node
426
1134
  run.updated_at = utc_now_iso()
427
1135
  self._run_store.save(run)
428
1136
 
429
1137
  return run
430
1138
 
431
- def resume(self, *, workflow: WorkflowSpec, run_id: str, wait_key: Optional[str], payload: Dict[str, Any]) -> RunState:
1139
+ def _maybe_record_tool_evidence(
1140
+ self,
1141
+ *,
1142
+ run: RunState,
1143
+ node_id: str,
1144
+ effect: Effect,
1145
+ tool_results: Optional[Dict[str, Any]],
1146
+ ) -> None:
1147
+ """Best-effort evidence capture for TOOL_CALLS.
1148
+
1149
+ This is intentionally non-fatal: evidence capture must not crash the run,
1150
+ but failures should be visible in durable run state for debugging.
1151
+ """
1152
+ if effect.type != EffectType.TOOL_CALLS:
1153
+ return
1154
+ if not isinstance(tool_results, dict):
1155
+ return
1156
+ payload = effect.payload if isinstance(effect.payload, dict) else {}
1157
+ tool_calls = payload.get("tool_calls")
1158
+ if not isinstance(tool_calls, list) or not tool_calls:
1159
+ return
1160
+
1161
+ artifact_store = self._artifact_store
1162
+ if artifact_store is None:
1163
+ return
1164
+
1165
+ try:
1166
+ from ..evidence import EvidenceRecorder
1167
+
1168
+ EvidenceRecorder(artifact_store=artifact_store).record_tool_calls(
1169
+ run=run,
1170
+ node_id=str(node_id or ""),
1171
+ tool_calls=list(tool_calls),
1172
+ tool_results=tool_results,
1173
+ )
1174
+ except Exception as e:
1175
+ runtime_ns = _ensure_runtime_namespace(run.vars)
1176
+ warnings = runtime_ns.get("evidence_warnings")
1177
+ if not isinstance(warnings, list):
1178
+ warnings = []
1179
+ runtime_ns["evidence_warnings"] = warnings
1180
+ warnings.append({"ts": utc_now_iso(), "node_id": str(node_id or ""), "error": str(e)})
1181
+
1182
+ def resume(
1183
+ self,
1184
+ *,
1185
+ workflow: WorkflowSpec,
1186
+ run_id: str,
1187
+ wait_key: Optional[str],
1188
+ payload: Dict[str, Any],
1189
+ max_steps: int = 100,
1190
+ ) -> RunState:
432
1191
  run = self.get_state(run_id)
1192
+ if _is_paused_run_vars(run.vars):
1193
+ raise ValueError("Run is paused")
433
1194
  if run.status != RunStatus.WAITING or run.waiting is None:
434
1195
  raise ValueError("Run is not waiting")
435
1196
 
@@ -440,14 +1201,256 @@ class Runtime:
440
1201
  resume_to = run.waiting.resume_to_node
441
1202
  result_key = run.waiting.result_key
442
1203
 
1204
+ # Keep track of what we actually persisted for this resume (tool resumes may
1205
+ # merge blocked-by-allowlist entries back into the payload).
1206
+ stored_payload: Dict[str, Any] = payload
1207
+
443
1208
  if result_key:
444
- _set_nested(run.vars, result_key, payload)
1209
+ details = run.waiting.details if run.waiting is not None else None
1210
+
1211
+ # Special case: subworkflow completion resumed as a tool-style observation.
1212
+ if (
1213
+ run.waiting.reason == WaitReason.SUBWORKFLOW
1214
+ and isinstance(details, dict)
1215
+ and bool(details.get("wrap_as_tool_result", False))
1216
+ and isinstance(payload, dict)
1217
+ and not ("mode" in payload and "results" in payload)
1218
+ ):
1219
+ tool_name = str(details.get("tool_name") or "start_subworkflow").strip() or "start_subworkflow"
1220
+ call_id = str(details.get("call_id") or "subworkflow").strip() or "subworkflow"
1221
+ sub_run_id = str(payload.get("sub_run_id") or details.get("sub_run_id") or "").strip()
1222
+ child_output = payload.get("output")
1223
+
1224
+ answer = ""
1225
+ report = ""
1226
+ err = None
1227
+ success = True
1228
+ if isinstance(child_output, dict):
1229
+ # Generic failure envelope support (VisualFlow style).
1230
+ if child_output.get("success") is False:
1231
+ success = False
1232
+ err = str(child_output.get("error") or "Subworkflow failed")
1233
+ a = child_output.get("answer")
1234
+ if isinstance(a, str) and a.strip():
1235
+ answer = a.strip()
1236
+ r = child_output.get("report")
1237
+ if isinstance(r, str) and r.strip():
1238
+ report = r.strip()
1239
+
1240
+ if not answer:
1241
+ if isinstance(child_output, str) and child_output.strip():
1242
+ answer = child_output.strip()
1243
+ else:
1244
+ try:
1245
+ answer = json.dumps(child_output, ensure_ascii=False)
1246
+ except Exception:
1247
+ answer = "" if child_output is None else str(child_output)
1248
+
1249
+ tool_output: Dict[str, Any] = {"rendered": answer, "answer": answer, "sub_run_id": sub_run_id}
1250
+ if report and len(report) <= 4000:
1251
+ tool_output["report"] = report
1252
+
1253
+ merged_payload: Dict[str, Any] = {
1254
+ "mode": "executed",
1255
+ "results": [
1256
+ {
1257
+ "call_id": call_id,
1258
+ "name": tool_name,
1259
+ "success": bool(success),
1260
+ "output": tool_output if success else None,
1261
+ "error": None if success else err,
1262
+ }
1263
+ ],
1264
+ }
1265
+ else:
1266
+ # Tool waits may carry blocked-by-allowlist metadata. External hosts typically only execute
1267
+ # the filtered subset of tool calls and resume with results for those calls. To keep agent
1268
+ # semantics correct (and evidence indices aligned), merge blocked entries back into the
1269
+ # resumed payload deterministically.
1270
+ merged_payload = payload
1271
+ try:
1272
+ if isinstance(details, dict):
1273
+ blocked = details.get("blocked_by_index")
1274
+ pre_results = details.get("pre_results_by_index")
1275
+ original_count = details.get("original_call_count")
1276
+ results = payload.get("results") if isinstance(payload, dict) else None
1277
+ fixed_by_index: Dict[str, Any] = {}
1278
+ if isinstance(blocked, dict):
1279
+ fixed_by_index.update(blocked)
1280
+ if isinstance(pre_results, dict):
1281
+ fixed_by_index.update(pre_results)
1282
+ if (
1283
+ fixed_by_index
1284
+ and isinstance(original_count, int)
1285
+ and original_count > 0
1286
+ and isinstance(results, list)
1287
+ and len(results) != original_count
1288
+ ):
1289
+ merged_results: list[Any] = []
1290
+ executed_iter = iter(results)
1291
+
1292
+ for idx in range(original_count):
1293
+ fixed_entry = fixed_by_index.get(str(idx))
1294
+ if isinstance(fixed_entry, dict):
1295
+ merged_results.append(fixed_entry)
1296
+ continue
1297
+ try:
1298
+ merged_results.append(next(executed_iter))
1299
+ except StopIteration:
1300
+ merged_results.append(
1301
+ {
1302
+ "call_id": "",
1303
+ "runtime_call_id": None,
1304
+ "name": "",
1305
+ "success": False,
1306
+ "output": None,
1307
+ "error": "Missing tool result",
1308
+ }
1309
+ )
1310
+
1311
+ merged_payload = dict(payload)
1312
+ merged_payload["results"] = merged_results
1313
+ merged_payload.setdefault("mode", "executed")
1314
+ except Exception:
1315
+ merged_payload = payload
1316
+
1317
+ _set_nested(run.vars, result_key, merged_payload)
1318
+ stored_payload = merged_payload
1319
+ # Passthrough tool execution: the host resumes with tool results. We still want
1320
+ # evidence capture and payload-bounding (store large parts as artifacts) before
1321
+ # the run continues.
1322
+ try:
1323
+ details = run.waiting.details if run.waiting is not None else None
1324
+ tool_calls_for_evidence = None
1325
+ if isinstance(details, dict):
1326
+ tool_calls_for_evidence = details.get("tool_calls_for_evidence")
1327
+ if not isinstance(tool_calls_for_evidence, list):
1328
+ tool_calls_for_evidence = details.get("tool_calls")
1329
+
1330
+ if isinstance(tool_calls_for_evidence, list):
1331
+ from ..evidence import EvidenceRecorder
1332
+
1333
+ artifact_store = self._artifact_store
1334
+ if artifact_store is not None and isinstance(payload, dict):
1335
+ EvidenceRecorder(artifact_store=artifact_store).record_tool_calls(
1336
+ run=run,
1337
+ node_id=str(run.current_node or ""),
1338
+ tool_calls=list(tool_calls_for_evidence or []),
1339
+ tool_results=merged_payload,
1340
+ )
1341
+ except Exception:
1342
+ pass
1343
+
1344
+ # Append a durable "resume" record to the ledger for replay-first clients.
1345
+ #
1346
+ # Why:
1347
+ # - The ledger is the source-of-truth for replay/streaming (ADR-0011/0018).
1348
+ # - Without a resume record, user input payloads (ASK_USER / abstract.ask / tool approvals)
1349
+ # only live in RunState.vars and are not visible during ledger-only replay.
1350
+ #
1351
+ # This is best-effort: failure to append must not compromise correctness.
1352
+ try:
1353
+ wait_before = run.waiting
1354
+ wait_reason_value = None
1355
+ wait_key_value = None
1356
+ try:
1357
+ if wait_before is not None:
1358
+ r0 = getattr(wait_before, "reason", None)
1359
+ wait_reason_value = r0.value if hasattr(r0, "value") else str(r0) if r0 is not None else None
1360
+ wait_key_value = getattr(wait_before, "wait_key", None)
1361
+ except Exception:
1362
+ wait_reason_value = None
1363
+ wait_key_value = None
1364
+
1365
+ payload_for_ledger: Any = stored_payload
1366
+ try:
1367
+ from ..storage.offloading import _default_max_inline_bytes, offload_large_values
1368
+
1369
+ if self._artifact_store is not None:
1370
+ payload_for_ledger = offload_large_values(
1371
+ stored_payload,
1372
+ artifact_store=self._artifact_store,
1373
+ run_id=str(run.run_id or ""),
1374
+ max_inline_bytes=_default_max_inline_bytes(),
1375
+ base_tags={"source": "resume", "kind": "resume_payload"},
1376
+ root_path="resume.payload",
1377
+ allow_root_replace=False,
1378
+ )
1379
+ except Exception:
1380
+ payload_for_ledger = stored_payload
1381
+
1382
+ node_id0 = str(getattr(run, "current_node", None) or "")
1383
+ rec = StepRecord.start(run=run, node_id=node_id0 or "unknown", effect=None)
1384
+ rec.status = StepStatus.COMPLETED
1385
+ rec.effect = {
1386
+ "type": "resume",
1387
+ "payload": {
1388
+ "wait_reason": wait_reason_value,
1389
+ "wait_key": wait_key_value,
1390
+ "resume_to_node": resume_to,
1391
+ "result_key": result_key,
1392
+ "payload": payload_for_ledger,
1393
+ },
1394
+ "result_key": None,
1395
+ }
1396
+ rec.result = {"resumed": True}
1397
+ rec.ended_at = utc_now_iso()
1398
+ self._ledger_store.append(rec)
1399
+ except Exception:
1400
+ pass
1401
+
1402
+ # Terminal waiting node: if there is no resume target, treat the resume payload as
1403
+ # the final output instead of re-executing the waiting node again (which would
1404
+ # otherwise create an infinite wait/resume loop).
1405
+ if resume_to is None:
1406
+ # Capture the wait context for observability before clearing it.
1407
+ wait_before = run.waiting
1408
+ wait_reason = None
1409
+ wait_key0 = None
1410
+ try:
1411
+ if wait_before is not None:
1412
+ r0 = getattr(wait_before, "reason", None)
1413
+ wait_reason = r0.value if hasattr(r0, "value") else str(r0) if r0 is not None else None
1414
+ wait_key0 = getattr(wait_before, "wait_key", None)
1415
+ except Exception:
1416
+ wait_reason = None
1417
+ wait_key0 = None
1418
+
1419
+ run.status = RunStatus.COMPLETED
1420
+ run.waiting = None
1421
+ run.output = {"success": True, "result": stored_payload}
1422
+ run.updated_at = utc_now_iso()
1423
+ self._run_store.save(run)
1424
+
1425
+ # Ledger must remain the source-of-truth for replay/streaming.
1426
+ # When a terminal wait is resumed, there is no follow-up `tick()` to append a
1427
+ # completion record, so we append one here.
1428
+ try:
1429
+ node_id0 = str(getattr(run, "current_node", None) or "")
1430
+ rec = StepRecord.start(run=run, node_id=node_id0 or "unknown", effect=None)
1431
+ rec.status = StepStatus.COMPLETED
1432
+ rec.result = {
1433
+ "completed": True,
1434
+ "via": "resume",
1435
+ "wait_reason": wait_reason,
1436
+ "wait_key": wait_key0,
1437
+ "output": _jsonable(run.output),
1438
+ }
1439
+ rec.ended_at = utc_now_iso()
1440
+ self._ledger_store.append(rec)
1441
+ except Exception:
1442
+ # Observability must never compromise durability/execution.
1443
+ pass
1444
+ self._append_terminal_status_event(run)
1445
+ return run
445
1446
 
446
1447
  self._apply_resume_payload(run, payload=payload, override_node=resume_to)
447
1448
  run.updated_at = utc_now_iso()
448
1449
  self._run_store.save(run)
449
1450
 
450
- return self.tick(workflow=workflow, run_id=run_id)
1451
+ if max_steps <= 0:
1452
+ return run
1453
+ return self.tick(workflow=workflow, run_id=run_id, max_steps=max_steps)
451
1454
 
452
1455
  # ---------------------------------------------------------------------
453
1456
  # Internals
@@ -457,8 +1460,140 @@ class Runtime:
457
1460
  self._handlers[EffectType.WAIT_EVENT] = self._handle_wait_event
458
1461
  self._handlers[EffectType.WAIT_UNTIL] = self._handle_wait_until
459
1462
  self._handlers[EffectType.ASK_USER] = self._handle_ask_user
1463
+ self._handlers[EffectType.ANSWER_USER] = self._handle_answer_user
1464
+ self._handlers[EffectType.EMIT_EVENT] = self._handle_emit_event
1465
+ self._handlers[EffectType.MEMORY_QUERY] = self._handle_memory_query
1466
+ self._handlers[EffectType.MEMORY_TAG] = self._handle_memory_tag
1467
+ self._handlers[EffectType.MEMORY_COMPACT] = self._handle_memory_compact
1468
+ self._handlers[EffectType.MEMORY_NOTE] = self._handle_memory_note
1469
+ self._handlers[EffectType.MEMORY_REHYDRATE] = self._handle_memory_rehydrate
1470
+ self._handlers[EffectType.VARS_QUERY] = self._handle_vars_query
460
1471
  self._handlers[EffectType.START_SUBWORKFLOW] = self._handle_start_subworkflow
461
1472
 
1473
+ # Built-in memory helpers ------------------------------------------------
1474
+
1475
+ def _global_memory_run_id(self) -> str:
1476
+ """Return the global memory run id (stable).
1477
+
1478
+ Hosts can override via `ABSTRACTRUNTIME_GLOBAL_MEMORY_RUN_ID`.
1479
+ """
1480
+ rid = os.environ.get("ABSTRACTRUNTIME_GLOBAL_MEMORY_RUN_ID")
1481
+ rid = str(rid or "").strip()
1482
+ if rid and _SAFE_RUN_ID_PATTERN.match(rid):
1483
+ return rid
1484
+ return _DEFAULT_GLOBAL_MEMORY_RUN_ID
1485
+
1486
+ def _ensure_global_memory_run(self) -> RunState:
1487
+ """Load or create the global memory run used as the owner for `scope="global"` spans."""
1488
+ rid = self._global_memory_run_id()
1489
+ existing = self._run_store.load(rid)
1490
+ if existing is not None:
1491
+ return existing
1492
+
1493
+ run = RunState(
1494
+ run_id=rid,
1495
+ workflow_id="__global_memory__",
1496
+ status=RunStatus.COMPLETED,
1497
+ current_node="done",
1498
+ vars={
1499
+ "context": {"task": "", "messages": []},
1500
+ "scratchpad": {},
1501
+ "_runtime": {"memory_spans": []},
1502
+ "_temp": {},
1503
+ "_limits": {},
1504
+ },
1505
+ waiting=None,
1506
+ output={"messages": []},
1507
+ error=None,
1508
+ created_at=utc_now_iso(),
1509
+ updated_at=utc_now_iso(),
1510
+ actor_id=None,
1511
+ session_id=None,
1512
+ parent_run_id=None,
1513
+ )
1514
+ self._run_store.save(run)
1515
+ return run
1516
+
1517
+ def _session_memory_run_id(self, session_id: str) -> str:
1518
+ """Return a stable session memory run id for a durable `session_id`.
1519
+
1520
+ This run is internal and is used only as the owner for `scope="session"` span indices.
1521
+ """
1522
+ sid = str(session_id or "").strip()
1523
+ if not sid:
1524
+ raise ValueError("session_id is required")
1525
+ if _SAFE_RUN_ID_PATTERN.match(sid):
1526
+ rid = f"{_DEFAULT_SESSION_MEMORY_RUN_PREFIX}{sid}"
1527
+ if _SAFE_RUN_ID_PATTERN.match(rid):
1528
+ return rid
1529
+ digest = hashlib.sha256(sid.encode("utf-8")).hexdigest()[:32]
1530
+ return f"{_DEFAULT_SESSION_MEMORY_RUN_PREFIX}sha_{digest}"
1531
+
1532
+ def _ensure_session_memory_run(self, session_id: str) -> RunState:
1533
+ """Load or create the session memory run used as the owner for `scope=\"session\"` spans."""
1534
+ rid = self._session_memory_run_id(session_id)
1535
+ existing = self._run_store.load(rid)
1536
+ if existing is not None:
1537
+ return existing
1538
+
1539
+ run = RunState(
1540
+ run_id=rid,
1541
+ workflow_id="__session_memory__",
1542
+ status=RunStatus.COMPLETED,
1543
+ current_node="done",
1544
+ vars={
1545
+ "context": {"task": "", "messages": []},
1546
+ "scratchpad": {},
1547
+ "_runtime": {"memory_spans": []},
1548
+ "_temp": {},
1549
+ "_limits": {},
1550
+ },
1551
+ waiting=None,
1552
+ output={"messages": []},
1553
+ error=None,
1554
+ created_at=utc_now_iso(),
1555
+ updated_at=utc_now_iso(),
1556
+ actor_id=None,
1557
+ session_id=str(session_id or "").strip() or None,
1558
+ parent_run_id=None,
1559
+ )
1560
+ self._run_store.save(run)
1561
+ return run
1562
+
1563
+ def _resolve_run_tree_root_run(self, run: RunState) -> RunState:
1564
+ """Resolve the root run of the current run-tree (walk `parent_run_id`).
1565
+
1566
+ This is used as a backward-compatible fallback for legacy runs without `session_id`.
1567
+ """
1568
+ cur = run
1569
+ seen: set[str] = set()
1570
+ while True:
1571
+ parent_id = getattr(cur, "parent_run_id", None)
1572
+ if not isinstance(parent_id, str) or not parent_id.strip():
1573
+ return cur
1574
+ pid = parent_id.strip()
1575
+ if pid in seen:
1576
+ # Defensive: break cycles.
1577
+ return cur
1578
+ seen.add(pid)
1579
+ parent = self._run_store.load(pid)
1580
+ if parent is None:
1581
+ return cur
1582
+ cur = parent
1583
+
1584
+ def _resolve_scope_owner_run(self, base_run: RunState, *, scope: str) -> RunState:
1585
+ s = str(scope or "").strip().lower() or "run"
1586
+ if s == "run":
1587
+ return base_run
1588
+ if s == "session":
1589
+ sid = getattr(base_run, "session_id", None)
1590
+ if isinstance(sid, str) and sid.strip():
1591
+ return self._ensure_session_memory_run(sid.strip())
1592
+ return self._resolve_run_tree_root_run(base_run)
1593
+ if s == "global":
1594
+ return self._ensure_global_memory_run()
1595
+ raise ValueError(f"Unknown memory scope: {scope}")
1596
+
462
1597
  def _find_prior_completed_result(
463
1598
  self, run_id: str, idempotency_key: str
464
1599
  ) -> Optional[Dict[str, Any]]:
@@ -579,16 +1714,250 @@ class Runtime:
579
1714
  def _handle_wait_event(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
580
1715
  wait_key = effect.payload.get("wait_key")
581
1716
  if not wait_key:
582
- return EffectOutcome.failed("wait_event requires payload.wait_key")
1717
+ # Allow structured payloads (scope/name) so hosts can compute stable keys.
1718
+ scope = effect.payload.get("scope", "session")
1719
+ name = effect.payload.get("name") or effect.payload.get("event_name")
1720
+ if not isinstance(name, str) or not name.strip():
1721
+ return EffectOutcome.failed("wait_event requires payload.wait_key or payload.name")
1722
+
1723
+ session_id = effect.payload.get("session_id") or run.session_id or run.run_id
1724
+ try:
1725
+ wait_key = build_event_wait_key(
1726
+ scope=str(scope or "session"),
1727
+ name=str(name),
1728
+ session_id=str(session_id) if session_id is not None else None,
1729
+ workflow_id=run.workflow_id,
1730
+ run_id=run.run_id,
1731
+ )
1732
+ except Exception as e:
1733
+ return EffectOutcome.failed(f"wait_event invalid payload: {e}")
583
1734
  resume_to = effect.payload.get("resume_to_node") or default_next_node
1735
+ # Optional UX metadata for hosts:
1736
+ # - "prompt"/"choices"/"allow_free_text" enable durable human-in-the-loop
1737
+ # waits using EVENT as the wakeup mechanism (useful for thin clients).
1738
+ prompt: Optional[str] = None
1739
+ try:
1740
+ p = effect.payload.get("prompt")
1741
+ if isinstance(p, str) and p.strip():
1742
+ prompt = p
1743
+ except Exception:
1744
+ prompt = None
1745
+
1746
+ choices: Optional[List[str]] = None
1747
+ try:
1748
+ raw_choices = effect.payload.get("choices")
1749
+ if isinstance(raw_choices, list):
1750
+ normalized: List[str] = []
1751
+ for c in raw_choices:
1752
+ if isinstance(c, str) and c.strip():
1753
+ normalized.append(c.strip())
1754
+ choices = normalized
1755
+ except Exception:
1756
+ choices = None
1757
+
1758
+ allow_free_text = True
1759
+ try:
1760
+ aft = effect.payload.get("allow_free_text")
1761
+ if aft is None:
1762
+ aft = effect.payload.get("allowFreeText")
1763
+ if aft is not None:
1764
+ allow_free_text = bool(aft)
1765
+ except Exception:
1766
+ allow_free_text = True
1767
+
1768
+ details = None
1769
+ try:
1770
+ d = effect.payload.get("details")
1771
+ if isinstance(d, dict):
1772
+ details = dict(d)
1773
+ except Exception:
1774
+ details = None
584
1775
  wait = WaitState(
585
1776
  reason=WaitReason.EVENT,
586
1777
  wait_key=str(wait_key),
587
1778
  resume_to_node=resume_to,
588
1779
  result_key=effect.result_key,
1780
+ prompt=prompt,
1781
+ choices=choices,
1782
+ allow_free_text=allow_free_text,
1783
+ details=details,
589
1784
  )
590
1785
  return EffectOutcome.waiting(wait)
591
1786
 
1787
+ def _handle_emit_event(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
1788
+ """Emit a durable event and resume matching WAIT_EVENT runs.
1789
+
1790
+ Payload:
1791
+ - name: str (required) event name
1792
+ - scope: str (optional, default "session") "session" | "workflow" | "run" | "global"
1793
+ - session_id: str (optional) target session id (for cross-workflow targeted delivery)
1794
+ - payload: dict (optional) event payload delivered to listeners
1795
+ - max_steps: int (optional, default 100) tick budget per resumed run
1796
+
1797
+ Notes:
1798
+ - This is durable because it resumes WAIT_EVENT runs via Runtime.resume(), which
1799
+ checkpoints run state and appends ledger records for subsequent steps.
1800
+ - Delivery is best-effort and at-least-once; listeners should be idempotent if needed.
1801
+ """
1802
+ name = effect.payload.get("name") or effect.payload.get("event_name")
1803
+ if not isinstance(name, str) or not name.strip():
1804
+ return EffectOutcome.failed("emit_event requires payload.name")
1805
+
1806
+ scope = effect.payload.get("scope", "session")
1807
+ target_session_id = effect.payload.get("session_id")
1808
+ payload = effect.payload.get("payload") or {}
1809
+ if not isinstance(payload, dict):
1810
+ payload = {"value": payload}
1811
+
1812
+ # NOTE: we intentionally resume listeners with max_steps=0 (no execution).
1813
+ # Hosts (web backend, workers, schedulers) should drive RUNNING runs and
1814
+ # stream their StepRecords deterministically (better observability and UX).
1815
+ try:
1816
+ max_steps = int(effect.payload.get("max_steps", 0) or 0)
1817
+ except Exception:
1818
+ max_steps = 0
1819
+ if max_steps < 0:
1820
+ max_steps = 0
1821
+
1822
+ # Determine target scope id (default: current session/run).
1823
+ session_id = target_session_id
1824
+ if session_id is None and str(scope or "session").strip().lower() == "session":
1825
+ session_id = run.session_id or run.run_id
1826
+
1827
+ try:
1828
+ wait_key = build_event_wait_key(
1829
+ scope=str(scope or "session"),
1830
+ name=str(name),
1831
+ session_id=str(session_id) if session_id is not None else None,
1832
+ workflow_id=run.workflow_id,
1833
+ run_id=run.run_id,
1834
+ )
1835
+ except Exception as e:
1836
+ return EffectOutcome.failed(f"emit_event invalid payload: {e}")
1837
+
1838
+ # Wildcard listeners ("*") receive all events within the same scope_id.
1839
+ wildcard_wait_key: Optional[str] = None
1840
+ try:
1841
+ wildcard_wait_key = build_event_wait_key(
1842
+ scope=str(scope or "session"),
1843
+ name="*",
1844
+ session_id=str(session_id) if session_id is not None else None,
1845
+ workflow_id=run.workflow_id,
1846
+ run_id=run.run_id,
1847
+ )
1848
+ except Exception:
1849
+ wildcard_wait_key = None
1850
+
1851
+ if not isinstance(self._run_store, QueryableRunStore):
1852
+ return EffectOutcome.failed(
1853
+ "emit_event requires a QueryableRunStore to find waiting runs. "
1854
+ "Use InMemoryRunStore/JsonFileRunStore or provide a queryable store."
1855
+ )
1856
+
1857
+ # Find all runs waiting for this event key.
1858
+ candidates = self._run_store.list_runs(
1859
+ status=RunStatus.WAITING,
1860
+ wait_reason=WaitReason.EVENT,
1861
+ limit=10_000,
1862
+ )
1863
+
1864
+ delivered_to: list[str] = []
1865
+ resumed: list[Dict[str, Any]] = []
1866
+ envelope = {
1867
+ "event_id": effect.payload.get("event_id") or None,
1868
+ "name": str(name),
1869
+ "scope": str(scope or "session"),
1870
+ "session_id": str(session_id) if session_id is not None else None,
1871
+ "payload": dict(payload),
1872
+ "emitted_at": utc_now_iso(),
1873
+ "emitter": {
1874
+ "run_id": run.run_id,
1875
+ "workflow_id": run.workflow_id,
1876
+ "node_id": run.current_node,
1877
+ },
1878
+ }
1879
+
1880
+ available_in_session: list[str] = []
1881
+ prefix = f"evt:session:{session_id}:"
1882
+
1883
+ # First pass: find matching runs and compute best-effort diagnostics without
1884
+ # requiring a workflow_registry. This allows UI-only EMIT_EVENT usage
1885
+ # (e.g. AbstractCode notifications) in deployments that do not use
1886
+ # WAIT_EVENT listeners.
1887
+ matched: list[tuple[RunState, Optional[str]]] = []
1888
+ for r in candidates:
1889
+ if _is_paused_run_vars(getattr(r, "vars", None)):
1890
+ continue
1891
+ w = getattr(r, "waiting", None)
1892
+ if w is None:
1893
+ continue
1894
+ wk = getattr(w, "wait_key", None)
1895
+ if isinstance(wk, str) and wk.startswith(prefix):
1896
+ # Help users debug name mismatches (best-effort).
1897
+ suffix = wk[len(prefix) :]
1898
+ if suffix and suffix not in available_in_session and len(available_in_session) < 15:
1899
+ available_in_session.append(suffix)
1900
+ if wk != wait_key and (wildcard_wait_key is None or wk != wildcard_wait_key):
1901
+ continue
1902
+
1903
+ matched.append((r, wk if isinstance(wk, str) else None))
1904
+
1905
+ # If there are no matching listeners, emitting is still a useful side effect
1906
+ # for hosts (ledger observability, UI events). In that case, do not require
1907
+ # a workflow_registry.
1908
+ if not matched:
1909
+ out0: Dict[str, Any] = {
1910
+ "wait_key": wait_key,
1911
+ "name": str(name),
1912
+ "scope": str(scope or "session"),
1913
+ "delivered": 0,
1914
+ "delivered_to": [],
1915
+ "resumed": [],
1916
+ }
1917
+ if available_in_session:
1918
+ out0["available_listeners_in_session"] = available_in_session
1919
+ return EffectOutcome.completed(out0)
1920
+
1921
+ if self._workflow_registry is None:
1922
+ return EffectOutcome.failed(
1923
+ "emit_event requires a workflow_registry to resume target runs. "
1924
+ "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)."
1925
+ )
1926
+
1927
+ for r, wk in matched:
1928
+ wf = self._workflow_registry.get(r.workflow_id)
1929
+ if wf is None:
1930
+ # Can't resume without the spec; skip but include diagnostic in result.
1931
+ resumed.append({"run_id": r.run_id, "status": "skipped", "error": "workflow_not_registered"})
1932
+ continue
1933
+
1934
+ try:
1935
+ # Resume using the run's own wait_key (supports wildcard listeners).
1936
+ resume_key = wk if isinstance(wk, str) and wk else None
1937
+ new_state = self.resume(
1938
+ workflow=wf,
1939
+ run_id=r.run_id,
1940
+ wait_key=resume_key,
1941
+ payload=envelope,
1942
+ max_steps=max_steps,
1943
+ )
1944
+ delivered_to.append(r.run_id)
1945
+ resumed.append({"run_id": r.run_id, "status": new_state.status.value})
1946
+ except Exception as e:
1947
+ resumed.append({"run_id": r.run_id, "status": "error", "error": str(e)})
1948
+
1949
+ out: Dict[str, Any] = {
1950
+ "wait_key": wait_key,
1951
+ "name": str(name),
1952
+ "scope": str(scope or "session"),
1953
+ "delivered": len(delivered_to),
1954
+ "delivered_to": delivered_to,
1955
+ "resumed": resumed,
1956
+ }
1957
+ if not delivered_to and available_in_session:
1958
+ out["available_listeners_in_session"] = available_in_session
1959
+ return EffectOutcome.completed(out)
1960
+
592
1961
  def _handle_wait_until(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
593
1962
  until = effect.payload.get("until")
594
1963
  if not until:
@@ -628,6 +1997,30 @@ class Runtime:
628
1997
  )
629
1998
  return EffectOutcome.waiting(wait)
630
1999
 
2000
+ def _handle_answer_user(
2001
+ self, run: RunState, effect: Effect, default_next_node: Optional[str]
2002
+ ) -> EffectOutcome:
2003
+ """Handle ANSWER_USER effect.
2004
+
2005
+ This effect is intentionally non-blocking: it completes immediately and
2006
+ returns the message payload so the host UI can render it.
2007
+ """
2008
+ message = effect.payload.get("message")
2009
+ if message is None:
2010
+ # Backward/compat convenience aliases.
2011
+ message = effect.payload.get("text") or effect.payload.get("content")
2012
+ if message is None:
2013
+ return EffectOutcome.failed("answer_user requires payload.message")
2014
+ level_raw = effect.payload.get("level")
2015
+ level = str(level_raw).strip().lower() if isinstance(level_raw, str) else ""
2016
+ if level in {"warn"}:
2017
+ level = "warning"
2018
+ if level not in {"message", "warning", "error", "info"}:
2019
+ level = "message"
2020
+ if level == "info":
2021
+ level = "message"
2022
+ return EffectOutcome.completed({"message": str(message), "level": level})
2023
+
631
2024
  def _handle_start_subworkflow(
632
2025
  self, run: RunState, effect: Effect, default_next_node: Optional[str]
633
2026
  ) -> EffectOutcome:
@@ -647,11 +2040,77 @@ class Runtime:
647
2040
  - Starts the subworkflow and returns immediately
648
2041
  - Returns {"sub_run_id": "..."} so parent can track it
649
2042
  """
2043
+ payload0 = effect.payload if isinstance(effect.payload, dict) else {}
2044
+ wrap_as_tool_result = bool(payload0.get("wrap_as_tool_result", False))
2045
+ tool_name_raw = payload0.get("tool_name")
2046
+ if tool_name_raw is None:
2047
+ tool_name_raw = payload0.get("toolName")
2048
+ tool_name = str(tool_name_raw or "").strip()
2049
+ call_id_raw = payload0.get("call_id")
2050
+ if call_id_raw is None:
2051
+ call_id_raw = payload0.get("callId")
2052
+ call_id = str(call_id_raw or "").strip()
2053
+
2054
+ def _tool_result(*, success: bool, output: Any, error: Optional[str]) -> Dict[str, Any]:
2055
+ name = tool_name or "start_subworkflow"
2056
+ cid = call_id or "subworkflow"
2057
+ return {
2058
+ "mode": "executed",
2059
+ "results": [
2060
+ {
2061
+ "call_id": cid,
2062
+ "name": name,
2063
+ "success": bool(success),
2064
+ "output": output if success else None,
2065
+ "error": None if success else str(error or "Subworkflow failed"),
2066
+ }
2067
+ ],
2068
+ }
2069
+
2070
+ def _tool_output_for_subworkflow(*, sub_run_id: str, output: Any) -> Dict[str, Any]:
2071
+ rendered = ""
2072
+ answer = ""
2073
+ report = ""
2074
+ if isinstance(output, dict):
2075
+ a = output.get("answer")
2076
+ if isinstance(a, str) and a.strip():
2077
+ answer = a.strip()
2078
+ r = output.get("report")
2079
+ if isinstance(r, str) and r.strip():
2080
+ report = r.strip()
2081
+ if not answer:
2082
+ if isinstance(output, str) and output.strip():
2083
+ answer = output.strip()
2084
+ else:
2085
+ try:
2086
+ answer = json.dumps(output, ensure_ascii=False)
2087
+ except Exception:
2088
+ answer = str(output)
2089
+ rendered = answer
2090
+ out = {"rendered": rendered, "answer": answer, "sub_run_id": str(sub_run_id)}
2091
+ # Keep the tool observation bounded; the full child run can be inspected via run id if needed.
2092
+ if report and len(report) <= 4000:
2093
+ out["report"] = report
2094
+ return out
2095
+
650
2096
  workflow_id = effect.payload.get("workflow_id")
651
2097
  if not workflow_id:
2098
+ if wrap_as_tool_result:
2099
+ return EffectOutcome.completed(_tool_result(success=False, output=None, error="start_subworkflow requires payload.workflow_id"))
652
2100
  return EffectOutcome.failed("start_subworkflow requires payload.workflow_id")
653
2101
 
654
2102
  if self._workflow_registry is None:
2103
+ if wrap_as_tool_result:
2104
+ return EffectOutcome.completed(
2105
+ _tool_result(
2106
+ success=False,
2107
+ output=None,
2108
+ error=(
2109
+ "start_subworkflow requires a workflow_registry. "
2110
+ "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)"
2111
+ ),
2112
+ )
2113
+ )
655
2114
  return EffectOutcome.failed(
656
2115
  "start_subworkflow requires a workflow_registry. "
657
2116
  "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)"
@@ -660,24 +2119,100 @@ class Runtime:
660
2119
  # Look up the subworkflow
661
2120
  sub_workflow = self._workflow_registry.get(workflow_id)
662
2121
  if sub_workflow is None:
2122
+ if wrap_as_tool_result:
2123
+ return EffectOutcome.completed(
2124
+ _tool_result(success=False, output=None, error=f"Workflow '{workflow_id}' not found in registry")
2125
+ )
663
2126
  return EffectOutcome.failed(f"Workflow '{workflow_id}' not found in registry")
664
2127
 
665
- sub_vars = effect.payload.get("vars") or {}
2128
+ sub_vars_raw = effect.payload.get("vars")
2129
+ sub_vars: Dict[str, Any] = dict(sub_vars_raw) if isinstance(sub_vars_raw, dict) else {}
2130
+
2131
+ # Inherit workspace policy into child runs by default.
2132
+ #
2133
+ # Why: in VisualFlow, agents/subflows run as START_SUBWORKFLOW runs. Tool execution inside the
2134
+ # child must respect the same workspace scope the user configured for the parent run.
2135
+ #
2136
+ # Policy: only inherit when the child did not explicitly override the keys.
2137
+ try:
2138
+ parent_vars = run.vars if isinstance(getattr(run, "vars", None), dict) else {}
2139
+ for k in ("workspace_root", "workspace_access_mode", "workspace_allowed_paths", "workspace_ignored_paths"):
2140
+ if k in sub_vars:
2141
+ continue
2142
+ v = parent_vars.get(k)
2143
+ if v is None:
2144
+ continue
2145
+ if isinstance(v, str):
2146
+ if not v.strip():
2147
+ continue
2148
+ sub_vars[k] = v
2149
+ continue
2150
+ sub_vars[k] = v
2151
+ except Exception:
2152
+ pass
666
2153
  is_async = bool(effect.payload.get("async", False))
2154
+ wait_for_completion = bool(effect.payload.get("wait", False))
2155
+ include_traces = bool(effect.payload.get("include_traces", False))
667
2156
  resume_to = effect.payload.get("resume_to_node") or default_next_node
668
2157
 
2158
+ # Optional override: allow the caller (e.g. VisualFlow compiler) to pass an explicit
2159
+ # session_id for the child run. When omitted, children inherit the parent's session.
2160
+ session_override = effect.payload.get("session_id")
2161
+ if session_override is None:
2162
+ session_override = effect.payload.get("sessionId")
2163
+ session_id: Optional[str]
2164
+ if isinstance(session_override, str) and session_override.strip():
2165
+ session_id = session_override.strip()
2166
+ else:
2167
+ session_id = getattr(run, "session_id", None)
2168
+
669
2169
  # Start the subworkflow with parent tracking
670
2170
  sub_run_id = self.start(
671
2171
  workflow=sub_workflow,
672
2172
  vars=sub_vars,
673
2173
  actor_id=run.actor_id, # Inherit actor from parent
674
- session_id=getattr(run, "session_id", None), # Inherit session from parent
2174
+ session_id=session_id,
675
2175
  parent_run_id=run.run_id, # Track parent for hierarchy
676
2176
  )
677
2177
 
678
2178
  if is_async:
679
- # Async mode: return immediately with sub_run_id
680
- # The child is started but not ticked - caller is responsible for driving it
2179
+ # Async mode: start the child and return immediately.
2180
+ #
2181
+ # If `wait=True`, we *also* transition the parent into a durable WAITING state
2182
+ # so a host (e.g. AbstractFlow WebSocket runner loop) can:
2183
+ # - tick the child run incrementally (and stream node traces in real time)
2184
+ # - resume the parent once the child completes (by calling runtime.resume(...))
2185
+ #
2186
+ # Without `wait=True`, this remains fire-and-forget.
2187
+ if wait_for_completion:
2188
+ wait = WaitState(
2189
+ reason=WaitReason.SUBWORKFLOW,
2190
+ wait_key=f"subworkflow:{sub_run_id}",
2191
+ resume_to_node=resume_to,
2192
+ result_key=effect.result_key,
2193
+ details={
2194
+ "sub_run_id": sub_run_id,
2195
+ "sub_workflow_id": workflow_id,
2196
+ "async": True,
2197
+ "include_traces": include_traces,
2198
+ },
2199
+ )
2200
+ if wrap_as_tool_result:
2201
+ if isinstance(wait.details, dict):
2202
+ wait.details["wrap_as_tool_result"] = True
2203
+ wait.details["tool_name"] = tool_name or "start_subworkflow"
2204
+ wait.details["call_id"] = call_id or "subworkflow"
2205
+ return EffectOutcome.waiting(wait)
2206
+
2207
+ # Fire-and-forget: caller is responsible for driving/observing the child.
2208
+ if wrap_as_tool_result:
2209
+ return EffectOutcome.completed(
2210
+ _tool_result(
2211
+ success=True,
2212
+ output={"rendered": f"Started subworkflow {sub_run_id}", "sub_run_id": sub_run_id, "async": True},
2213
+ error=None,
2214
+ )
2215
+ )
681
2216
  return EffectOutcome.completed({"sub_run_id": sub_run_id, "async": True})
682
2217
 
683
2218
  # Sync mode: run the subworkflow until completion or waiting
@@ -689,16 +2224,30 @@ class Runtime:
689
2224
 
690
2225
  if sub_state.status == RunStatus.COMPLETED:
691
2226
  # Subworkflow completed - return its output
692
- return EffectOutcome.completed({
693
- "sub_run_id": sub_run_id,
694
- "output": sub_state.output,
695
- })
2227
+ if wrap_as_tool_result:
2228
+ return EffectOutcome.completed(
2229
+ _tool_result(
2230
+ success=True,
2231
+ output=_tool_output_for_subworkflow(sub_run_id=sub_run_id, output=sub_state.output),
2232
+ error=None,
2233
+ )
2234
+ )
2235
+ result: Dict[str, Any] = {"sub_run_id": sub_run_id, "output": sub_state.output}
2236
+ if include_traces:
2237
+ result["node_traces"] = self.get_node_traces(sub_run_id)
2238
+ return EffectOutcome.completed(result)
696
2239
 
697
2240
  if sub_state.status == RunStatus.FAILED:
698
2241
  # Subworkflow failed - propagate error
699
- return EffectOutcome.failed(
700
- f"Subworkflow '{workflow_id}' failed: {sub_state.error}"
701
- )
2242
+ if wrap_as_tool_result:
2243
+ return EffectOutcome.completed(
2244
+ _tool_result(
2245
+ success=False,
2246
+ output=None,
2247
+ error=f"Subworkflow '{workflow_id}' failed: {sub_state.error}",
2248
+ )
2249
+ )
2250
+ return EffectOutcome.failed(f"Subworkflow '{workflow_id}' failed: {sub_state.error}")
702
2251
 
703
2252
  if sub_state.status == RunStatus.WAITING:
704
2253
  # Subworkflow is waiting - parent must also wait
@@ -710,17 +2259,1774 @@ class Runtime:
710
2259
  details={
711
2260
  "sub_run_id": sub_run_id,
712
2261
  "sub_workflow_id": workflow_id,
2262
+ "include_traces": include_traces,
713
2263
  "sub_waiting": {
714
2264
  "reason": sub_state.waiting.reason.value if sub_state.waiting else None,
715
2265
  "wait_key": sub_state.waiting.wait_key if sub_state.waiting else None,
716
2266
  },
717
2267
  },
718
2268
  )
2269
+ if wrap_as_tool_result:
2270
+ if isinstance(wait.details, dict):
2271
+ wait.details["wrap_as_tool_result"] = True
2272
+ wait.details["tool_name"] = tool_name or "start_subworkflow"
2273
+ wait.details["call_id"] = call_id or "subworkflow"
719
2274
  return EffectOutcome.waiting(wait)
720
2275
 
721
2276
  # Unexpected status
722
2277
  return EffectOutcome.failed(f"Unexpected subworkflow status: {sub_state.status.value}")
723
2278
 
2279
+ # Built-in memory handlers ---------------------------------------------
2280
+
2281
+ def _handle_memory_query(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
2282
+ """Handle MEMORY_QUERY.
2283
+
2284
+ This effect supports provenance-first recall over archived memory spans stored in ArtifactStore.
2285
+ It is intentionally metadata-first and embedding-free (semantic retrieval belongs in AbstractMemory).
2286
+
2287
+ Payload (all optional unless otherwise stated):
2288
+ - span_id: str | int | list[str|int] (artifact_id or 1-based index into _runtime.memory_spans)
2289
+ - query: str (keyword substring match)
2290
+ - since: str (ISO8601, span intersection filter)
2291
+ - until: str (ISO8601, span intersection filter)
2292
+ - tags: dict[str, str|list[str]] (span tag filter; values can be multi-valued)
2293
+ - tags_mode: "all"|"any" (default "all"; AND/OR across tag keys)
2294
+ - authors: list[str] (alias: usernames; matches span.created_by case-insensitively)
2295
+ - locations: list[str] (matches span.location case-insensitively)
2296
+ - limit_spans: int (default 5)
2297
+ - deep: bool (default True when query is set; scans archived messages)
2298
+ - deep_limit_spans: int (default 50)
2299
+ - deep_limit_messages_per_span: int (default 400)
2300
+ - connected: bool (include connected spans via time adjacency + shared tags)
2301
+ - neighbor_hops: int (default 1 when connected=True)
2302
+ - connect_by: list[str] (default ["topic","person"])
2303
+ - max_messages: int (default 80; total messages rendered across all spans)
2304
+ - tool_name: str (default "recall_memory"; for formatting)
2305
+ - call_id: str (tool-call id passthrough)
2306
+ """
2307
+ from .vars import ensure_namespaces, parse_vars_path, resolve_vars_path
2308
+
2309
+ ensure_namespaces(run.vars)
2310
+ runtime_ns = run.vars.get("_runtime")
2311
+ if not isinstance(runtime_ns, dict):
2312
+ runtime_ns = {}
2313
+ run.vars["_runtime"] = runtime_ns
2314
+
2315
+ artifact_store = self._artifact_store
2316
+ if artifact_store is None:
2317
+ return EffectOutcome.failed(
2318
+ "MEMORY_QUERY requires an ArtifactStore; configure runtime.set_artifact_store(...)"
2319
+ )
2320
+
2321
+ payload = dict(effect.payload or {})
2322
+ tool_name = str(payload.get("tool_name") or "recall_memory")
2323
+ call_id = str(payload.get("call_id") or "memory")
2324
+
2325
+ # Recall effort policy (optional; no silent fallback).
2326
+ recall_level_raw = payload.get("recall_level")
2327
+ if recall_level_raw is None:
2328
+ recall_level_raw = payload.get("recallLevel")
2329
+ try:
2330
+ from ..memory.recall_levels import parse_recall_level, policy_for
2331
+
2332
+ recall_level = parse_recall_level(recall_level_raw)
2333
+ except Exception as e:
2334
+ return EffectOutcome.failed(str(e))
2335
+
2336
+ recall_warnings: list[str] = []
2337
+ recall_effort: dict[str, Any] = {}
2338
+
2339
+ # Scope routing (run/session/global). Scope affects which run owns the span index queried.
2340
+ scope = str(payload.get("scope") or "run").strip().lower() or "run"
2341
+ if scope not in {"run", "session", "global", "all"}:
2342
+ return EffectOutcome.failed(f"Unknown memory_query scope: {scope}")
2343
+
2344
+ # Return mode controls whether we include structured meta in the tool result.
2345
+ return_mode = str(payload.get("return") or payload.get("return_mode") or "rendered").strip().lower() or "rendered"
2346
+ if return_mode not in {"rendered", "meta", "both"}:
2347
+ return EffectOutcome.failed(f"Unknown memory_query return mode: {return_mode}")
2348
+
2349
+ query = payload.get("query")
2350
+ query_text = str(query or "").strip()
2351
+ since = payload.get("since")
2352
+ until = payload.get("until")
2353
+ tags = payload.get("tags")
2354
+ tags_dict: Optional[Dict[str, Any]] = None
2355
+ if isinstance(tags, dict):
2356
+ # Accept str or list[str] values. Ignore reserved key "kind".
2357
+ out_tags: Dict[str, Any] = {}
2358
+ for k, v in tags.items():
2359
+ if not isinstance(k, str) or not k.strip():
2360
+ continue
2361
+ if k == "kind":
2362
+ continue
2363
+ if isinstance(v, str) and v.strip():
2364
+ out_tags[k.strip()] = v.strip()
2365
+ elif isinstance(v, (list, tuple)):
2366
+ vals = [str(x).strip() for x in v if isinstance(x, str) and str(x).strip()]
2367
+ if vals:
2368
+ out_tags[k.strip()] = vals
2369
+ tags_dict = out_tags or None
2370
+
2371
+ tags_mode_raw = payload.get("tags_mode")
2372
+ if tags_mode_raw is None:
2373
+ tags_mode_raw = payload.get("tagsMode")
2374
+ if tags_mode_raw is None:
2375
+ tags_mode_raw = payload.get("tag_mode")
2376
+ tags_mode = str(tags_mode_raw or "all").strip().lower() or "all"
2377
+ if tags_mode in {"and"}:
2378
+ tags_mode = "all"
2379
+ if tags_mode in {"or"}:
2380
+ tags_mode = "any"
2381
+ if tags_mode not in {"all", "any"}:
2382
+ tags_mode = "all"
2383
+
2384
+ def _norm_str_list(value: Any) -> list[str]:
2385
+ if value is None:
2386
+ return []
2387
+ if isinstance(value, str):
2388
+ v = value.strip()
2389
+ return [v] if v else []
2390
+ if not isinstance(value, list):
2391
+ return []
2392
+ out: list[str] = []
2393
+ for x in value:
2394
+ if isinstance(x, str) and x.strip():
2395
+ out.append(x.strip())
2396
+ # preserve order but dedup (case-insensitive)
2397
+ seen: set[str] = set()
2398
+ deduped: list[str] = []
2399
+ for s in out:
2400
+ key = s.lower()
2401
+ if key in seen:
2402
+ continue
2403
+ seen.add(key)
2404
+ deduped.append(s)
2405
+ return deduped
2406
+
2407
+ authors = _norm_str_list(payload.get("authors") if "authors" in payload else payload.get("usernames"))
2408
+ if not authors:
2409
+ authors = _norm_str_list(payload.get("users"))
2410
+ locations = _norm_str_list(payload.get("locations") if "locations" in payload else payload.get("location"))
2411
+
2412
+ limit_spans_provided = "limit_spans" in payload
2413
+ try:
2414
+ limit_spans = int(payload.get("limit_spans", 5) or 5)
2415
+ except Exception:
2416
+ limit_spans = 5
2417
+ if limit_spans < 1:
2418
+ limit_spans = 1
2419
+
2420
+ deep_provided = "deep" in payload
2421
+ deep = payload.get("deep")
2422
+ if deep is None:
2423
+ deep_enabled = bool(query_text)
2424
+ else:
2425
+ deep_enabled = bool(deep)
2426
+
2427
+ deep_limit_spans_provided = "deep_limit_spans" in payload
2428
+ try:
2429
+ deep_limit_spans = int(payload.get("deep_limit_spans", 50) or 50)
2430
+ except Exception:
2431
+ deep_limit_spans = 50
2432
+ if deep_limit_spans < 1:
2433
+ deep_limit_spans = 1
2434
+
2435
+ deep_limit_messages_provided = "deep_limit_messages_per_span" in payload
2436
+ try:
2437
+ deep_limit_messages_per_span = int(payload.get("deep_limit_messages_per_span", 400) or 400)
2438
+ except Exception:
2439
+ deep_limit_messages_per_span = 400
2440
+ if deep_limit_messages_per_span < 1:
2441
+ deep_limit_messages_per_span = 1
2442
+
2443
+ connected_provided = "connected" in payload
2444
+ connected = bool(payload.get("connected", False))
2445
+ try:
2446
+ neighbor_hops = int(payload.get("neighbor_hops", 1) or 1)
2447
+ except Exception:
2448
+ neighbor_hops = 1
2449
+ if neighbor_hops < 0:
2450
+ neighbor_hops = 0
2451
+
2452
+ connect_by = payload.get("connect_by")
2453
+ if isinstance(connect_by, list):
2454
+ connect_keys = [str(x) for x in connect_by if isinstance(x, (str, int, float)) and str(x).strip()]
2455
+ else:
2456
+ connect_keys = ["topic", "person"]
2457
+
2458
+ max_messages_provided = "max_messages" in payload
2459
+ try:
2460
+ max_messages = int(payload.get("max_messages", -1) or -1)
2461
+ except Exception:
2462
+ max_messages = -1
2463
+ # `-1` means "no truncation" for rendered messages.
2464
+ if max_messages < -1:
2465
+ max_messages = -1
2466
+ if max_messages != -1 and max_messages < 1:
2467
+ max_messages = 1
2468
+
2469
+ # Apply recall_level budgets when explicitly provided (no silent downgrade).
2470
+ if recall_level is not None:
2471
+ pol = policy_for(recall_level)
2472
+
2473
+ if not limit_spans_provided:
2474
+ limit_spans = pol.span.limit_spans_default
2475
+ if limit_spans > pol.span.limit_spans_max:
2476
+ recall_warnings.append(
2477
+ f"recall_level={recall_level.value}: clamped limit_spans from {limit_spans} to {pol.span.limit_spans_max}"
2478
+ )
2479
+ limit_spans = pol.span.limit_spans_max
2480
+
2481
+ if deep_enabled and not pol.span.deep_allowed:
2482
+ recall_warnings.append(
2483
+ f"recall_level={recall_level.value}: deep scan disabled (not allowed at this level)"
2484
+ )
2485
+ deep_enabled = False
2486
+
2487
+ if deep_enabled and not deep_limit_spans_provided:
2488
+ deep_limit_spans = min(deep_limit_spans, pol.span.deep_limit_spans_max)
2489
+ if deep_limit_spans > pol.span.deep_limit_spans_max:
2490
+ recall_warnings.append(
2491
+ f"recall_level={recall_level.value}: clamped deep_limit_spans from {deep_limit_spans} to {pol.span.deep_limit_spans_max}"
2492
+ )
2493
+ deep_limit_spans = pol.span.deep_limit_spans_max
2494
+
2495
+ if deep_enabled and not deep_limit_messages_provided:
2496
+ deep_limit_messages_per_span = min(deep_limit_messages_per_span, pol.span.deep_limit_messages_per_span_max)
2497
+ if deep_limit_messages_per_span > pol.span.deep_limit_messages_per_span_max:
2498
+ recall_warnings.append(
2499
+ f"recall_level={recall_level.value}: clamped deep_limit_messages_per_span from {deep_limit_messages_per_span} to {pol.span.deep_limit_messages_per_span_max}"
2500
+ )
2501
+ deep_limit_messages_per_span = pol.span.deep_limit_messages_per_span_max
2502
+
2503
+ if connected and not pol.span.connected_allowed:
2504
+ recall_warnings.append(
2505
+ f"recall_level={recall_level.value}: connected expansion disabled (not allowed at this level)"
2506
+ )
2507
+ connected = False
2508
+
2509
+ if neighbor_hops > pol.span.neighbor_hops_max:
2510
+ recall_warnings.append(
2511
+ f"recall_level={recall_level.value}: clamped neighbor_hops from {neighbor_hops} to {pol.span.neighbor_hops_max}"
2512
+ )
2513
+ neighbor_hops = pol.span.neighbor_hops_max
2514
+
2515
+ # Enforce bounded rendering budget (max_messages). -1 means "unbounded" and is not allowed when policy is active.
2516
+ if not max_messages_provided:
2517
+ max_messages = pol.span.max_messages_default
2518
+ elif max_messages == -1:
2519
+ recall_warnings.append(
2520
+ f"recall_level={recall_level.value}: max_messages=-1 (unbounded) is not allowed; clamped to {pol.span.max_messages_max}"
2521
+ )
2522
+ max_messages = pol.span.max_messages_max
2523
+ elif max_messages > pol.span.max_messages_max:
2524
+ recall_warnings.append(
2525
+ f"recall_level={recall_level.value}: clamped max_messages from {max_messages} to {pol.span.max_messages_max}"
2526
+ )
2527
+ max_messages = pol.span.max_messages_max
2528
+
2529
+ recall_effort = {
2530
+ "recall_level": recall_level.value,
2531
+ "applied": {
2532
+ "limit_spans": limit_spans,
2533
+ "deep": bool(deep_enabled),
2534
+ "deep_limit_spans": deep_limit_spans,
2535
+ "deep_limit_messages_per_span": deep_limit_messages_per_span,
2536
+ "connected": bool(connected),
2537
+ "neighbor_hops": neighbor_hops,
2538
+ "max_messages": max_messages,
2539
+ },
2540
+ }
2541
+
2542
+ from ..memory.active_context import ActiveContextPolicy, TimeRange
2543
+
2544
+ # Select run(s) to query.
2545
+ runs_to_query: list[RunState] = []
2546
+ if scope == "run":
2547
+ runs_to_query = [run]
2548
+ elif scope == "session":
2549
+ runs_to_query = [self._resolve_scope_owner_run(run, scope="session")]
2550
+ elif scope == "global":
2551
+ runs_to_query = [self._resolve_scope_owner_run(run, scope="global")]
2552
+ else: # all
2553
+ # Deterministic order; dedup by run_id.
2554
+ root = self._resolve_scope_owner_run(run, scope="session")
2555
+ global_run = self._resolve_scope_owner_run(run, scope="global")
2556
+ seen_ids: set[str] = set()
2557
+ for r in (run, root, global_run):
2558
+ if r.run_id in seen_ids:
2559
+ continue
2560
+ seen_ids.add(r.run_id)
2561
+ runs_to_query.append(r)
2562
+
2563
+ # Collect per-run span indexes (metadata) and summary maps for rendering.
2564
+ spans_by_run_id: dict[str, list[dict[str, Any]]] = {}
2565
+ all_spans: list[dict[str, Any]] = []
2566
+ all_summary_by_artifact: dict[str, str] = {}
2567
+ for target in runs_to_query:
2568
+ spans = ActiveContextPolicy.list_memory_spans_from_run(target)
2569
+ # `memory_spans` is a general span-like index (conversation spans, notes, evidence, etc).
2570
+ # MEMORY_QUERY is specifically for provenance-first *memory recall*, not evidence retrieval.
2571
+ spans = [s for s in spans if not (isinstance(s, dict) and str(s.get("kind") or "") == "evidence")]
2572
+ spans_by_run_id[target.run_id] = spans
2573
+ all_spans.extend([dict(s) for s in spans if isinstance(s, dict)])
2574
+ all_summary_by_artifact.update(ActiveContextPolicy.summary_text_by_artifact_id_from_run(target))
2575
+
2576
+ # Resolve explicit span ids if provided.
2577
+ span_id_payload = payload.get("span_id")
2578
+ span_ids_payload = payload.get("span_ids")
2579
+ explicit_ids = span_ids_payload if isinstance(span_ids_payload, list) else span_id_payload
2580
+
2581
+ all_selected: list[str] = []
2582
+
2583
+ if explicit_ids is not None:
2584
+ explicit_list = list(explicit_ids) if isinstance(explicit_ids, list) else [explicit_ids]
2585
+
2586
+ # Indices are inherently scoped to a single run's span list; for `scope="all"`,
2587
+ # require stable artifact ids to avoid ambiguity.
2588
+ if scope == "all":
2589
+ for x in explicit_list:
2590
+ if isinstance(x, int):
2591
+ return EffectOutcome.failed("memory_query scope='all' requires explicit span_ids as artifact ids (no indices)")
2592
+ if isinstance(x, str) and x.strip().isdigit():
2593
+ return EffectOutcome.failed("memory_query scope='all' requires explicit span_ids as artifact ids (no indices)")
2594
+ # Treat as artifact ids.
2595
+ all_selected = _dedup_preserve_order([str(x).strip() for x in explicit_list if str(x).strip()])
2596
+ else:
2597
+ # Single-run resolution for indices.
2598
+ target = runs_to_query[0]
2599
+ spans = spans_by_run_id.get(target.run_id, [])
2600
+ all_selected = ActiveContextPolicy.resolve_span_ids_from_spans(explicit_list, spans)
2601
+ else:
2602
+ # Filter spans per target and union.
2603
+ time_range = None
2604
+ if since or until:
2605
+ time_range = TimeRange(
2606
+ start=str(since) if since else None,
2607
+ end=str(until) if until else None,
2608
+ )
2609
+
2610
+ for target in runs_to_query:
2611
+ spans = spans_by_run_id.get(target.run_id, [])
2612
+ matches = ActiveContextPolicy.filter_spans_from_run(
2613
+ target,
2614
+ artifact_store=artifact_store,
2615
+ time_range=time_range,
2616
+ tags=tags_dict,
2617
+ tags_mode=tags_mode,
2618
+ authors=authors or None,
2619
+ locations=locations or None,
2620
+ query=query_text or None,
2621
+ limit=limit_spans,
2622
+ )
2623
+ selected = [str(s.get("artifact_id") or "") for s in matches if isinstance(s, dict) and s.get("artifact_id")]
2624
+
2625
+ if deep_enabled and query_text:
2626
+ # Deep scan is bounded and should respect metadata filters (tags/authors/locations/time).
2627
+ deep_candidates = ActiveContextPolicy.filter_spans_from_run(
2628
+ target,
2629
+ artifact_store=artifact_store,
2630
+ time_range=time_range,
2631
+ tags=tags_dict,
2632
+ tags_mode=tags_mode,
2633
+ authors=authors or None,
2634
+ locations=locations or None,
2635
+ query=None,
2636
+ limit=deep_limit_spans,
2637
+ )
2638
+ selected = _dedup_preserve_order(
2639
+ selected
2640
+ + _deep_scan_span_ids(
2641
+ spans=deep_candidates,
2642
+ artifact_store=artifact_store,
2643
+ query=query_text,
2644
+ limit_spans=deep_limit_spans,
2645
+ limit_messages_per_span=deep_limit_messages_per_span,
2646
+ )
2647
+ )
2648
+
2649
+ if connected and selected:
2650
+ connect_candidates = ActiveContextPolicy.filter_spans_from_run(
2651
+ target,
2652
+ artifact_store=artifact_store,
2653
+ time_range=time_range,
2654
+ tags=tags_dict,
2655
+ tags_mode=tags_mode,
2656
+ authors=authors or None,
2657
+ locations=locations or None,
2658
+ query=None,
2659
+ limit=max(1000, len(spans)),
2660
+ )
2661
+ selected = _dedup_preserve_order(
2662
+ _expand_connected_span_ids(
2663
+ spans=connect_candidates,
2664
+ seed_artifact_ids=selected,
2665
+ connect_keys=connect_keys,
2666
+ neighbor_hops=neighbor_hops,
2667
+ limit=max(limit_spans, len(selected)),
2668
+ )
2669
+ )
2670
+
2671
+ all_selected = _dedup_preserve_order(all_selected + selected)
2672
+
2673
+ rendered_text = ""
2674
+ if return_mode in {"rendered", "both"}:
2675
+ # Render output (provenance + messages). Note: this may load artifacts.
2676
+ rendered_text = _render_memory_query_output(
2677
+ spans=all_spans,
2678
+ artifact_store=artifact_store,
2679
+ selected_artifact_ids=all_selected,
2680
+ summary_by_artifact=all_summary_by_artifact,
2681
+ max_messages=max_messages,
2682
+ )
2683
+
2684
+ # Structured meta output (for deterministic workflows).
2685
+ meta: dict[str, Any] = {}
2686
+ if return_mode in {"meta", "both"}:
2687
+ # Index span record by artifact id (first match wins deterministically).
2688
+ by_artifact: dict[str, dict[str, Any]] = {}
2689
+ for s in all_spans:
2690
+ try:
2691
+ aid = str(s.get("artifact_id") or "").strip()
2692
+ except Exception:
2693
+ aid = ""
2694
+ if not aid or aid in by_artifact:
2695
+ continue
2696
+ by_artifact[aid] = s
2697
+
2698
+ matches: list[dict[str, Any]] = []
2699
+ for aid in all_selected:
2700
+ span = by_artifact.get(aid)
2701
+ if not isinstance(span, dict):
2702
+ continue
2703
+ m: dict[str, Any] = {
2704
+ "span_id": aid,
2705
+ "kind": span.get("kind"),
2706
+ "created_at": span.get("created_at"),
2707
+ "from_timestamp": span.get("from_timestamp"),
2708
+ "to_timestamp": span.get("to_timestamp"),
2709
+ "tags": span.get("tags") if isinstance(span.get("tags"), dict) else {},
2710
+ }
2711
+ for k in ("created_by", "location"):
2712
+ if k in span:
2713
+ m[k] = span.get(k)
2714
+ # Include known preview fields without enforcing a global schema.
2715
+ for k in ("note_preview", "message_count", "summary_message_id"):
2716
+ if k in span:
2717
+ m[k] = span.get(k)
2718
+ matches.append(m)
2719
+
2720
+ meta = {"matches": matches, "span_ids": list(all_selected)}
2721
+
2722
+ # Attach recall policy transparency (warnings + applied budgets).
2723
+ if recall_level is not None:
2724
+ if return_mode in {"meta", "both"}:
2725
+ if recall_effort:
2726
+ meta["effort"] = recall_effort
2727
+ if recall_warnings:
2728
+ meta["warnings"] = list(recall_warnings)
2729
+ if return_mode in {"rendered", "both"} and recall_warnings:
2730
+ warnings_block = "\n".join([f"- {w}" for w in recall_warnings if str(w).strip()])
2731
+ rendered_text = f"[recall warnings]\n{warnings_block}\n\n{rendered_text}".strip()
2732
+
2733
+ result = {
2734
+ "mode": "executed",
2735
+ "results": [
2736
+ {
2737
+ "call_id": call_id,
2738
+ "name": tool_name,
2739
+ "success": True,
2740
+ "output": rendered_text if return_mode in {"rendered", "both"} else "",
2741
+ "error": None,
2742
+ "meta": meta if meta else None,
2743
+ }
2744
+ ],
2745
+ }
2746
+ return EffectOutcome.completed(result=result)
2747
+
2748
+ def _handle_vars_query(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
2749
+ """Handle VARS_QUERY.
2750
+
2751
+ This is a JSON-safe, runtime-owned introspection primitive intended for:
2752
+ - progressive recall/debugging (e.g., inspect `scratchpad`)
2753
+ - host tooling parity (schema-only tools that map to runtime effects)
2754
+
2755
+ Payload (all optional unless stated):
2756
+ - path: str (default "scratchpad"; supports dot path or JSON pointer "/a/b/0")
2757
+ - keys_only: bool (default False; when True, return keys/length instead of full value)
2758
+ - target_run_id: str (optional; inspect another run state)
2759
+ - tool_name: str (default "inspect_vars"; for tool-style output)
2760
+ - call_id: str (tool-call id passthrough)
2761
+ """
2762
+ import json
2763
+
2764
+ from .vars import ensure_namespaces, parse_vars_path, resolve_vars_path
2765
+
2766
+ payload = dict(effect.payload or {})
2767
+ tool_name = str(payload.get("tool_name") or "inspect_vars")
2768
+ call_id = str(payload.get("call_id") or "vars")
2769
+
2770
+ target_run_id = payload.get("target_run_id")
2771
+ if target_run_id is not None:
2772
+ target_run_id = str(target_run_id).strip() or None
2773
+
2774
+ path = payload.get("path")
2775
+ if path is None:
2776
+ path = payload.get("var_path")
2777
+ path_text = str(path or "").strip() or "scratchpad"
2778
+
2779
+ keys_only = bool(payload.get("keys_only", False))
2780
+
2781
+ target_run = run
2782
+ if target_run_id and target_run_id != run.run_id:
2783
+ loaded = self._run_store.load(target_run_id)
2784
+ if loaded is None:
2785
+ return EffectOutcome.completed(
2786
+ result={
2787
+ "mode": "executed",
2788
+ "results": [
2789
+ {
2790
+ "call_id": call_id,
2791
+ "name": tool_name,
2792
+ "success": False,
2793
+ "output": None,
2794
+ "error": f"Unknown target_run_id: {target_run_id}",
2795
+ }
2796
+ ],
2797
+ }
2798
+ )
2799
+ target_run = loaded
2800
+
2801
+ ensure_namespaces(target_run.vars)
2802
+
2803
+ try:
2804
+ tokens = parse_vars_path(path_text)
2805
+ value = resolve_vars_path(target_run.vars, tokens)
2806
+ except Exception as e:
2807
+ return EffectOutcome.completed(
2808
+ result={
2809
+ "mode": "executed",
2810
+ "results": [
2811
+ {
2812
+ "call_id": call_id,
2813
+ "name": tool_name,
2814
+ "success": False,
2815
+ "output": None,
2816
+ "error": str(e),
2817
+ }
2818
+ ],
2819
+ }
2820
+ )
2821
+
2822
+ out: Dict[str, Any] = {"path": path_text, "type": type(value).__name__}
2823
+ if keys_only:
2824
+ if isinstance(value, dict):
2825
+ out["keys"] = sorted([str(k) for k in value.keys()])
2826
+ elif isinstance(value, list):
2827
+ out["length"] = len(value)
2828
+ else:
2829
+ out["value"] = value
2830
+ else:
2831
+ out["value"] = value
2832
+
2833
+ text = json.dumps(out, ensure_ascii=False, indent=2, sort_keys=True, default=str)
2834
+
2835
+ return EffectOutcome.completed(
2836
+ result={
2837
+ "mode": "executed",
2838
+ "results": [
2839
+ {
2840
+ "call_id": call_id,
2841
+ "name": tool_name,
2842
+ "success": True,
2843
+ "output": text,
2844
+ "error": None,
2845
+ }
2846
+ ],
2847
+ }
2848
+ )
2849
+
2850
    def _handle_memory_tag(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        """Handle MEMORY_TAG.

        Payload (required unless stated):
          - span_id: str | int (artifact_id or 1-based index into `_runtime.memory_spans`)
          - scope: str (optional, default "run") "run" | "session" | "global" | "all"
          - tags: dict[str,str] (merged into span["tags"] by default)
          - merge: bool (optional, default True; when False, replaces span["tags"])
          - target_run_id: str (optional; defaults to current run_id; used as the base run for scope routing)
          - tool_name: str (optional; for tool-style output, default "remember")
          - call_id: str (optional; passthrough for tool-style output)

        Notes:
          - This mutates the owner run's span index (`_runtime.memory_spans`) only; it does not change artifacts.
          - Tagging is intentionally JSON-safe (string->string).
        """
        import json

        from .vars import ensure_namespaces

        payload = dict(effect.payload or {})
        tool_name = str(payload.get("tool_name") or "remember")
        call_id = str(payload.get("call_id") or "memory")

        # Resolve the base run used for scope routing (may differ from the executing run).
        base_run_id = str(payload.get("target_run_id") or run.run_id).strip() or run.run_id
        base_run = run
        if base_run_id != run.run_id:
            loaded = self._run_store.load(base_run_id)
            if loaded is None:
                return EffectOutcome.failed(f"Unknown target_run_id: {base_run_id}")
            base_run = loaded
        ensure_namespaces(base_run.vars)

        scope = str(payload.get("scope") or "run").strip().lower() or "run"
        if scope not in {"run", "session", "global", "all"}:
            return EffectOutcome.failed(f"Unknown memory_tag scope: {scope}")

        span_id = payload.get("span_id")
        tags = payload.get("tags")
        if span_id is None:
            return EffectOutcome.failed("MEMORY_TAG requires payload.span_id")
        if not isinstance(tags, dict) or not tags:
            return EffectOutcome.failed("MEMORY_TAG requires payload.tags as a non-empty dict[str,str]")

        merge = bool(payload.get("merge", True))

        # Keep only non-empty str->str pairs; "kind" is reserved for the runtime's
        # own span classification and is silently dropped from user tags.
        clean_tags: Dict[str, str] = {}
        for k, v in tags.items():
            if isinstance(k, str) and isinstance(v, str) and k and v:
                if k == "kind":
                    continue
                clean_tags[k] = v
        if not clean_tags:
            return EffectOutcome.failed("MEMORY_TAG requires at least one non-empty string tag")

        # span_id may be an artifact id (string) or a 1-based index (int or digit string).
        artifact_id: Optional[str] = None
        index_hint: Optional[int] = None

        if isinstance(span_id, int):
            index_hint = span_id
        elif isinstance(span_id, str):
            s = span_id.strip()
            if not s:
                return EffectOutcome.failed("MEMORY_TAG requires a non-empty span_id")
            if s.isdigit():
                index_hint = int(s)
            else:
                artifact_id = s
        else:
            return EffectOutcome.failed("MEMORY_TAG requires span_id as str or int")

        # Indices are per-run and therefore ambiguous when tagging multiple runs.
        if scope == "all" and index_hint is not None:
            return EffectOutcome.failed("memory_tag scope='all' requires span_id as artifact id (no indices)")

        def _ensure_spans(target_run: RunState) -> list[dict[str, Any]]:
            # Guarantee `_runtime.memory_spans` exists as a list on the target run
            # and return it (shared reference, so mutations persist in run.vars).
            ensure_namespaces(target_run.vars)
            target_runtime_ns = target_run.vars.get("_runtime")
            if not isinstance(target_runtime_ns, dict):
                target_runtime_ns = {}
                target_run.vars["_runtime"] = target_runtime_ns
            spans_any = target_runtime_ns.get("memory_spans")
            if not isinstance(spans_any, list):
                spans_any = []
                target_runtime_ns["memory_spans"] = spans_any
            return spans_any  # type: ignore[return-value]

        def _resolve_target_index(spans_list: list[Any], *, artifact_id_value: str, index_value: Optional[int]) -> Optional[int]:
            # Translate a 1-based index or an artifact id into a list index.
            # Returns None when out of range, malformed, or not found.
            if index_value is not None:
                idx = int(index_value) - 1
                if idx < 0 or idx >= len(spans_list):
                    return None
                span = spans_list[idx]
                if not isinstance(span, dict):
                    return None
                return idx
            for i, span in enumerate(spans_list):
                if not isinstance(span, dict):
                    continue
                if str(span.get("artifact_id") or "") == artifact_id_value:
                    return i
            return None

        def _apply_tags(target_run: RunState, spans_list: list[Any]) -> Optional[dict[str, Any]]:
            # Apply clean_tags to the resolved span on one run. Returns a small
            # provenance record on success, None when the span cannot be resolved.
            artifact_id_local = artifact_id
            target_index_local: Optional[int] = None

            # Resolve index->artifact id when an index hint is used.
            if index_hint is not None:
                idx = int(index_hint) - 1
                if idx < 0 or idx >= len(spans_list):
                    return None
                span = spans_list[idx]
                if not isinstance(span, dict):
                    return None
                resolved = str(span.get("artifact_id") or "").strip()
                if not resolved:
                    return None
                artifact_id_local = resolved
                target_index_local = idx

            if not artifact_id_local:
                return None

            if target_index_local is None:
                target_index_local = _resolve_target_index(
                    spans_list, artifact_id_value=str(artifact_id_local), index_value=None
                )
            if target_index_local is None:
                return None

            target = spans_list[target_index_local]
            if not isinstance(target, dict):
                return None

            existing_tags = target.get("tags")
            if not isinstance(existing_tags, dict):
                existing_tags = {}

            # merge=True layers new tags over existing; merge=False replaces them.
            if merge:
                merged_tags = dict(existing_tags)
                merged_tags.update(clean_tags)
            else:
                merged_tags = dict(clean_tags)

            target["tags"] = merged_tags
            target["tagged_at"] = utc_now_iso()
            # Attribution uses the *executing* run's actor, not the target run's.
            if run.actor_id:
                target["tagged_by"] = str(run.actor_id)
            return {"run_id": target_run.run_id, "artifact_id": str(artifact_id_local), "tags": merged_tags}

        # Resolve which run(s) to tag.
        runs_to_tag: list[RunState] = []
        if scope == "all":
            # "all" fans out to base + session root + global owner, deduped by run_id.
            root = self._resolve_scope_owner_run(base_run, scope="session")
            global_run = self._resolve_scope_owner_run(base_run, scope="global")
            seen_ids: set[str] = set()
            for r in (base_run, root, global_run):
                if r.run_id in seen_ids:
                    continue
                seen_ids.add(r.run_id)
                runs_to_tag.append(r)
        else:
            try:
                runs_to_tag = [self._resolve_scope_owner_run(base_run, scope=scope)]
            except Exception as e:
                return EffectOutcome.failed(str(e))

        applied: list[dict[str, Any]] = []
        for target_run in runs_to_tag:
            spans_list = _ensure_spans(target_run)
            entry = _apply_tags(target_run, spans_list)
            if entry is None:
                continue
            applied.append(entry)
            if target_run is not run:
                # Persist runs other than the executing one; the current run is
                # checkpointed by the runtime's normal tick/save cycle.
                target_run.updated_at = utc_now_iso()
                self._run_store.save(target_run)

        if not applied:
            if artifact_id:
                return EffectOutcome.failed(f"Unknown span_id: {artifact_id}")
            if index_hint is not None:
                return EffectOutcome.failed(f"Unknown span index: {index_hint}")
            return EffectOutcome.failed("Could not resolve span_id")

        rendered_tags = json.dumps(applied[0].get("tags") or {}, ensure_ascii=False, sort_keys=True)
        rendered_runs = ",".join([str(x.get("run_id") or "") for x in applied if x.get("run_id")])
        text = f"Tagged span_id={applied[0].get('artifact_id')} scope={scope} runs=[{rendered_runs}] tags={rendered_tags}"

        # Tool-style envelope so schema-only host tools can surface the outcome.
        result = {
            "mode": "executed",
            "results": [
                {
                    "call_id": call_id,
                    "name": tool_name,
                    "success": True,
                    "output": text,
                    "error": None,
                    "meta": {"applied": applied},
                }
            ],
        }
        return EffectOutcome.completed(result=result)
3053
+
3054
    def _handle_memory_compact(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        """Handle MEMORY_COMPACT.

        This is a runtime-owned compaction of a run's active context:
          - archives the compacted messages to ArtifactStore (provenance preserved)
          - inserts a system summary message that includes `span_id=...` (LLM-visible handle)
          - updates `_runtime.memory_spans` index with metadata/tags

        Payload (optional unless stated):
          - preserve_recent: int (default 6; preserves N most recent non-system messages)
          - compression_mode: str ("light"|"standard"|"heavy", default "standard")
          - focus: str (optional; topic to prioritize)
          - target_run_id: str (optional; defaults to current run)
          - tool_name: str (optional; for tool-style output, default "compact_memory")
          - call_id: str (optional)
        """
        import json
        from uuid import uuid4

        from .vars import ensure_namespaces
        from ..memory.compaction import normalize_messages, split_for_compaction, span_metadata_from_messages

        ensure_namespaces(run.vars)

        # Compaction archives messages, so an artifact store is a hard requirement.
        artifact_store = self._artifact_store
        if artifact_store is None:
            return EffectOutcome.failed(
                "MEMORY_COMPACT requires an ArtifactStore; configure runtime.set_artifact_store(...)"
            )

        payload = dict(effect.payload or {})
        tool_name = str(payload.get("tool_name") or "compact_memory")
        call_id = str(payload.get("call_id") or "memory")

        target_run_id = payload.get("target_run_id")
        if target_run_id is not None:
            target_run_id = str(target_run_id).strip() or None

        # Coerce preserve_recent to a non-negative int; anything unparseable -> 6.
        try:
            preserve_recent = int(payload.get("preserve_recent", 6) or 6)
        except Exception:
            preserve_recent = 6
        if preserve_recent < 0:
            preserve_recent = 0

        # Unknown compression modes silently normalize to "standard".
        compression_mode = str(payload.get("compression_mode") or "standard").strip().lower()
        if compression_mode not in ("light", "standard", "heavy"):
            compression_mode = "standard"

        focus = payload.get("focus")
        focus_text = str(focus).strip() if isinstance(focus, str) else ""
        focus_text = focus_text or None

        # Resolve which run is being compacted.
        target_run = run
        if target_run_id and target_run_id != run.run_id:
            loaded = self._run_store.load(target_run_id)
            if loaded is None:
                return EffectOutcome.failed(f"Unknown target_run_id: {target_run_id}")
            target_run = loaded
        ensure_namespaces(target_run.vars)

        ctx = target_run.vars.get("context")
        if not isinstance(ctx, dict):
            return EffectOutcome.failed("MEMORY_COMPACT requires vars.context to be a dict")
        messages_raw = ctx.get("messages")
        if not isinstance(messages_raw, list) or not messages_raw:
            # Empty context is a successful no-op, reported tool-style.
            return EffectOutcome.completed(
                result={
                    "mode": "executed",
                    "results": [
                        {
                            "call_id": call_id,
                            "name": tool_name,
                            "success": True,
                            "output": "No messages to compact.",
                            "error": None,
                        }
                    ],
                }
            )

        now_iso = utc_now_iso
        messages = normalize_messages(messages_raw, now_iso=now_iso)
        split = split_for_compaction(messages, preserve_recent=preserve_recent)

        if not split.older_messages:
            # Nothing old enough to archive — also a successful no-op.
            return EffectOutcome.completed(
                result={
                    "mode": "executed",
                    "results": [
                        {
                            "call_id": call_id,
                            "name": tool_name,
                            "success": True,
                            "output": f"Nothing to compact (non-system messages <= preserve_recent={preserve_recent}).",
                            "error": None,
                        }
                    ],
                }
            )

        # ------------------------------------------------------------------
        # 1) LLM summary - use integration layer summarizer if available
        # ------------------------------------------------------------------
        #
        # When chat_summarizer is injected (from AbstractCore integration layer),
        # use it for adaptive chunking based on max_tokens. This handles cases
        # where the environment can't use the model's full context window
        # (e.g., GPU memory constraints).
        #
        # When max_tokens == -1 (AUTO): Uses model's full capability
        # When max_tokens > 0: Chunks messages if they exceed the limit

        sub_run_id: Optional[str] = None  # Track for provenance if using fallback

        if self._chat_summarizer is not None:
            # Use AbstractCore's BasicSummarizer with adaptive chunking
            try:
                summarizer_result = self._chat_summarizer.summarize_chat_history(
                    messages=split.older_messages,
                    preserve_recent=0,  # Already split; don't preserve again
                    focus=focus_text,
                    compression_mode=compression_mode,
                )
                summary_text_out = summarizer_result.get("summary", "(summary unavailable)")
                key_points = list(summarizer_result.get("key_points") or [])
                confidence = summarizer_result.get("confidence")
            except Exception as e:
                return EffectOutcome.failed(f"Summarizer failed: {e}")
        else:
            # Fallback: Original prompt-based approach (for non-AbstractCore runtimes)
            older_text = "\n".join([f"{m.get('role')}: {m.get('content')}" for m in split.older_messages])
            focus_line = f"Focus: {focus_text}\n" if focus_text else ""
            mode_line = f"Compression mode: {compression_mode}\n"

            prompt = (
                "You are compressing older conversation context for an agent runtime.\n"
                "Write a faithful, compact summary that preserves decisions, constraints, names, file paths, commands, and open questions.\n"
                "Do NOT invent details. If something is unknown, say so.\n"
                f"{mode_line}"
                f"{focus_line}"
                "Return STRICT JSON with keys: summary (string), key_points (array of strings), confidence (number 0..1).\n\n"
                "OLDER MESSAGES (to be archived):\n"
                f"{older_text}\n"
            )

            # Best-effort output budget for the summary itself.
            limits = target_run.vars.get("_limits") if isinstance(target_run.vars.get("_limits"), dict) else {}
            max_out = limits.get("max_output_tokens")
            try:
                max_out_tokens = int(max_out) if max_out is not None else None
            except Exception:
                max_out_tokens = None

            llm_payload: Dict[str, Any] = {"prompt": prompt}
            if max_out_tokens is not None:
                llm_payload["params"] = {"max_tokens": max_out_tokens}

            # Two-node throwaway subworkflow: one LLM call, then surface its result.
            def llm_node(sub_run: RunState, sub_ctx) -> StepPlan:
                return StepPlan(
                    node_id="llm",
                    effect=Effect(type=EffectType.LLM_CALL, payload=llm_payload, result_key="_temp.llm"),
                    next_node="done",
                )

            def done_node(sub_run: RunState, sub_ctx) -> StepPlan:
                temp = sub_run.vars.get("_temp") if isinstance(sub_run.vars.get("_temp"), dict) else {}
                return StepPlan(node_id="done", complete_output={"response": temp.get("llm")})

            wf = WorkflowSpec(workflow_id="wf_memory_compact_llm", entry_node="llm", nodes={"llm": llm_node, "done": done_node})

            sub_run_id = self.start(
                workflow=wf,
                vars={"context": {"prompt": prompt}, "scratchpad": {}, "_runtime": {}, "_temp": {}, "_limits": dict(limits)},
                actor_id=run.actor_id,
                session_id=getattr(run, "session_id", None),
                parent_run_id=run.run_id,
            )

            sub_state = self.tick(workflow=wf, run_id=sub_run_id)
            if sub_state.status == RunStatus.WAITING:
                return EffectOutcome.failed("MEMORY_COMPACT does not support waiting subworkflows yet")
            if sub_state.status == RunStatus.FAILED:
                return EffectOutcome.failed(sub_state.error or "Compaction LLM subworkflow failed")
            response = (sub_state.output or {}).get("response")
            if not isinstance(response, dict):
                response = {}

            content = response.get("content")
            content_text = "" if content is None else str(content).strip()
            lowered = content_text.lower()
            # Heuristic: some providers return transport errors as message content;
            # treat well-known failure phrases as an unavailable LLM, not a summary.
            if any(
                keyword in lowered
                for keyword in (
                    "operation not permitted",
                    "failed to connect",
                    "connection refused",
                    "timed out",
                    "timeout",
                    "not running",
                    "model not found",
                )
            ):
                return EffectOutcome.failed(f"Compaction LLM unavailable: {content_text}")

            summary_text_out = content_text
            key_points: list[str] = []
            confidence: Optional[float] = None

            # Parse JSON if present (support fenced output).
            if content_text:
                candidate = content_text
                if "```" in candidate:
                    # extract first JSON-ish block
                    start = candidate.find("{")
                    end = candidate.rfind("}")
                    if 0 <= start < end:
                        candidate = candidate[start : end + 1]
                try:
                    parsed = json.loads(candidate)
                    if isinstance(parsed, dict):
                        if parsed.get("summary") is not None:
                            summary_text_out = str(parsed.get("summary") or "").strip() or summary_text_out
                        kp = parsed.get("key_points")
                        if isinstance(kp, list):
                            key_points = [str(x) for x in kp if isinstance(x, (str, int, float))][:20]
                        conf = parsed.get("confidence")
                        if isinstance(conf, (int, float)):
                            confidence = float(conf)
                except Exception:
                    # Non-JSON output: keep the raw text as the summary.
                    pass

        summary_text_out = summary_text_out.strip()
        if not summary_text_out:
            summary_text_out = "(summary unavailable)"

        # ------------------------------------------------------------------
        # 2) Archive older messages + update run state with summary
        # ------------------------------------------------------------------

        span_meta = span_metadata_from_messages(split.older_messages)
        artifact_payload = {
            "messages": split.older_messages,
            "span": span_meta,
            "created_at": now_iso(),
        }
        artifact_tags: Dict[str, str] = {
            "kind": "conversation_span",
            "compression_mode": compression_mode,
            "preserve_recent": str(preserve_recent),
        }
        if focus_text:
            artifact_tags["focus"] = focus_text

        meta = artifact_store.store_json(artifact_payload, run_id=target_run.run_id, tags=artifact_tags)
        archived_ref = meta.artifact_id

        # The span_id is embedded in the summary text so the LLM can later
        # request rehydration by handle.
        summary_message_id = f"msg_{uuid4().hex}"
        summary_prefix = f"[CONVERSATION HISTORY SUMMARY span_id={archived_ref}]"
        summary_metadata: Dict[str, Any] = {
            "message_id": summary_message_id,
            "kind": "memory_summary",
            "compression_mode": compression_mode,
            "preserve_recent": preserve_recent,
            "source_artifact_id": archived_ref,
            "source_message_count": int(span_meta.get("message_count") or 0),
            "source_from_timestamp": span_meta.get("from_timestamp"),
            "source_to_timestamp": span_meta.get("to_timestamp"),
            "source_from_message_id": span_meta.get("from_message_id"),
            "source_to_message_id": span_meta.get("to_message_id"),
        }
        if focus_text:
            summary_metadata["focus"] = focus_text

        summary_message = {
            "role": "system",
            "content": f"{summary_prefix}: {summary_text_out}",
            "timestamp": now_iso(),
            "metadata": summary_metadata,
        }

        # New context = original system messages + summary + preserved recent tail.
        new_messages = list(split.system_messages) + [summary_message] + list(split.recent_messages)
        ctx["messages"] = new_messages
        if isinstance(getattr(target_run, "output", None), dict):
            target_run.output["messages"] = new_messages

        # Record the span in the run's `_runtime.memory_spans` index.
        runtime_ns = target_run.vars.get("_runtime")
        if not isinstance(runtime_ns, dict):
            runtime_ns = {}
            target_run.vars["_runtime"] = runtime_ns
        spans = runtime_ns.get("memory_spans")
        if not isinstance(spans, list):
            spans = []
            runtime_ns["memory_spans"] = spans
        span_record: Dict[str, Any] = {
            "kind": "conversation_span",
            "artifact_id": archived_ref,
            "created_at": now_iso(),
            "summary_message_id": summary_message_id,
            "from_timestamp": span_meta.get("from_timestamp"),
            "to_timestamp": span_meta.get("to_timestamp"),
            "from_message_id": span_meta.get("from_message_id"),
            "to_message_id": span_meta.get("to_message_id"),
            "message_count": int(span_meta.get("message_count") or 0),
            "compression_mode": compression_mode,
            "focus": focus_text,
        }
        if run.actor_id:
            span_record["created_by"] = str(run.actor_id)
        spans.append(span_record)

        # Persist when compacting a run other than the currently executing one.
        if target_run is not run:
            target_run.updated_at = now_iso()
            self._run_store.save(target_run)

        out = {
            "llm_run_id": sub_run_id,
            "span_id": archived_ref,
            "summary_message_id": summary_message_id,
            "preserve_recent": preserve_recent,
            "compression_mode": compression_mode,
            "focus": focus_text,
            "key_points": key_points,
            "confidence": confidence,
        }
        text = f"Compacted {len(split.older_messages)} messages into span_id={archived_ref}."
        result = {
            "mode": "executed",
            "results": [
                {
                    "call_id": call_id,
                    "name": tool_name,
                    "success": True,
                    "output": text,
                    "error": None,
                    "meta": out,
                }
            ],
        }
        return EffectOutcome.completed(result=result)
3395
+
3396
    def _handle_memory_note(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        """Handle MEMORY_NOTE.

        Store a small, durable memory note (key insight/decision) with tags and provenance sources.

        Payload:
          - note: str (required)
          - tags: dict[str,str] (optional; "kind" is reserved and dropped)
          - sources: dict (optional)
              - run_id: str (optional; defaults to current run_id)
              - span_ids: list[str] (optional; referenced span ids)
              - message_ids: list[str] (optional; referenced message ids)
          - scope: str (optional, default "run"; routed via the scope owner run)
          - location: str (optional; free-form locator stored with the note)
          - keep_in_context: bool (optional; also accepts "keepInContext"; when truthy,
            rehydrates the stored note into the base run's context immediately)
          - target_run_id: str (optional; defaults to current run_id)
          - tool_name: str (optional; default "remember_note")
          - call_id: str (optional; passthrough)
        """
        import json

        from .vars import ensure_namespaces

        # Normalize the executing run's namespaces (the note itself may land on a
        # different scope-owner run resolved below).
        ensure_namespaces(run.vars)
        runtime_ns = run.vars.get("_runtime")
        if not isinstance(runtime_ns, dict):
            runtime_ns = {}
            run.vars["_runtime"] = runtime_ns

        # Notes are persisted as artifacts, so an artifact store is required.
        artifact_store = self._artifact_store
        if artifact_store is None:
            return EffectOutcome.failed(
                "MEMORY_NOTE requires an ArtifactStore; configure runtime.set_artifact_store(...)"
            )

        payload = dict(effect.payload or {})
        tool_name = str(payload.get("tool_name") or "remember_note")
        call_id = str(payload.get("call_id") or "memory")

        # Resolve the base run used for scope routing.
        base_run_id = str(payload.get("target_run_id") or run.run_id).strip() or run.run_id
        base_run = run
        if base_run_id != run.run_id:
            loaded = self._run_store.load(base_run_id)
            if loaded is None:
                return EffectOutcome.failed(f"Unknown target_run_id: {base_run_id}")
            base_run = loaded
        ensure_namespaces(base_run.vars)

        # The note is recorded on the scope-owner run (run/session/global routing).
        scope = str(payload.get("scope") or "run").strip().lower() or "run"
        try:
            target_run = self._resolve_scope_owner_run(base_run, scope=scope)
        except Exception as e:
            return EffectOutcome.failed(str(e))
        ensure_namespaces(target_run.vars)

        # Guarantee `_runtime.memory_spans` exists as a list on the owner run.
        target_runtime_ns = target_run.vars.get("_runtime")
        if not isinstance(target_runtime_ns, dict):
            target_runtime_ns = {}
            target_run.vars["_runtime"] = target_runtime_ns
        spans = target_runtime_ns.get("memory_spans")
        if not isinstance(spans, list):
            spans = []
            target_runtime_ns["memory_spans"] = spans

        note = payload.get("note")
        note_text = str(note or "").strip()
        if not note_text:
            return EffectOutcome.failed("MEMORY_NOTE requires payload.note (non-empty string)")

        location_raw = payload.get("location")
        location = str(location_raw).strip() if isinstance(location_raw, str) else ""

        # Keep only non-empty str->str tags; "kind" is reserved for the runtime.
        tags = payload.get("tags")
        clean_tags: Dict[str, str] = {}
        if isinstance(tags, dict):
            for k, v in tags.items():
                if isinstance(k, str) and isinstance(v, str) and k and v:
                    if k == "kind":
                        continue
                    clean_tags[k] = v

        sources = payload.get("sources")
        sources_dict = dict(sources) if isinstance(sources, dict) else {}

        def _norm_list(value: Any) -> list[str]:
            # Coerce to a list of non-empty strings, order-preserving, deduped.
            if not isinstance(value, list):
                return []
            out: list[str] = []
            for item in value:
                if isinstance(item, str):
                    s = item.strip()
                    if s:
                        out.append(s)
                elif isinstance(item, int):
                    out.append(str(item))
            # preserve order but dedup
            seen: set[str] = set()
            deduped: list[str] = []
            for s in out:
                if s in seen:
                    continue
                seen.add(s)
                deduped.append(s)
            return deduped

        # Provenance default: the run that emitted this effect (not the scope owner).
        source_run_id = str(sources_dict.get("run_id") or run.run_id).strip() or run.run_id
        span_ids = _norm_list(sources_dict.get("span_ids"))
        message_ids = _norm_list(sources_dict.get("message_ids"))

        created_at = utc_now_iso()
        artifact_payload: Dict[str, Any] = {
            "note": note_text,
            "sources": {"run_id": source_run_id, "span_ids": span_ids, "message_ids": message_ids},
            "created_at": created_at,
        }
        if location:
            artifact_payload["location"] = location
        if run.actor_id:
            artifact_payload["actor_id"] = str(run.actor_id)
        session_id = getattr(target_run, "session_id", None) or getattr(run, "session_id", None)
        if session_id:
            artifact_payload["session_id"] = str(session_id)

        artifact_tags: Dict[str, str] = {"kind": "memory_note"}
        artifact_tags.update(clean_tags)
        meta = artifact_store.store_json(artifact_payload, run_id=target_run.run_id, tags=artifact_tags)
        artifact_id = meta.artifact_id

        preview = note_text
        if len(preview) > 160:
            #[WARNING:TRUNCATION] bounded memory_note preview for spans listing
            marker = "… (truncated)"
            keep = max(0, 160 - len(marker))
            if keep <= 0:
                preview = marker[:160].rstrip()
            else:
                preview = preview[:keep].rstrip() + marker

        span_record: Dict[str, Any] = {
            "kind": "memory_note",
            "artifact_id": artifact_id,
            "created_at": created_at,
            # Treat notes as point-in-time spans for time-range filtering.
            "from_timestamp": created_at,
            "to_timestamp": created_at,
            "message_count": 0,
            "note_preview": preview,
        }
        if location:
            span_record["location"] = location
        if clean_tags:
            span_record["tags"] = dict(clean_tags)
        if span_ids or message_ids:
            span_record["sources"] = {"run_id": source_run_id, "span_ids": span_ids, "message_ids": message_ids}
        if run.actor_id:
            span_record["created_by"] = str(run.actor_id)

        spans.append(span_record)

        def _coerce_bool(value: Any) -> bool:
            # Lenient truthiness for payload flags ("true"/"1"/"yes"/"on", numbers).
            # Unknown strings default to False.
            if isinstance(value, bool):
                return bool(value)
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                try:
                    return float(value) != 0.0
                except Exception:
                    return False
            if isinstance(value, str):
                s = value.strip().lower()
                if not s:
                    return False
                if s in {"false", "0", "no", "off"}:
                    return False
                if s in {"true", "1", "yes", "on"}:
                    return True
            return False

        # Optional UX convenience: keep the stored note immediately visible to downstream LLM calls by
        # rehydrating it into `base_run.context.messages` as a synthetic system message.
        keep_raw = payload.get("keep_in_context")
        if keep_raw is None:
            keep_raw = payload.get("keepInContext")
        keep_in_context = _coerce_bool(keep_raw)
        kept: Optional[Dict[str, Any]] = None
        if keep_in_context:
            try:
                from ..memory.active_context import ActiveContextPolicy

                policy = ActiveContextPolicy(run_store=self._run_store, artifact_store=artifact_store)
                out = policy.rehydrate_into_context_from_run(
                    base_run,
                    span_ids=[artifact_id],
                    placement="end",
                    dedup_by="message_id",
                    max_messages=1,
                )
                kept = {"inserted": out.get("inserted", 0), "skipped": out.get("skipped", 0)}

                # Persist when mutating a different run than the currently executing one.
                if base_run is not run:
                    base_run.updated_at = utc_now_iso()
                    self._run_store.save(base_run)
            except Exception as e:
                # Best-effort: rehydration failure does not fail the note itself.
                kept = {"inserted": 0, "skipped": 0, "error": str(e)}

        # Persist the owner run when it differs from the executing run.
        if target_run is not run:
            target_run.updated_at = utc_now_iso()
            self._run_store.save(target_run)

        rendered_tags = json.dumps(clean_tags, ensure_ascii=False, sort_keys=True) if clean_tags else "{}"
        text = f"Stored memory_note span_id={artifact_id} tags={rendered_tags}"
        meta_out: Dict[str, Any] = {"span_id": artifact_id, "created_at": created_at, "note_preview": preview}
        if isinstance(kept, dict):
            meta_out["kept_in_context"] = kept

        result = {
            "mode": "executed",
            "results": [
                {
                    "call_id": call_id,
                    "name": tool_name,
                    "success": True,
                    "output": text,
                    "error": None,
                    "meta": meta_out,
                }
            ],
        }
        return EffectOutcome.completed(result=result)
3623
+
3624
    def _handle_memory_rehydrate(self, run: RunState, effect: Effect, default_next_node: Optional[str]) -> EffectOutcome:
        """Handle MEMORY_REHYDRATE.

        This is a runtime-owned, deterministic mutation of `context.messages`:
        - loads archived conversation span artifacts from ArtifactStore
        - inserts them into `context.messages` with dedup
        - persists the mutated run (RunStore checkpoint)

        Payload (required unless stated):
        - span_ids: list[str|int] (required; artifact ids preferred; indices allowed)
        - placement: str ("after_summary"|"after_system"|"end", default "after_summary")
        - dedup_by: str (default "message_id")
        - max_messages: int (optional; max inserted messages)
        - target_run_id: str (optional; defaults to current run)
        - recall_level: str (optional; recall-effort policy name; camelCase
          `recallLevel` is accepted as an alias)

        Returns an EffectOutcome whose result carries `inserted`, `skipped`,
        per-artifact `artifacts` entries (keyed by `span_id`), plus optional
        `effort`/`warnings` metadata when a recall level was applied.
        `default_next_node` is accepted for handler-signature uniformity but
        not consulted here.
        """
        from .vars import ensure_namespaces

        ensure_namespaces(run.vars)
        artifact_store = self._artifact_store
        if artifact_store is None:
            return EffectOutcome.failed(
                "MEMORY_REHYDRATE requires an ArtifactStore; configure runtime.set_artifact_store(...)"
            )

        # Copy the payload so normalization below never mutates the effect.
        payload = dict(effect.payload or {})
        target_run_id = str(payload.get("target_run_id") or run.run_id).strip() or run.run_id

        # Recall effort policy (optional; no silent fallback).
        recall_level_raw = payload.get("recall_level")
        if recall_level_raw is None:
            recall_level_raw = payload.get("recallLevel")
        try:
            # Imported lazily; an invalid level (or import failure) fails the
            # effect explicitly rather than degrading silently.
            from ..memory.recall_levels import parse_recall_level, policy_for

            recall_level = parse_recall_level(recall_level_raw)
        except Exception as e:
            return EffectOutcome.failed(str(e))

        recall_warnings: list[str] = []
        recall_effort: dict[str, Any] = {}

        # Normalize span_ids (accept legacy `span_id` too).
        raw_span_ids = payload.get("span_ids")
        if raw_span_ids is None:
            raw_span_ids = payload.get("span_id")
        if raw_span_ids is None:
            return EffectOutcome.failed("MEMORY_REHYDRATE requires payload.span_ids (or legacy span_id)")
        span_ids: list[Any] = []
        if isinstance(raw_span_ids, list):
            span_ids = list(raw_span_ids)
        elif raw_span_ids is not None:
            span_ids = [raw_span_ids]
        if not span_ids:
            # Empty rehydrate is a valid no-op (common when recall returns no spans).
            return EffectOutcome.completed(result={"inserted": 0, "skipped": 0, "artifacts": []})

        placement = str(payload.get("placement") or "after_summary").strip() or "after_summary"
        dedup_by = str(payload.get("dedup_by") or "message_id").strip() or "message_id"
        max_messages = payload.get("max_messages")
        # Track key presence separately so an explicit-but-unparseable value
        # can be distinguished from an absent one when applying policy defaults.
        max_messages_provided = "max_messages" in payload

        if recall_level is not None:
            pol = policy_for(recall_level)
            raw_max = max_messages
            parsed: Optional[int] = None
            # bool is excluded explicitly because it is an int subclass and
            # would otherwise parse as 0/1.
            if raw_max is not None and not isinstance(raw_max, bool):
                try:
                    parsed = int(float(raw_max))
                except Exception:
                    parsed = None
            if not max_messages_provided or parsed is None:
                parsed = pol.rehydrate.max_messages_default
            # Clamp to the policy's [1, max] window, recording a warning for
            # each adjustment so callers can see the effort actually applied.
            if parsed < 1:
                recall_warnings.append(
                    f"recall_level={recall_level.value}: max_messages must be >=1; using {pol.rehydrate.max_messages_default}"
                )
                parsed = pol.rehydrate.max_messages_default
            if parsed > pol.rehydrate.max_messages_max:
                recall_warnings.append(
                    f"recall_level={recall_level.value}: clamped max_messages from {parsed} to {pol.rehydrate.max_messages_max}"
                )
                parsed = pol.rehydrate.max_messages_max

            max_messages = parsed
            recall_effort = {
                "recall_level": recall_level.value,
                "applied": {"max_messages": int(parsed)},
            }

        # Load the target run (may be different from current).
        target_run = run
        if target_run_id != run.run_id:
            loaded = self._run_store.load(target_run_id)
            if loaded is None:
                return EffectOutcome.failed(f"Unknown target_run_id: {target_run_id}")
            target_run = loaded
            ensure_namespaces(target_run.vars)

        # Best-effort: rehydrate only span kinds that are meaningful to inject into
        # `context.messages` for downstream LLM calls.
        #
        # Rationale:
        # - conversation_span: archived chat messages
        # - memory_note: durable notes (rehydrated as a synthetic message by ActiveContextPolicy)
        #
        # Evidence and other span kinds are intentionally skipped by default.
        from ..memory.active_context import ActiveContextPolicy

        spans = ActiveContextPolicy.list_memory_spans_from_run(target_run)
        resolved = ActiveContextPolicy.resolve_span_ids_from_spans(span_ids, spans)
        if not resolved:
            return EffectOutcome.completed(result={"inserted": 0, "skipped": 0, "artifacts": []})

        # First kind wins per artifact id; later duplicates are ignored.
        kind_by_artifact: dict[str, str] = {}
        for s in spans:
            if not isinstance(s, dict):
                continue
            aid = str(s.get("artifact_id") or "").strip()
            if not aid or aid in kind_by_artifact:
                continue
            kind_by_artifact[aid] = str(s.get("kind") or "").strip()

        to_rehydrate: list[str] = []
        skipped_artifacts: list[dict[str, Any]] = []
        allowed_kinds = {"conversation_span", "memory_note"}
        for aid in resolved:
            kind = kind_by_artifact.get(aid, "")
            # Unknown/empty kind is allowed through; only an explicit
            # non-allowed kind is filtered (and surfaced in the result).
            if kind and kind not in allowed_kinds:
                skipped_artifacts.append(
                    {"span_id": aid, "inserted": 0, "skipped": 0, "error": None, "kind": kind}
                )
                continue
            to_rehydrate.append(aid)

        # Reuse the canonical policy implementation (no duplicated logic).
        # Mutate the in-memory RunState to keep runtime tick semantics consistent.
        policy = ActiveContextPolicy(run_store=self._run_store, artifact_store=artifact_store)
        out = policy.rehydrate_into_context_from_run(
            target_run,
            span_ids=to_rehydrate,
            placement=placement,
            dedup_by=dedup_by,
            max_messages=max_messages,
        )

        # Persist when mutating a different run than the currently executing one.
        if target_run is not run:
            target_run.updated_at = utc_now_iso()
            self._run_store.save(target_run)

        # Normalize output shape to match backlog expectations (`span_id` field, optional kind).
        artifacts_out: list[dict[str, Any]] = []
        artifacts = out.get("artifacts")
        if isinstance(artifacts, list):
            for a in artifacts:
                if not isinstance(a, dict):
                    continue
                aid = str(a.get("artifact_id") or "").strip()
                artifacts_out.append(
                    {
                        "span_id": aid,
                        "inserted": a.get("inserted"),
                        "skipped": a.get("skipped"),
                        "error": a.get("error"),
                        "kind": kind_by_artifact.get(aid) or None,
                        "preview": a.get("preview"),
                    }
                )
        artifacts_out.extend(skipped_artifacts)

        return EffectOutcome.completed(
            result={
                "inserted": out.get("inserted", 0),
                "skipped": out.get("skipped", 0),
                "artifacts": artifacts_out,
                "effort": recall_effort if recall_effort else None,
                "warnings": list(recall_warnings) if recall_warnings else None,
            }
        )
+
3804
+
3805
+ def _dedup_preserve_order(values: list[str]) -> list[str]:
3806
+ seen: set[str] = set()
3807
+ out: list[str] = []
3808
+ for v in values:
3809
+ s = str(v or "").strip()
3810
+ if not s or s in seen:
3811
+ continue
3812
+ seen.add(s)
3813
+ out.append(s)
3814
+ return out
3815
+
3816
+
3817
+ def _span_sort_key(span: dict) -> tuple[str, str]:
3818
+ """Sort key for span adjacency. Prefer from_timestamp, then created_at."""
3819
+ from_ts = str(span.get("from_timestamp") or "")
3820
+ created = str(span.get("created_at") or "")
3821
+ return (from_ts or created, created)
3822
+
3823
+
3824
def _expand_connected_span_ids(
    *,
    spans: list[dict[str, Any]],
    seed_artifact_ids: list[str],
    connect_keys: list[str],
    neighbor_hops: int,
    limit: int,
) -> list[str]:
    """Expand seed spans into a deterministic neighborhood.

    For every seed artifact id this adds (a) temporally adjacent spans within
    ``neighbor_hops`` positions on the sorted timeline and (b) spans sharing a
    tag value for any of ``connect_keys``. The result is de-duplicated in
    first-seen order and capped at ``limit`` entries.
    """
    if not spans or not seed_artifact_ids:
        return list(seed_artifact_ids)

    timeline = sorted(
        (sp for sp in spans if isinstance(sp, dict) and sp.get("artifact_id")),
        key=_span_sort_key,
    )
    position: dict[str, int] = {str(sp["artifact_id"]): i for i, sp in enumerate(timeline)}

    # Index artifact ids by (tag key, tag value) for the requested connect keys.
    by_tag: dict[tuple[str, str], list[str]] = {}
    for sp in timeline:
        sp_tags = sp.get("tags") if isinstance(sp.get("tags"), dict) else {}
        for key in connect_keys:
            value = sp_tags.get(key)
            if isinstance(value, str) and value:
                by_tag.setdefault((key, value), []).append(str(sp["artifact_id"]))

    candidates: list[str] = []
    for seed in seed_artifact_ids:
        if len(candidates) >= limit:
            break
        candidates.append(seed)

        pos = position.get(seed)
        if pos is not None and neighbor_hops > 0:
            # Walk outward hop by hop: earlier neighbor first, then later.
            for hop in range(1, neighbor_hops + 1):
                for neighbor_pos in (pos - hop, pos + hop):
                    if 0 <= neighbor_pos < len(timeline):
                        candidates.append(str(timeline[neighbor_pos]["artifact_id"]))

        if connect_keys:
            seed_span = timeline[pos] if pos is not None and 0 <= pos < len(timeline) else None
            if isinstance(seed_span, dict):
                seed_tags = seed_span.get("tags") if isinstance(seed_span.get("tags"), dict) else {}
                for key in connect_keys:
                    value = seed_tags.get(key)
                    if isinstance(value, str) and value:
                        candidates.extend(by_tag.get((key, value), []))

    return _dedup_preserve_order(candidates)[:limit]
+
3873
+
3874
def _deep_scan_span_ids(
    *,
    spans: list[dict[str, Any]],
    artifact_store: Any,
    query: str,
    limit_spans: int,
    limit_messages_per_span: int,
) -> list[str]:
    """Fallback keyword scan over archived messages.

    Loads up to ``limit_spans`` span payloads from ``artifact_store`` and
    returns the artifact ids whose first ``limit_messages_per_span`` messages
    contain ``query`` as a case-insensitive substring, de-duplicated in
    first-seen order.
    """
    needle = str(query or "").strip().lower()
    if not needle:
        return []

    matches: list[str] = []
    scanned = 0
    for span in spans:
        if scanned >= limit_spans:
            break
        if not isinstance(span, dict):
            continue
        artifact_id = span.get("artifact_id")
        if not (isinstance(artifact_id, str) and artifact_id):
            continue
        # A span counts against the scan budget once we attempt to load it.
        scanned += 1

        payload = artifact_store.load_json(artifact_id)
        if not isinstance(payload, dict):
            continue
        messages = payload.get("messages")
        if not (isinstance(messages, list) and messages):
            continue

        # Only the leading slice of each span is scanned to bound the cost;
        # one hit is enough to select the span.
        for msg in messages[:limit_messages_per_span]:
            if not isinstance(msg, dict):
                continue
            content = msg.get("content")
            if content and needle in str(content).lower():
                matches.append(artifact_id)
                break

    return _dedup_preserve_order(matches)
+
3918
+
3919
def _render_memory_query_output(
    *,
    spans: list[dict[str, Any]],
    artifact_store: Any,
    selected_artifact_ids: list[str],
    summary_by_artifact: dict[str, str],
    max_messages: int,
) -> str:
    """Render selected memory spans as a human-readable, provenance-preserving report.

    For each selected artifact id this emits a header (kind, timestamps,
    creator, location, tags), an optional summary line, and then the artifact
    payload: a `note` body (plus its `sources` provenance) for memory notes,
    or the archived messages for conversation spans. `max_messages` is a
    GLOBAL message budget shared across all spans; -1 means unlimited.
    Spans missing from `spans` still render with placeholder header values.
    """
    if not selected_artifact_ids:
        return "No matching memory spans."

    span_by_id: dict[str, dict[str, Any]] = {
        str(s.get("artifact_id")): s for s in spans if isinstance(s, dict) and s.get("artifact_id")
    }

    lines: list[str] = []
    lines.append("Recalled memory spans (provenance-preserving):")

    # None == unbounded budget; otherwise counts down across ALL spans.
    remaining: Optional[int] = None if int(max_messages) == -1 else int(max_messages)
    for i, aid in enumerate(selected_artifact_ids, start=1):
        span = span_by_id.get(aid, {})
        kind = span.get("kind") or "span"
        created = span.get("created_at") or ""
        from_ts = span.get("from_timestamp") or ""
        to_ts = span.get("to_timestamp") or ""
        count = span.get("message_count") or ""
        created_by = span.get("created_by") or ""
        location = span.get("location") or ""
        tags = span.get("tags") if isinstance(span.get("tags"), dict) else {}
        # Deterministic tag rendering: sorted keys, string values only.
        tags_txt = ", ".join([f"{k}={v}" for k, v in sorted(tags.items()) if isinstance(v, str) and v])

        lines.append("")
        lines.append(f"[{i}] span_id={aid} kind={kind} msgs={count} created_at={created}")
        if from_ts or to_ts:
            lines.append(f" time_range: {from_ts} .. {to_ts}")
        if isinstance(created_by, str) and str(created_by).strip():
            lines.append(f" created_by: {str(created_by).strip()}")
        if isinstance(location, str) and str(location).strip():
            lines.append(f" location: {str(location).strip()}")
        if tags_txt:
            lines.append(f" tags: {tags_txt}")

        summary = summary_by_artifact.get(aid)
        if summary:
            lines.append(f" summary: {str(summary).strip()}")

        # Budget exhausted: keep emitting headers/summaries, skip payload loads.
        if remaining is not None and remaining <= 0:
            continue

        payload = artifact_store.load_json(aid)
        if not isinstance(payload, dict):
            lines.append(" (artifact payload unavailable)")
            continue
        # Memory-note branch: detected by declared kind OR by payload shape.
        if kind == "memory_note" or "note" in payload:
            note = str(payload.get("note") or "").strip()
            if note:
                lines.append(" note: " + note)
            else:
                lines.append(" (note payload missing note text)")

            # Fall back to the payload's location only if the span metadata
            # did not already render one above.
            if not (isinstance(location, str) and location.strip()):
                loc = payload.get("location")
                if isinstance(loc, str) and loc.strip():
                    lines.append(f" location: {loc.strip()}")

            # Provenance: originating run, source spans, and source messages
            # (lists truncated to the first 12 entries).
            sources = payload.get("sources")
            if isinstance(sources, dict):
                src_run = sources.get("run_id")
                span_ids = sources.get("span_ids")
                msg_ids = sources.get("message_ids")
                if isinstance(src_run, str) and src_run:
                    lines.append(f" sources.run_id: {src_run}")
                if isinstance(span_ids, list) and span_ids:
                    cleaned = [str(x) for x in span_ids if isinstance(x, (str, int))]
                    if cleaned:
                        lines.append(f" sources.span_ids: {', '.join(cleaned[:12])}")
                if isinstance(msg_ids, list) and msg_ids:
                    cleaned = [str(x) for x in msg_ids if isinstance(x, (str, int))]
                    if cleaned:
                        lines.append(f" sources.message_ids: {', '.join(cleaned[:12])}")
            continue

        messages = payload.get("messages")
        if not isinstance(messages, list):
            lines.append(" (artifact missing messages)")
            continue

        # Render messages with a global cap.
        rendered = 0
        for m in messages:
            if remaining is not None and remaining <= 0:
                break
            if not isinstance(m, dict):
                continue
            role = str(m.get("role") or "unknown")
            content = str(m.get("content") or "")
            ts = str(m.get("timestamp") or "")
            prefix = f" - {role}: "
            if ts:
                prefix = f" - {ts} {role}: "
            lines.append(prefix + content)
            rendered += 1
            if remaining is not None:
                remaining -= 1

        # Note any dict-shaped messages dropped by the budget; non-dict
        # entries are not counted in `total`.
        total = sum(1 for m in messages if isinstance(m, dict))
        if remaining is not None and rendered < total:
            lines.append(f" (remaining {total - rendered} messages omitted by max_messages={int(max_messages)})")

    return "\n".join(lines)
+
724
4030
 
725
4031
  def _set_nested(target: Dict[str, Any], dotted_key: str, value: Any) -> None:
726
4032
  """Set nested dict value using dot notation."""