AbstractRuntime 0.4.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. abstractruntime/__init__.py +76 -1
  2. abstractruntime/core/config.py +68 -1
  3. abstractruntime/core/models.py +5 -0
  4. abstractruntime/core/policy.py +74 -3
  5. abstractruntime/core/runtime.py +1002 -126
  6. abstractruntime/core/vars.py +8 -2
  7. abstractruntime/evidence/recorder.py +1 -1
  8. abstractruntime/history_bundle.py +772 -0
  9. abstractruntime/integrations/abstractcore/__init__.py +3 -0
  10. abstractruntime/integrations/abstractcore/default_tools.py +127 -3
  11. abstractruntime/integrations/abstractcore/effect_handlers.py +2440 -99
  12. abstractruntime/integrations/abstractcore/embeddings_client.py +69 -0
  13. abstractruntime/integrations/abstractcore/factory.py +68 -20
  14. abstractruntime/integrations/abstractcore/llm_client.py +447 -15
  15. abstractruntime/integrations/abstractcore/mcp_worker.py +1 -0
  16. abstractruntime/integrations/abstractcore/session_attachments.py +946 -0
  17. abstractruntime/integrations/abstractcore/tool_executor.py +31 -10
  18. abstractruntime/integrations/abstractcore/workspace_scoped_tools.py +561 -0
  19. abstractruntime/integrations/abstractmemory/__init__.py +3 -0
  20. abstractruntime/integrations/abstractmemory/effect_handlers.py +946 -0
  21. abstractruntime/memory/active_context.py +6 -1
  22. abstractruntime/memory/kg_packets.py +164 -0
  23. abstractruntime/memory/memact_composer.py +175 -0
  24. abstractruntime/memory/recall_levels.py +163 -0
  25. abstractruntime/memory/token_budget.py +86 -0
  26. abstractruntime/storage/__init__.py +4 -1
  27. abstractruntime/storage/artifacts.py +158 -30
  28. abstractruntime/storage/base.py +17 -1
  29. abstractruntime/storage/commands.py +339 -0
  30. abstractruntime/storage/in_memory.py +41 -1
  31. abstractruntime/storage/json_files.py +195 -12
  32. abstractruntime/storage/observable.py +38 -1
  33. abstractruntime/storage/offloading.py +433 -0
  34. abstractruntime/storage/sqlite.py +836 -0
  35. abstractruntime/visualflow_compiler/__init__.py +29 -0
  36. abstractruntime/visualflow_compiler/adapters/__init__.py +11 -0
  37. abstractruntime/visualflow_compiler/adapters/agent_adapter.py +126 -0
  38. abstractruntime/visualflow_compiler/adapters/context_adapter.py +109 -0
  39. abstractruntime/visualflow_compiler/adapters/control_adapter.py +615 -0
  40. abstractruntime/visualflow_compiler/adapters/effect_adapter.py +1051 -0
  41. abstractruntime/visualflow_compiler/adapters/event_adapter.py +307 -0
  42. abstractruntime/visualflow_compiler/adapters/function_adapter.py +97 -0
  43. abstractruntime/visualflow_compiler/adapters/memact_adapter.py +114 -0
  44. abstractruntime/visualflow_compiler/adapters/subflow_adapter.py +74 -0
  45. abstractruntime/visualflow_compiler/adapters/variable_adapter.py +316 -0
  46. abstractruntime/visualflow_compiler/compiler.py +3832 -0
  47. abstractruntime/visualflow_compiler/flow.py +247 -0
  48. abstractruntime/visualflow_compiler/visual/__init__.py +13 -0
  49. abstractruntime/visualflow_compiler/visual/agent_ids.py +29 -0
  50. abstractruntime/visualflow_compiler/visual/builtins.py +1376 -0
  51. abstractruntime/visualflow_compiler/visual/code_executor.py +214 -0
  52. abstractruntime/visualflow_compiler/visual/executor.py +2804 -0
  53. abstractruntime/visualflow_compiler/visual/models.py +211 -0
  54. abstractruntime/workflow_bundle/__init__.py +52 -0
  55. abstractruntime/workflow_bundle/models.py +236 -0
  56. abstractruntime/workflow_bundle/packer.py +317 -0
  57. abstractruntime/workflow_bundle/reader.py +87 -0
  58. abstractruntime/workflow_bundle/registry.py +587 -0
  59. abstractruntime-0.4.1.dist-info/METADATA +177 -0
  60. abstractruntime-0.4.1.dist-info/RECORD +86 -0
  61. abstractruntime-0.4.0.dist-info/METADATA +0 -167
  62. abstractruntime-0.4.0.dist-info/RECORD +0 -49
  63. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/WHEEL +0 -0
  64. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/entry_points.txt +0 -0
  65. {abstractruntime-0.4.0.dist-info → abstractruntime-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -18,10 +18,11 @@ We keep the design explicitly modular:
18
18
 
19
19
  from __future__ import annotations
20
20
 
21
- from dataclasses import dataclass
21
+ from dataclasses import dataclass, asdict, is_dataclass
22
22
  from datetime import datetime, timezone
23
23
  from typing import Any, Callable, Dict, Optional, List
24
24
  import copy
25
+ import hashlib
25
26
  import inspect
26
27
  import json
27
28
  import os
@@ -50,9 +51,201 @@ def utc_now_iso() -> str:
50
51
  return datetime.now(timezone.utc).isoformat()
51
52
 
52
53
 
54
+ def _jsonable(value: Any, *, _path: Optional[set[int]] = None, _depth: int = 0) -> Any:
55
+ """Best-effort conversion to JSON-safe objects.
56
+
57
+ The ledger is persisted as JSON. Any value stored in StepRecord.result must be JSON-safe.
58
+ """
59
+ if _path is None:
60
+ _path = set()
61
+ # Avoid pathological recursion and cyclic structures.
62
+ if _depth > 200:
63
+ return "<max_depth>"
64
+ if value is None:
65
+ return None
66
+ if isinstance(value, (str, int, float, bool)):
67
+ return value
68
+ if isinstance(value, dict):
69
+ vid = id(value)
70
+ if vid in _path:
71
+ return "<cycle>"
72
+ _path.add(vid)
73
+ try:
74
+ return {str(k): _jsonable(v, _path=_path, _depth=_depth + 1) for k, v in value.items()}
75
+ finally:
76
+ _path.discard(vid)
77
+ if isinstance(value, list):
78
+ vid = id(value)
79
+ if vid in _path:
80
+ return "<cycle>"
81
+ _path.add(vid)
82
+ try:
83
+ return [_jsonable(v, _path=_path, _depth=_depth + 1) for v in value]
84
+ finally:
85
+ _path.discard(vid)
86
+ try:
87
+ if is_dataclass(value):
88
+ vid = id(value)
89
+ if vid in _path:
90
+ return "<cycle>"
91
+ _path.add(vid)
92
+ try:
93
+ return _jsonable(asdict(value), _path=_path, _depth=_depth + 1)
94
+ finally:
95
+ _path.discard(vid)
96
+ except Exception:
97
+ pass
98
+ try:
99
+ md = getattr(value, "model_dump", None)
100
+ if callable(md):
101
+ vid = id(value)
102
+ if vid in _path:
103
+ return "<cycle>"
104
+ _path.add(vid)
105
+ try:
106
+ return _jsonable(md(), _path=_path, _depth=_depth + 1)
107
+ finally:
108
+ _path.discard(vid)
109
+ except Exception:
110
+ pass
111
+ try:
112
+ td = getattr(value, "to_dict", None)
113
+ if callable(td):
114
+ vid = id(value)
115
+ if vid in _path:
116
+ return "<cycle>"
117
+ _path.add(vid)
118
+ try:
119
+ return _jsonable(td(), _path=_path, _depth=_depth + 1)
120
+ finally:
121
+ _path.discard(vid)
122
+ except Exception:
123
+ pass
124
+ try:
125
+ json.dumps(value)
126
+ return value
127
+ except Exception:
128
+ return str(value)
129
+
130
+
53
131
  _DEFAULT_GLOBAL_MEMORY_RUN_ID = "global_memory"
132
+ _DEFAULT_SESSION_MEMORY_RUN_PREFIX = "session_memory_"
54
133
  _SAFE_RUN_ID_PATTERN = re.compile(r"^[a-zA-Z0-9_-]+$")
55
134
 
135
+ _RUNTIME_TOOL_CALL_ID_PREFIX = "rtcall_"
136
+
137
+
138
+ def _ensure_tool_calls_have_runtime_ids(
139
+ *,
140
+ effect: Effect,
141
+ idempotency_key: str,
142
+ ) -> Effect:
143
+ """Attach stable runtime-owned IDs to tool calls without mutating semantics.
144
+
145
+ - Preserves provider/model `call_id` when present (used for OpenAI transcripts).
146
+ - Adds `runtime_call_id` derived from the effect idempotency key + call index.
147
+ - Ensures each tool call has a non-empty `call_id` (falls back to runtime id).
148
+ - Canonicalizes allowlist ordering (`allowed_tools`) for deterministic payloads.
149
+ """
150
+
151
+ if effect.type != EffectType.TOOL_CALLS:
152
+ return effect
153
+ if not isinstance(effect.payload, dict):
154
+ return effect
155
+
156
+ payload = dict(effect.payload)
157
+ raw_tool_calls = payload.get("tool_calls")
158
+ if not isinstance(raw_tool_calls, list):
159
+ return effect
160
+
161
+ tool_calls: list[Any] = []
162
+ for idx, tc in enumerate(raw_tool_calls):
163
+ if not isinstance(tc, dict):
164
+ tool_calls.append(tc)
165
+ continue
166
+
167
+ tc2 = dict(tc)
168
+ runtime_call_id = tc2.get("runtime_call_id")
169
+ runtime_call_id_str = str(runtime_call_id).strip() if runtime_call_id is not None else ""
170
+ if not runtime_call_id_str:
171
+ runtime_call_id_str = f"{_RUNTIME_TOOL_CALL_ID_PREFIX}{idempotency_key}_{idx+1}"
172
+ tc2["runtime_call_id"] = runtime_call_id_str
173
+
174
+ call_id = tc2.get("call_id")
175
+ if call_id is None:
176
+ call_id = tc2.get("id")
177
+ call_id_str = str(call_id).strip() if call_id is not None else ""
178
+ if call_id_str:
179
+ tc2["call_id"] = call_id_str
180
+ else:
181
+ # When the model/provider didn't emit a call id (or the caller omitted it),
182
+ # fall back to a runtime-owned stable id so result correlation still works.
183
+ tc2["call_id"] = runtime_call_id_str
184
+
185
+ name = tc2.get("name")
186
+ if isinstance(name, str):
187
+ tc2["name"] = name.strip()
188
+
189
+ tool_calls.append(tc2)
190
+
191
+ payload["tool_calls"] = tool_calls
192
+
193
+ allowed_tools = payload.get("allowed_tools")
194
+ if isinstance(allowed_tools, list):
195
+ uniq = {
196
+ str(t).strip()
197
+ for t in allowed_tools
198
+ if isinstance(t, str) and t.strip()
199
+ }
200
+ payload["allowed_tools"] = sorted(uniq)
201
+
202
+ return Effect(type=effect.type, payload=payload, result_key=effect.result_key)
203
+
204
+ def _maybe_inject_llm_call_grounding_for_ledger(*, effect: Effect) -> Effect:
205
+ """Inject per-call time/location grounding into LLM_CALL payloads for auditability.
206
+
207
+ Why:
208
+ - The ledger is the replay/source-of-truth for thin clients.
209
+ - Grounding is injected at the integration boundary (AbstractCore LLM client) so the
210
+ model always knows "when/where" it is.
211
+ - But that injection historically happened *after* the runtime recorded the LLM_CALL
212
+ payload, making it appear missing in ledger UIs.
213
+
214
+ Contract:
215
+ - Only mutates the effect payload (never the durable run context/messages).
216
+ - Must not influence idempotency keys; callers should compute idempotency before calling this.
217
+ """
218
+
219
+ if effect.type != EffectType.LLM_CALL:
220
+ return effect
221
+ if not isinstance(effect.payload, dict):
222
+ return effect
223
+
224
+ payload = dict(effect.payload)
225
+ prompt = payload.get("prompt")
226
+ messages = payload.get("messages")
227
+ prompt_str = str(prompt or "")
228
+ messages_list = messages if isinstance(messages, list) else None
229
+
230
+ try:
231
+ from abstractruntime.integrations.abstractcore.llm_client import _inject_turn_grounding
232
+ except Exception:
233
+ return effect
234
+
235
+ updated_prompt, updated_messages = _inject_turn_grounding(prompt=prompt_str, messages=messages_list)
236
+
237
+ changed = False
238
+ if updated_prompt != prompt_str:
239
+ payload["prompt"] = updated_prompt
240
+ changed = True
241
+
242
+ if messages_list is not None:
243
+ if updated_messages != messages_list:
244
+ payload["messages"] = updated_messages
245
+ changed = True
246
+
247
+ return Effect(type=effect.type, payload=payload, result_key=effect.result_key) if changed else effect
248
+
56
249
 
57
250
  def _ensure_runtime_namespace(vars: Dict[str, Any]) -> Dict[str, Any]:
58
251
  runtime_ns = vars.get("_runtime")
@@ -406,6 +599,7 @@ class Runtime:
406
599
  pass
407
600
  run.updated_at = utc_now_iso()
408
601
  self._run_store.save(run)
602
+ self._append_terminal_status_event(run)
409
603
  return run
410
604
 
411
605
  def pause_run(self, run_id: str, *, reason: Optional[str] = None) -> RunState:
@@ -595,10 +789,14 @@ class Runtime:
595
789
  def pct(current: int, maximum: int) -> float:
596
790
  return round(current / maximum * 100, 1) if maximum > 0 else 0
597
791
 
792
+ from .vars import DEFAULT_MAX_TOKENS
793
+
598
794
  current_iter = int(limits.get("current_iteration", 0) or 0)
599
795
  max_iter = int(limits.get("max_iterations", 25) or 25)
600
796
  tokens_used = int(limits.get("estimated_tokens_used", 0) or 0)
601
- max_tokens = int(limits.get("max_tokens", 32768) or 32768)
797
+ max_tokens = int(limits.get("max_tokens", DEFAULT_MAX_TOKENS) or DEFAULT_MAX_TOKENS)
798
+ max_input_tokens = limits.get("max_input_tokens")
799
+ max_output_tokens = limits.get("max_output_tokens")
602
800
 
603
801
  return {
604
802
  "iterations": {
@@ -610,6 +808,8 @@ class Runtime:
610
808
  "tokens": {
611
809
  "estimated_used": tokens_used,
612
810
  "max": max_tokens,
811
+ "max_input_tokens": max_input_tokens,
812
+ "max_output_tokens": max_output_tokens,
613
813
  "pct": pct(tokens_used, max_tokens),
614
814
  "warning": pct(tokens_used, max_tokens) >= limits.get("warn_tokens_pct", 80),
615
815
  },
@@ -646,7 +846,9 @@ class Runtime:
646
846
 
647
847
  # Check tokens
648
848
  tokens_used = int(limits.get("estimated_tokens_used", 0) or 0)
649
- max_tokens = int(limits.get("max_tokens", 32768) or 32768)
849
+ from .vars import DEFAULT_MAX_TOKENS
850
+
851
+ max_tokens = int(limits.get("max_tokens", DEFAULT_MAX_TOKENS) or DEFAULT_MAX_TOKENS)
650
852
  warn_tokens_pct = int(limits.get("warn_tokens_pct", 80) or 80)
651
853
 
652
854
  if max_tokens > 0 and tokens_used > 0:
@@ -677,6 +879,7 @@ class Runtime:
677
879
  "max_iterations",
678
880
  "max_tokens",
679
881
  "max_output_tokens",
882
+ "max_input_tokens",
680
883
  "max_history_messages",
681
884
  "warn_iterations_pct",
682
885
  "warn_tokens_pct",
@@ -690,6 +893,35 @@ class Runtime:
690
893
 
691
894
  self._run_store.save(run)
692
895
 
896
+ def _append_terminal_status_event(self, run: RunState) -> None:
897
+ """Best-effort: append a durable `abstract.status` event on terminal runs.
898
+
899
+ This exists for UI clients that rely on `emit_event` records (e.g. status bars)
900
+ and should not be required for correctness. Failures must be non-fatal.
901
+ """
902
+ try:
903
+ status = getattr(getattr(run, "status", None), "value", None) or str(getattr(run, "status", "") or "")
904
+ status_str = str(status or "").strip().lower()
905
+ if status_str not in {RunStatus.COMPLETED.value, RunStatus.FAILED.value, RunStatus.CANCELLED.value}:
906
+ return
907
+
908
+ node_id = str(getattr(run, "current_node", None) or "").strip() or "runtime"
909
+ eff = Effect(
910
+ type=EffectType.EMIT_EVENT,
911
+ payload={"name": "abstract.status", "scope": "session", "payload": {"text": status_str}},
912
+ )
913
+ rec = StepRecord.start(
914
+ run=run,
915
+ node_id=node_id,
916
+ effect=eff,
917
+ idempotency_key=f"system:terminal_status:{status_str}",
918
+ )
919
+ rec.finish_success({"emitted": True, "name": "abstract.status", "payload": {"text": status_str}})
920
+ self._ledger_store.append(rec)
921
+ except Exception:
922
+ # Observability must never compromise durability/execution.
923
+ return
924
+
693
925
  def tick(self, *, workflow: WorkflowSpec, run_id: str, max_steps: int = 100) -> RunState:
694
926
  run = self.get_state(run_id)
695
927
  # Terminal runs never progress.
@@ -748,9 +980,10 @@ class Runtime:
748
980
  # ledger: completion record (no effect)
749
981
  rec = StepRecord.start(run=run, node_id=plan.node_id, effect=None)
750
982
  rec.status = StepStatus.COMPLETED
751
- rec.result = {"completed": True}
983
+ rec.result = {"completed": True, "output": _jsonable(run.output)}
752
984
  rec.ended_at = utc_now_iso()
753
985
  self._ledger_store.append(rec)
986
+ self._append_terminal_status_event(run)
754
987
  return run
755
988
 
756
989
  # Pure transition
@@ -766,9 +999,11 @@ class Runtime:
766
999
  continue
767
1000
 
768
1001
  # Effectful step - check for prior completed result (idempotency)
1002
+ effect = plan.effect
769
1003
  idempotency_key = self._effect_policy.idempotency_key(
770
- run=run, node_id=plan.node_id, effect=plan.effect
1004
+ run=run, node_id=plan.node_id, effect=effect
771
1005
  )
1006
+ effect = _ensure_tool_calls_have_runtime_ids(effect=effect, idempotency_key=idempotency_key)
772
1007
  prior_result = self._find_prior_completed_result(run.run_id, idempotency_key)
773
1008
  reused_prior_result = prior_result is not None
774
1009
 
@@ -782,11 +1017,15 @@ class Runtime:
782
1017
  # Reuse prior result - skip re-execution
783
1018
  outcome = EffectOutcome.completed(prior_result)
784
1019
  else:
1020
+ # For LLM calls, inject runtime grounding into the effect payload so ledger consumers
1021
+ # can see exactly what the model was sent (timestamp + country), without mutating the
1022
+ # durable run context.
1023
+ effect = _maybe_inject_llm_call_grounding_for_ledger(effect=effect)
785
1024
  # Execute with retry logic
786
1025
  outcome = self._execute_effect_with_retry(
787
1026
  run=run,
788
1027
  node_id=plan.node_id,
789
- effect=plan.effect,
1028
+ effect=effect,
790
1029
  idempotency_key=idempotency_key,
791
1030
  default_next_node=plan.next_node,
792
1031
  )
@@ -800,13 +1039,13 @@ class Runtime:
800
1039
  try:
801
1040
  if (
802
1041
  not reused_prior_result
803
- and plan.effect.type == EffectType.TOOL_CALLS
1042
+ and effect.type == EffectType.TOOL_CALLS
804
1043
  and outcome.status == "completed"
805
1044
  ):
806
1045
  self._maybe_record_tool_evidence(
807
1046
  run=run,
808
1047
  node_id=plan.node_id,
809
- effect=plan.effect,
1048
+ effect=effect,
810
1049
  tool_results=outcome.result,
811
1050
  )
812
1051
  except Exception:
@@ -816,13 +1055,36 @@ class Runtime:
816
1055
  _record_node_trace(
817
1056
  run=run,
818
1057
  node_id=plan.node_id,
819
- effect=plan.effect,
1058
+ effect=effect,
820
1059
  outcome=outcome,
821
1060
  idempotency_key=idempotency_key,
822
1061
  reused_prior_result=reused_prior_result,
823
1062
  duration_ms=duration_ms,
824
1063
  )
825
1064
 
1065
+ # Best-effort token observability: surface last-known input token usage in `_limits`.
1066
+ #
1067
+ # AbstractCore responses generally populate `usage` (prompt/input/output/total tokens).
1068
+ # We store the input-side usage as `estimated_tokens_used` so host UIs and workflows
1069
+ # can reason about compaction budgets without re-tokenizing.
1070
+ try:
1071
+ if effect.type == EffectType.LLM_CALL and outcome.status == "completed" and isinstance(outcome.result, dict):
1072
+ usage = outcome.result.get("usage")
1073
+ if isinstance(usage, dict):
1074
+ raw_in = usage.get("input_tokens")
1075
+ if raw_in is None:
1076
+ raw_in = usage.get("prompt_tokens")
1077
+ if raw_in is None:
1078
+ raw_in = usage.get("total_tokens")
1079
+ if raw_in is not None and not isinstance(raw_in, bool):
1080
+ limits = run.vars.get("_limits")
1081
+ if not isinstance(limits, dict):
1082
+ limits = {}
1083
+ run.vars["_limits"] = limits
1084
+ limits["estimated_tokens_used"] = int(raw_in)
1085
+ except Exception:
1086
+ pass
1087
+
826
1088
  if outcome.status == "failed":
827
1089
  controlled = _abort_if_externally_controlled()
828
1090
  if controlled is not None:
@@ -831,6 +1093,7 @@ class Runtime:
831
1093
  run.error = outcome.error or "unknown error"
832
1094
  run.updated_at = utc_now_iso()
833
1095
  self._run_store.save(run)
1096
+ self._append_terminal_status_event(run)
834
1097
  return run
835
1098
 
836
1099
  if outcome.status == "waiting":
@@ -845,8 +1108,8 @@ class Runtime:
845
1108
  return run
846
1109
 
847
1110
  # completed
848
- if plan.effect.result_key and outcome.result is not None:
849
- _set_nested(run.vars, plan.effect.result_key, outcome.result)
1111
+ if effect.result_key and outcome.result is not None:
1112
+ _set_nested(run.vars, effect.result_key, outcome.result)
850
1113
 
851
1114
  # Terminal effect node: treat missing next_node as completion.
852
1115
  #
@@ -862,6 +1125,7 @@ class Runtime:
862
1125
  run.output = {"success": True, "result": outcome.result}
863
1126
  run.updated_at = utc_now_iso()
864
1127
  self._run_store.save(run)
1128
+ self._append_terminal_status_event(run)
865
1129
  return run
866
1130
  controlled = _abort_if_externally_controlled()
867
1131
  if controlled is not None:
@@ -942,50 +1206,113 @@ class Runtime:
942
1206
  stored_payload: Dict[str, Any] = payload
943
1207
 
944
1208
  if result_key:
945
- # Tool waits may carry blocked-by-allowlist metadata. External hosts typically only execute
946
- # the filtered subset of tool calls and resume with results for those calls. To keep agent
947
- # semantics correct (and evidence indices aligned), merge blocked entries back into the
948
- # resumed payload deterministically.
949
- merged_payload: Dict[str, Any] = payload
950
- try:
951
- details = run.waiting.details if run.waiting is not None else None
952
- if isinstance(details, dict):
953
- blocked = details.get("blocked_by_index")
954
- original_count = details.get("original_call_count")
955
- results = payload.get("results") if isinstance(payload, dict) else None
956
- if (
957
- isinstance(blocked, dict)
958
- and isinstance(original_count, int)
959
- and original_count > 0
960
- and isinstance(results, list)
961
- and len(results) != original_count
962
- ):
963
- merged_results: list[Any] = []
964
- executed_iter = iter(results)
965
-
966
- for idx in range(original_count):
967
- blocked_entry = blocked.get(str(idx))
968
- if isinstance(blocked_entry, dict):
969
- merged_results.append(blocked_entry)
970
- continue
971
- try:
972
- merged_results.append(next(executed_iter))
973
- except StopIteration:
974
- merged_results.append(
975
- {
976
- "call_id": "",
977
- "name": "",
978
- "success": False,
979
- "output": None,
980
- "error": "Missing tool result",
981
- }
982
- )
983
-
984
- merged_payload = dict(payload)
985
- merged_payload["results"] = merged_results
986
- merged_payload.setdefault("mode", "executed")
987
- except Exception:
1209
+ details = run.waiting.details if run.waiting is not None else None
1210
+
1211
+ # Special case: subworkflow completion resumed as a tool-style observation.
1212
+ if (
1213
+ run.waiting.reason == WaitReason.SUBWORKFLOW
1214
+ and isinstance(details, dict)
1215
+ and bool(details.get("wrap_as_tool_result", False))
1216
+ and isinstance(payload, dict)
1217
+ and not ("mode" in payload and "results" in payload)
1218
+ ):
1219
+ tool_name = str(details.get("tool_name") or "start_subworkflow").strip() or "start_subworkflow"
1220
+ call_id = str(details.get("call_id") or "subworkflow").strip() or "subworkflow"
1221
+ sub_run_id = str(payload.get("sub_run_id") or details.get("sub_run_id") or "").strip()
1222
+ child_output = payload.get("output")
1223
+
1224
+ answer = ""
1225
+ report = ""
1226
+ err = None
1227
+ success = True
1228
+ if isinstance(child_output, dict):
1229
+ # Generic failure envelope support (VisualFlow style).
1230
+ if child_output.get("success") is False:
1231
+ success = False
1232
+ err = str(child_output.get("error") or "Subworkflow failed")
1233
+ a = child_output.get("answer")
1234
+ if isinstance(a, str) and a.strip():
1235
+ answer = a.strip()
1236
+ r = child_output.get("report")
1237
+ if isinstance(r, str) and r.strip():
1238
+ report = r.strip()
1239
+
1240
+ if not answer:
1241
+ if isinstance(child_output, str) and child_output.strip():
1242
+ answer = child_output.strip()
1243
+ else:
1244
+ try:
1245
+ answer = json.dumps(child_output, ensure_ascii=False)
1246
+ except Exception:
1247
+ answer = "" if child_output is None else str(child_output)
1248
+
1249
+ tool_output: Dict[str, Any] = {"rendered": answer, "answer": answer, "sub_run_id": sub_run_id}
1250
+ if report and len(report) <= 4000:
1251
+ tool_output["report"] = report
1252
+
1253
+ merged_payload: Dict[str, Any] = {
1254
+ "mode": "executed",
1255
+ "results": [
1256
+ {
1257
+ "call_id": call_id,
1258
+ "name": tool_name,
1259
+ "success": bool(success),
1260
+ "output": tool_output if success else None,
1261
+ "error": None if success else err,
1262
+ }
1263
+ ],
1264
+ }
1265
+ else:
1266
+ # Tool waits may carry blocked-by-allowlist metadata. External hosts typically only execute
1267
+ # the filtered subset of tool calls and resume with results for those calls. To keep agent
1268
+ # semantics correct (and evidence indices aligned), merge blocked entries back into the
1269
+ # resumed payload deterministically.
988
1270
  merged_payload = payload
1271
+ try:
1272
+ if isinstance(details, dict):
1273
+ blocked = details.get("blocked_by_index")
1274
+ pre_results = details.get("pre_results_by_index")
1275
+ original_count = details.get("original_call_count")
1276
+ results = payload.get("results") if isinstance(payload, dict) else None
1277
+ fixed_by_index: Dict[str, Any] = {}
1278
+ if isinstance(blocked, dict):
1279
+ fixed_by_index.update(blocked)
1280
+ if isinstance(pre_results, dict):
1281
+ fixed_by_index.update(pre_results)
1282
+ if (
1283
+ fixed_by_index
1284
+ and isinstance(original_count, int)
1285
+ and original_count > 0
1286
+ and isinstance(results, list)
1287
+ and len(results) != original_count
1288
+ ):
1289
+ merged_results: list[Any] = []
1290
+ executed_iter = iter(results)
1291
+
1292
+ for idx in range(original_count):
1293
+ fixed_entry = fixed_by_index.get(str(idx))
1294
+ if isinstance(fixed_entry, dict):
1295
+ merged_results.append(fixed_entry)
1296
+ continue
1297
+ try:
1298
+ merged_results.append(next(executed_iter))
1299
+ except StopIteration:
1300
+ merged_results.append(
1301
+ {
1302
+ "call_id": "",
1303
+ "runtime_call_id": None,
1304
+ "name": "",
1305
+ "success": False,
1306
+ "output": None,
1307
+ "error": "Missing tool result",
1308
+ }
1309
+ )
1310
+
1311
+ merged_payload = dict(payload)
1312
+ merged_payload["results"] = merged_results
1313
+ merged_payload.setdefault("mode", "executed")
1314
+ except Exception:
1315
+ merged_payload = payload
989
1316
 
990
1317
  _set_nested(run.vars, result_key, merged_payload)
991
1318
  stored_payload = merged_payload
@@ -1014,15 +1341,107 @@ class Runtime:
1014
1341
  except Exception:
1015
1342
  pass
1016
1343
 
1344
+ # Append a durable "resume" record to the ledger for replay-first clients.
1345
+ #
1346
+ # Why:
1347
+ # - The ledger is the source-of-truth for replay/streaming (ADR-0011/0018).
1348
+ # - Without a resume record, user input payloads (ASK_USER / abstract.ask / tool approvals)
1349
+ # only live in RunState.vars and are not visible during ledger-only replay.
1350
+ #
1351
+ # This is best-effort: failure to append must not compromise correctness.
1352
+ try:
1353
+ wait_before = run.waiting
1354
+ wait_reason_value = None
1355
+ wait_key_value = None
1356
+ try:
1357
+ if wait_before is not None:
1358
+ r0 = getattr(wait_before, "reason", None)
1359
+ wait_reason_value = r0.value if hasattr(r0, "value") else str(r0) if r0 is not None else None
1360
+ wait_key_value = getattr(wait_before, "wait_key", None)
1361
+ except Exception:
1362
+ wait_reason_value = None
1363
+ wait_key_value = None
1364
+
1365
+ payload_for_ledger: Any = stored_payload
1366
+ try:
1367
+ from ..storage.offloading import _default_max_inline_bytes, offload_large_values
1368
+
1369
+ if self._artifact_store is not None:
1370
+ payload_for_ledger = offload_large_values(
1371
+ stored_payload,
1372
+ artifact_store=self._artifact_store,
1373
+ run_id=str(run.run_id or ""),
1374
+ max_inline_bytes=_default_max_inline_bytes(),
1375
+ base_tags={"source": "resume", "kind": "resume_payload"},
1376
+ root_path="resume.payload",
1377
+ allow_root_replace=False,
1378
+ )
1379
+ except Exception:
1380
+ payload_for_ledger = stored_payload
1381
+
1382
+ node_id0 = str(getattr(run, "current_node", None) or "")
1383
+ rec = StepRecord.start(run=run, node_id=node_id0 or "unknown", effect=None)
1384
+ rec.status = StepStatus.COMPLETED
1385
+ rec.effect = {
1386
+ "type": "resume",
1387
+ "payload": {
1388
+ "wait_reason": wait_reason_value,
1389
+ "wait_key": wait_key_value,
1390
+ "resume_to_node": resume_to,
1391
+ "result_key": result_key,
1392
+ "payload": payload_for_ledger,
1393
+ },
1394
+ "result_key": None,
1395
+ }
1396
+ rec.result = {"resumed": True}
1397
+ rec.ended_at = utc_now_iso()
1398
+ self._ledger_store.append(rec)
1399
+ except Exception:
1400
+ pass
1401
+
1017
1402
  # Terminal waiting node: if there is no resume target, treat the resume payload as
1018
1403
  # the final output instead of re-executing the waiting node again (which would
1019
1404
  # otherwise create an infinite wait/resume loop).
1020
1405
  if resume_to is None:
1406
+ # Capture the wait context for observability before clearing it.
1407
+ wait_before = run.waiting
1408
+ wait_reason = None
1409
+ wait_key0 = None
1410
+ try:
1411
+ if wait_before is not None:
1412
+ r0 = getattr(wait_before, "reason", None)
1413
+ wait_reason = r0.value if hasattr(r0, "value") else str(r0) if r0 is not None else None
1414
+ wait_key0 = getattr(wait_before, "wait_key", None)
1415
+ except Exception:
1416
+ wait_reason = None
1417
+ wait_key0 = None
1418
+
1021
1419
  run.status = RunStatus.COMPLETED
1022
1420
  run.waiting = None
1023
1421
  run.output = {"success": True, "result": stored_payload}
1024
1422
  run.updated_at = utc_now_iso()
1025
1423
  self._run_store.save(run)
1424
+
1425
+ # Ledger must remain the source-of-truth for replay/streaming.
1426
+ # When a terminal wait is resumed, there is no follow-up `tick()` to append a
1427
+ # completion record, so we append one here.
1428
+ try:
1429
+ node_id0 = str(getattr(run, "current_node", None) or "")
1430
+ rec = StepRecord.start(run=run, node_id=node_id0 or "unknown", effect=None)
1431
+ rec.status = StepStatus.COMPLETED
1432
+ rec.result = {
1433
+ "completed": True,
1434
+ "via": "resume",
1435
+ "wait_reason": wait_reason,
1436
+ "wait_key": wait_key0,
1437
+ "output": _jsonable(run.output),
1438
+ }
1439
+ rec.ended_at = utc_now_iso()
1440
+ self._ledger_store.append(rec)
1441
+ except Exception:
1442
+ # Observability must never compromise durability/execution.
1443
+ pass
1444
+ self._append_terminal_status_event(run)
1026
1445
  return run
1027
1446
 
1028
1447
  self._apply_resume_payload(run, payload=payload, override_node=resume_to)
@@ -1095,8 +1514,57 @@ class Runtime:
1095
1514
  self._run_store.save(run)
1096
1515
  return run
1097
1516
 
1098
- def _resolve_session_root_run(self, run: RunState) -> RunState:
1099
- """Resolve the root run of the current run-tree (walk `parent_run_id`)."""
1517
+ def _session_memory_run_id(self, session_id: str) -> str:
1518
+ """Return a stable session memory run id for a durable `session_id`.
1519
+
1520
+ This run is internal and is used only as the owner for `scope="session"` span indices.
1521
+ """
1522
+ sid = str(session_id or "").strip()
1523
+ if not sid:
1524
+ raise ValueError("session_id is required")
1525
+ if _SAFE_RUN_ID_PATTERN.match(sid):
1526
+ rid = f"{_DEFAULT_SESSION_MEMORY_RUN_PREFIX}{sid}"
1527
+ if _SAFE_RUN_ID_PATTERN.match(rid):
1528
+ return rid
1529
+ digest = hashlib.sha256(sid.encode("utf-8")).hexdigest()[:32]
1530
+ return f"{_DEFAULT_SESSION_MEMORY_RUN_PREFIX}sha_{digest}"
1531
+
1532
+ def _ensure_session_memory_run(self, session_id: str) -> RunState:
1533
+ """Load or create the session memory run used as the owner for `scope=\"session\"` spans."""
1534
+ rid = self._session_memory_run_id(session_id)
1535
+ existing = self._run_store.load(rid)
1536
+ if existing is not None:
1537
+ return existing
1538
+
1539
+ run = RunState(
1540
+ run_id=rid,
1541
+ workflow_id="__session_memory__",
1542
+ status=RunStatus.COMPLETED,
1543
+ current_node="done",
1544
+ vars={
1545
+ "context": {"task": "", "messages": []},
1546
+ "scratchpad": {},
1547
+ "_runtime": {"memory_spans": []},
1548
+ "_temp": {},
1549
+ "_limits": {},
1550
+ },
1551
+ waiting=None,
1552
+ output={"messages": []},
1553
+ error=None,
1554
+ created_at=utc_now_iso(),
1555
+ updated_at=utc_now_iso(),
1556
+ actor_id=None,
1557
+ session_id=str(session_id or "").strip() or None,
1558
+ parent_run_id=None,
1559
+ )
1560
+ self._run_store.save(run)
1561
+ return run
1562
+
1563
+ def _resolve_run_tree_root_run(self, run: RunState) -> RunState:
1564
+ """Resolve the root run of the current run-tree (walk `parent_run_id`).
1565
+
1566
+ This is used as a backward-compatible fallback for legacy runs without `session_id`.
1567
+ """
1100
1568
  cur = run
1101
1569
  seen: set[str] = set()
1102
1570
  while True:
@@ -1118,7 +1586,10 @@ class Runtime:
1118
1586
  if s == "run":
1119
1587
  return base_run
1120
1588
  if s == "session":
1121
- return self._resolve_session_root_run(base_run)
1589
+ sid = getattr(base_run, "session_id", None)
1590
+ if isinstance(sid, str) and sid.strip():
1591
+ return self._ensure_session_memory_run(sid.strip())
1592
+ return self._resolve_run_tree_root_run(base_run)
1122
1593
  if s == "global":
1123
1594
  return self._ensure_global_memory_run()
1124
1595
  raise ValueError(f"Unknown memory scope: {scope}")
@@ -1377,12 +1848,6 @@ class Runtime:
1377
1848
  except Exception:
1378
1849
  wildcard_wait_key = None
1379
1850
 
1380
- if self._workflow_registry is None:
1381
- return EffectOutcome.failed(
1382
- "emit_event requires a workflow_registry to resume target runs. "
1383
- "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)."
1384
- )
1385
-
1386
1851
  if not isinstance(self._run_store, QueryableRunStore):
1387
1852
  return EffectOutcome.failed(
1388
1853
  "emit_event requires a QueryableRunStore to find waiting runs. "
@@ -1415,6 +1880,11 @@ class Runtime:
1415
1880
  available_in_session: list[str] = []
1416
1881
  prefix = f"evt:session:{session_id}:"
1417
1882
 
1883
+ # First pass: find matching runs and compute best-effort diagnostics without
1884
+ # requiring a workflow_registry. This allows UI-only EMIT_EVENT usage
1885
+ # (e.g. AbstractCode notifications) in deployments that do not use
1886
+ # WAIT_EVENT listeners.
1887
+ matched: list[tuple[RunState, Optional[str]]] = []
1418
1888
  for r in candidates:
1419
1889
  if _is_paused_run_vars(getattr(r, "vars", None)):
1420
1890
  continue
@@ -1430,6 +1900,31 @@ class Runtime:
1430
1900
  if wk != wait_key and (wildcard_wait_key is None or wk != wildcard_wait_key):
1431
1901
  continue
1432
1902
 
1903
+ matched.append((r, wk if isinstance(wk, str) else None))
1904
+
1905
+ # If there are no matching listeners, emitting is still a useful side effect
1906
+ # for hosts (ledger observability, UI events). In that case, do not require
1907
+ # a workflow_registry.
1908
+ if not matched:
1909
+ out0: Dict[str, Any] = {
1910
+ "wait_key": wait_key,
1911
+ "name": str(name),
1912
+ "scope": str(scope or "session"),
1913
+ "delivered": 0,
1914
+ "delivered_to": [],
1915
+ "resumed": [],
1916
+ }
1917
+ if available_in_session:
1918
+ out0["available_listeners_in_session"] = available_in_session
1919
+ return EffectOutcome.completed(out0)
1920
+
1921
+ if self._workflow_registry is None:
1922
+ return EffectOutcome.failed(
1923
+ "emit_event requires a workflow_registry to resume target runs. "
1924
+ "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)."
1925
+ )
1926
+
1927
+ for r, wk in matched:
1433
1928
  wf = self._workflow_registry.get(r.workflow_id)
1434
1929
  if wf is None:
1435
1930
  # Can't resume without the spec; skip but include diagnostic in result.
@@ -1516,7 +2011,15 @@ class Runtime:
1516
2011
  message = effect.payload.get("text") or effect.payload.get("content")
1517
2012
  if message is None:
1518
2013
  return EffectOutcome.failed("answer_user requires payload.message")
1519
- return EffectOutcome.completed({"message": str(message)})
2014
+ level_raw = effect.payload.get("level")
2015
+ level = str(level_raw).strip().lower() if isinstance(level_raw, str) else ""
2016
+ if level in {"warn"}:
2017
+ level = "warning"
2018
+ if level not in {"message", "warning", "error", "info"}:
2019
+ level = "message"
2020
+ if level == "info":
2021
+ level = "message"
2022
+ return EffectOutcome.completed({"message": str(message), "level": level})
1520
2023
 
1521
2024
  def _handle_start_subworkflow(
1522
2025
  self, run: RunState, effect: Effect, default_next_node: Optional[str]
@@ -1537,11 +2040,77 @@ class Runtime:
1537
2040
  - Starts the subworkflow and returns immediately
1538
2041
  - Returns {"sub_run_id": "..."} so parent can track it
1539
2042
  """
2043
+ payload0 = effect.payload if isinstance(effect.payload, dict) else {}
2044
+ wrap_as_tool_result = bool(payload0.get("wrap_as_tool_result", False))
2045
+ tool_name_raw = payload0.get("tool_name")
2046
+ if tool_name_raw is None:
2047
+ tool_name_raw = payload0.get("toolName")
2048
+ tool_name = str(tool_name_raw or "").strip()
2049
+ call_id_raw = payload0.get("call_id")
2050
+ if call_id_raw is None:
2051
+ call_id_raw = payload0.get("callId")
2052
+ call_id = str(call_id_raw or "").strip()
2053
+
2054
+ def _tool_result(*, success: bool, output: Any, error: Optional[str]) -> Dict[str, Any]:
2055
+ name = tool_name or "start_subworkflow"
2056
+ cid = call_id or "subworkflow"
2057
+ return {
2058
+ "mode": "executed",
2059
+ "results": [
2060
+ {
2061
+ "call_id": cid,
2062
+ "name": name,
2063
+ "success": bool(success),
2064
+ "output": output if success else None,
2065
+ "error": None if success else str(error or "Subworkflow failed"),
2066
+ }
2067
+ ],
2068
+ }
2069
+
2070
+ def _tool_output_for_subworkflow(*, sub_run_id: str, output: Any) -> Dict[str, Any]:
2071
+ rendered = ""
2072
+ answer = ""
2073
+ report = ""
2074
+ if isinstance(output, dict):
2075
+ a = output.get("answer")
2076
+ if isinstance(a, str) and a.strip():
2077
+ answer = a.strip()
2078
+ r = output.get("report")
2079
+ if isinstance(r, str) and r.strip():
2080
+ report = r.strip()
2081
+ if not answer:
2082
+ if isinstance(output, str) and output.strip():
2083
+ answer = output.strip()
2084
+ else:
2085
+ try:
2086
+ answer = json.dumps(output, ensure_ascii=False)
2087
+ except Exception:
2088
+ answer = str(output)
2089
+ rendered = answer
2090
+ out = {"rendered": rendered, "answer": answer, "sub_run_id": str(sub_run_id)}
2091
+ # Keep the tool observation bounded; the full child run can be inspected via run id if needed.
2092
+ if report and len(report) <= 4000:
2093
+ out["report"] = report
2094
+ return out
2095
+
1540
2096
  workflow_id = effect.payload.get("workflow_id")
1541
2097
  if not workflow_id:
2098
+ if wrap_as_tool_result:
2099
+ return EffectOutcome.completed(_tool_result(success=False, output=None, error="start_subworkflow requires payload.workflow_id"))
1542
2100
  return EffectOutcome.failed("start_subworkflow requires payload.workflow_id")
1543
2101
 
1544
2102
  if self._workflow_registry is None:
2103
+ if wrap_as_tool_result:
2104
+ return EffectOutcome.completed(
2105
+ _tool_result(
2106
+ success=False,
2107
+ output=None,
2108
+ error=(
2109
+ "start_subworkflow requires a workflow_registry. "
2110
+ "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)"
2111
+ ),
2112
+ )
2113
+ )
1545
2114
  return EffectOutcome.failed(
1546
2115
  "start_subworkflow requires a workflow_registry. "
1547
2116
  "Set it via Runtime(workflow_registry=...) or runtime.set_workflow_registry(...)"
@@ -1550,20 +2119,59 @@ class Runtime:
1550
2119
  # Look up the subworkflow
1551
2120
  sub_workflow = self._workflow_registry.get(workflow_id)
1552
2121
  if sub_workflow is None:
2122
+ if wrap_as_tool_result:
2123
+ return EffectOutcome.completed(
2124
+ _tool_result(success=False, output=None, error=f"Workflow '{workflow_id}' not found in registry")
2125
+ )
1553
2126
  return EffectOutcome.failed(f"Workflow '{workflow_id}' not found in registry")
1554
2127
 
1555
- sub_vars = effect.payload.get("vars") or {}
2128
+ sub_vars_raw = effect.payload.get("vars")
2129
+ sub_vars: Dict[str, Any] = dict(sub_vars_raw) if isinstance(sub_vars_raw, dict) else {}
2130
+
2131
+ # Inherit workspace policy into child runs by default.
2132
+ #
2133
+ # Why: in VisualFlow, agents/subflows run as START_SUBWORKFLOW runs. Tool execution inside the
2134
+ # child must respect the same workspace scope the user configured for the parent run.
2135
+ #
2136
+ # Policy: only inherit when the child did not explicitly override the keys.
2137
+ try:
2138
+ parent_vars = run.vars if isinstance(getattr(run, "vars", None), dict) else {}
2139
+ for k in ("workspace_root", "workspace_access_mode", "workspace_allowed_paths", "workspace_ignored_paths"):
2140
+ if k in sub_vars:
2141
+ continue
2142
+ v = parent_vars.get(k)
2143
+ if v is None:
2144
+ continue
2145
+ if isinstance(v, str):
2146
+ if not v.strip():
2147
+ continue
2148
+ sub_vars[k] = v
2149
+ continue
2150
+ sub_vars[k] = v
2151
+ except Exception:
2152
+ pass
1556
2153
  is_async = bool(effect.payload.get("async", False))
1557
2154
  wait_for_completion = bool(effect.payload.get("wait", False))
1558
2155
  include_traces = bool(effect.payload.get("include_traces", False))
1559
2156
  resume_to = effect.payload.get("resume_to_node") or default_next_node
1560
2157
 
2158
+ # Optional override: allow the caller (e.g. VisualFlow compiler) to pass an explicit
2159
+ # session_id for the child run. When omitted, children inherit the parent's session.
2160
+ session_override = effect.payload.get("session_id")
2161
+ if session_override is None:
2162
+ session_override = effect.payload.get("sessionId")
2163
+ session_id: Optional[str]
2164
+ if isinstance(session_override, str) and session_override.strip():
2165
+ session_id = session_override.strip()
2166
+ else:
2167
+ session_id = getattr(run, "session_id", None)
2168
+
1561
2169
  # Start the subworkflow with parent tracking
1562
2170
  sub_run_id = self.start(
1563
2171
  workflow=sub_workflow,
1564
2172
  vars=sub_vars,
1565
2173
  actor_id=run.actor_id, # Inherit actor from parent
1566
- session_id=getattr(run, "session_id", None), # Inherit session from parent
2174
+ session_id=session_id,
1567
2175
  parent_run_id=run.run_id, # Track parent for hierarchy
1568
2176
  )
1569
2177
 
@@ -1586,11 +2194,25 @@ class Runtime:
1586
2194
  "sub_run_id": sub_run_id,
1587
2195
  "sub_workflow_id": workflow_id,
1588
2196
  "async": True,
2197
+ "include_traces": include_traces,
1589
2198
  },
1590
2199
  )
2200
+ if wrap_as_tool_result:
2201
+ if isinstance(wait.details, dict):
2202
+ wait.details["wrap_as_tool_result"] = True
2203
+ wait.details["tool_name"] = tool_name or "start_subworkflow"
2204
+ wait.details["call_id"] = call_id or "subworkflow"
1591
2205
  return EffectOutcome.waiting(wait)
1592
2206
 
1593
2207
  # Fire-and-forget: caller is responsible for driving/observing the child.
2208
+ if wrap_as_tool_result:
2209
+ return EffectOutcome.completed(
2210
+ _tool_result(
2211
+ success=True,
2212
+ output={"rendered": f"Started subworkflow {sub_run_id}", "sub_run_id": sub_run_id, "async": True},
2213
+ error=None,
2214
+ )
2215
+ )
1594
2216
  return EffectOutcome.completed({"sub_run_id": sub_run_id, "async": True})
1595
2217
 
1596
2218
  # Sync mode: run the subworkflow until completion or waiting
@@ -1602,6 +2224,14 @@ class Runtime:
1602
2224
 
1603
2225
  if sub_state.status == RunStatus.COMPLETED:
1604
2226
  # Subworkflow completed - return its output
2227
+ if wrap_as_tool_result:
2228
+ return EffectOutcome.completed(
2229
+ _tool_result(
2230
+ success=True,
2231
+ output=_tool_output_for_subworkflow(sub_run_id=sub_run_id, output=sub_state.output),
2232
+ error=None,
2233
+ )
2234
+ )
1605
2235
  result: Dict[str, Any] = {"sub_run_id": sub_run_id, "output": sub_state.output}
1606
2236
  if include_traces:
1607
2237
  result["node_traces"] = self.get_node_traces(sub_run_id)
@@ -1609,9 +2239,15 @@ class Runtime:
1609
2239
 
1610
2240
  if sub_state.status == RunStatus.FAILED:
1611
2241
  # Subworkflow failed - propagate error
1612
- return EffectOutcome.failed(
1613
- f"Subworkflow '{workflow_id}' failed: {sub_state.error}"
1614
- )
2242
+ if wrap_as_tool_result:
2243
+ return EffectOutcome.completed(
2244
+ _tool_result(
2245
+ success=False,
2246
+ output=None,
2247
+ error=f"Subworkflow '{workflow_id}' failed: {sub_state.error}",
2248
+ )
2249
+ )
2250
+ return EffectOutcome.failed(f"Subworkflow '{workflow_id}' failed: {sub_state.error}")
1615
2251
 
1616
2252
  if sub_state.status == RunStatus.WAITING:
1617
2253
  # Subworkflow is waiting - parent must also wait
@@ -1623,12 +2259,18 @@ class Runtime:
1623
2259
  details={
1624
2260
  "sub_run_id": sub_run_id,
1625
2261
  "sub_workflow_id": workflow_id,
2262
+ "include_traces": include_traces,
1626
2263
  "sub_waiting": {
1627
2264
  "reason": sub_state.waiting.reason.value if sub_state.waiting else None,
1628
2265
  "wait_key": sub_state.waiting.wait_key if sub_state.waiting else None,
1629
2266
  },
1630
2267
  },
1631
2268
  )
2269
+ if wrap_as_tool_result:
2270
+ if isinstance(wait.details, dict):
2271
+ wait.details["wrap_as_tool_result"] = True
2272
+ wait.details["tool_name"] = tool_name or "start_subworkflow"
2273
+ wait.details["call_id"] = call_id or "subworkflow"
1632
2274
  return EffectOutcome.waiting(wait)
1633
2275
 
1634
2276
  # Unexpected status
@@ -1680,7 +2322,21 @@ class Runtime:
1680
2322
  tool_name = str(payload.get("tool_name") or "recall_memory")
1681
2323
  call_id = str(payload.get("call_id") or "memory")
1682
2324
 
1683
- # Scope routing (run-tree/global). Scope affects which run owns the span index queried.
2325
+ # Recall effort policy (optional; no silent fallback).
2326
+ recall_level_raw = payload.get("recall_level")
2327
+ if recall_level_raw is None:
2328
+ recall_level_raw = payload.get("recallLevel")
2329
+ try:
2330
+ from ..memory.recall_levels import parse_recall_level, policy_for
2331
+
2332
+ recall_level = parse_recall_level(recall_level_raw)
2333
+ except Exception as e:
2334
+ return EffectOutcome.failed(str(e))
2335
+
2336
+ recall_warnings: list[str] = []
2337
+ recall_effort: dict[str, Any] = {}
2338
+
2339
+ # Scope routing (run/session/global). Scope affects which run owns the span index queried.
1684
2340
  scope = str(payload.get("scope") or "run").strip().lower() or "run"
1685
2341
  if scope not in {"run", "session", "global", "all"}:
1686
2342
  return EffectOutcome.failed(f"Unknown memory_query scope: {scope}")
@@ -1753,6 +2409,7 @@ class Runtime:
1753
2409
  authors = _norm_str_list(payload.get("users"))
1754
2410
  locations = _norm_str_list(payload.get("locations") if "locations" in payload else payload.get("location"))
1755
2411
 
2412
+ limit_spans_provided = "limit_spans" in payload
1756
2413
  try:
1757
2414
  limit_spans = int(payload.get("limit_spans", 5) or 5)
1758
2415
  except Exception:
@@ -1760,12 +2417,14 @@ class Runtime:
1760
2417
  if limit_spans < 1:
1761
2418
  limit_spans = 1
1762
2419
 
2420
+ deep_provided = "deep" in payload
1763
2421
  deep = payload.get("deep")
1764
2422
  if deep is None:
1765
2423
  deep_enabled = bool(query_text)
1766
2424
  else:
1767
2425
  deep_enabled = bool(deep)
1768
2426
 
2427
+ deep_limit_spans_provided = "deep_limit_spans" in payload
1769
2428
  try:
1770
2429
  deep_limit_spans = int(payload.get("deep_limit_spans", 50) or 50)
1771
2430
  except Exception:
@@ -1773,6 +2432,7 @@ class Runtime:
1773
2432
  if deep_limit_spans < 1:
1774
2433
  deep_limit_spans = 1
1775
2434
 
2435
+ deep_limit_messages_provided = "deep_limit_messages_per_span" in payload
1776
2436
  try:
1777
2437
  deep_limit_messages_per_span = int(payload.get("deep_limit_messages_per_span", 400) or 400)
1778
2438
  except Exception:
@@ -1780,6 +2440,7 @@ class Runtime:
1780
2440
  if deep_limit_messages_per_span < 1:
1781
2441
  deep_limit_messages_per_span = 1
1782
2442
 
2443
+ connected_provided = "connected" in payload
1783
2444
  connected = bool(payload.get("connected", False))
1784
2445
  try:
1785
2446
  neighbor_hops = int(payload.get("neighbor_hops", 1) or 1)
@@ -1794,6 +2455,7 @@ class Runtime:
1794
2455
  else:
1795
2456
  connect_keys = ["topic", "person"]
1796
2457
 
2458
+ max_messages_provided = "max_messages" in payload
1797
2459
  try:
1798
2460
  max_messages = int(payload.get("max_messages", -1) or -1)
1799
2461
  except Exception:
@@ -1804,6 +2466,79 @@ class Runtime:
1804
2466
  if max_messages != -1 and max_messages < 1:
1805
2467
  max_messages = 1
1806
2468
 
2469
+ # Apply recall_level budgets when explicitly provided (no silent downgrade).
2470
+ if recall_level is not None:
2471
+ pol = policy_for(recall_level)
2472
+
2473
+ if not limit_spans_provided:
2474
+ limit_spans = pol.span.limit_spans_default
2475
+ if limit_spans > pol.span.limit_spans_max:
2476
+ recall_warnings.append(
2477
+ f"recall_level={recall_level.value}: clamped limit_spans from {limit_spans} to {pol.span.limit_spans_max}"
2478
+ )
2479
+ limit_spans = pol.span.limit_spans_max
2480
+
2481
+ if deep_enabled and not pol.span.deep_allowed:
2482
+ recall_warnings.append(
2483
+ f"recall_level={recall_level.value}: deep scan disabled (not allowed at this level)"
2484
+ )
2485
+ deep_enabled = False
2486
+
2487
+ if deep_enabled and not deep_limit_spans_provided:
2488
+ deep_limit_spans = min(deep_limit_spans, pol.span.deep_limit_spans_max)
2489
+ if deep_limit_spans > pol.span.deep_limit_spans_max:
2490
+ recall_warnings.append(
2491
+ f"recall_level={recall_level.value}: clamped deep_limit_spans from {deep_limit_spans} to {pol.span.deep_limit_spans_max}"
2492
+ )
2493
+ deep_limit_spans = pol.span.deep_limit_spans_max
2494
+
2495
+ if deep_enabled and not deep_limit_messages_provided:
2496
+ deep_limit_messages_per_span = min(deep_limit_messages_per_span, pol.span.deep_limit_messages_per_span_max)
2497
+ if deep_limit_messages_per_span > pol.span.deep_limit_messages_per_span_max:
2498
+ recall_warnings.append(
2499
+ f"recall_level={recall_level.value}: clamped deep_limit_messages_per_span from {deep_limit_messages_per_span} to {pol.span.deep_limit_messages_per_span_max}"
2500
+ )
2501
+ deep_limit_messages_per_span = pol.span.deep_limit_messages_per_span_max
2502
+
2503
+ if connected and not pol.span.connected_allowed:
2504
+ recall_warnings.append(
2505
+ f"recall_level={recall_level.value}: connected expansion disabled (not allowed at this level)"
2506
+ )
2507
+ connected = False
2508
+
2509
+ if neighbor_hops > pol.span.neighbor_hops_max:
2510
+ recall_warnings.append(
2511
+ f"recall_level={recall_level.value}: clamped neighbor_hops from {neighbor_hops} to {pol.span.neighbor_hops_max}"
2512
+ )
2513
+ neighbor_hops = pol.span.neighbor_hops_max
2514
+
2515
+ # Enforce bounded rendering budget (max_messages). -1 means "unbounded" and is not allowed when policy is active.
2516
+ if not max_messages_provided:
2517
+ max_messages = pol.span.max_messages_default
2518
+ elif max_messages == -1:
2519
+ recall_warnings.append(
2520
+ f"recall_level={recall_level.value}: max_messages=-1 (unbounded) is not allowed; clamped to {pol.span.max_messages_max}"
2521
+ )
2522
+ max_messages = pol.span.max_messages_max
2523
+ elif max_messages > pol.span.max_messages_max:
2524
+ recall_warnings.append(
2525
+ f"recall_level={recall_level.value}: clamped max_messages from {max_messages} to {pol.span.max_messages_max}"
2526
+ )
2527
+ max_messages = pol.span.max_messages_max
2528
+
2529
+ recall_effort = {
2530
+ "recall_level": recall_level.value,
2531
+ "applied": {
2532
+ "limit_spans": limit_spans,
2533
+ "deep": bool(deep_enabled),
2534
+ "deep_limit_spans": deep_limit_spans,
2535
+ "deep_limit_messages_per_span": deep_limit_messages_per_span,
2536
+ "connected": bool(connected),
2537
+ "neighbor_hops": neighbor_hops,
2538
+ "max_messages": max_messages,
2539
+ },
2540
+ }
2541
+
1807
2542
  from ..memory.active_context import ActiveContextPolicy, TimeRange
1808
2543
 
1809
2544
  # Select run(s) to query.
@@ -1984,6 +2719,17 @@ class Runtime:
1984
2719
 
1985
2720
  meta = {"matches": matches, "span_ids": list(all_selected)}
1986
2721
 
2722
+ # Attach recall policy transparency (warnings + applied budgets).
2723
+ if recall_level is not None:
2724
+ if return_mode in {"meta", "both"}:
2725
+ if recall_effort:
2726
+ meta["effort"] = recall_effort
2727
+ if recall_warnings:
2728
+ meta["warnings"] = list(recall_warnings)
2729
+ if return_mode in {"rendered", "both"} and recall_warnings:
2730
+ warnings_block = "\n".join([f"- {w}" for w in recall_warnings if str(w).strip()])
2731
+ rendered_text = f"[recall warnings]\n{warnings_block}\n\n{rendered_text}".strip()
2732
+
1987
2733
  result = {
1988
2734
  "mode": "executed",
1989
2735
  "results": [
@@ -2106,33 +2852,38 @@ class Runtime:
2106
2852
 
2107
2853
  Payload (required unless stated):
2108
2854
  - span_id: str | int (artifact_id or 1-based index into `_runtime.memory_spans`)
2855
+ - scope: str (optional, default "run") "run" | "session" | "global" | "all"
2109
2856
  - tags: dict[str,str] (merged into span["tags"] by default)
2110
2857
  - merge: bool (optional, default True; when False, replaces span["tags"])
2858
+ - target_run_id: str (optional; defaults to current run_id; used as the base run for scope routing)
2111
2859
  - tool_name: str (optional; for tool-style output, default "remember")
2112
2860
  - call_id: str (optional; passthrough for tool-style output)
2113
2861
 
2114
2862
  Notes:
2115
- - This mutates the in-run span index (`_runtime.memory_spans`) only; it does not change artifacts.
2863
+ - This mutates the owner run's span index (`_runtime.memory_spans`) only; it does not change artifacts.
2116
2864
  - Tagging is intentionally JSON-safe (string->string).
2117
2865
  """
2118
2866
  import json
2119
2867
 
2120
2868
  from .vars import ensure_namespaces
2121
2869
 
2122
- ensure_namespaces(run.vars)
2123
- runtime_ns = run.vars.get("_runtime")
2124
- if not isinstance(runtime_ns, dict):
2125
- runtime_ns = {}
2126
- run.vars["_runtime"] = runtime_ns
2127
-
2128
- spans = runtime_ns.get("memory_spans")
2129
- if not isinstance(spans, list):
2130
- return EffectOutcome.failed("MEMORY_TAG requires _runtime.memory_spans to be a list")
2131
-
2132
2870
  payload = dict(effect.payload or {})
2133
2871
  tool_name = str(payload.get("tool_name") or "remember")
2134
2872
  call_id = str(payload.get("call_id") or "memory")
2135
2873
 
2874
+ base_run_id = str(payload.get("target_run_id") or run.run_id).strip() or run.run_id
2875
+ base_run = run
2876
+ if base_run_id != run.run_id:
2877
+ loaded = self._run_store.load(base_run_id)
2878
+ if loaded is None:
2879
+ return EffectOutcome.failed(f"Unknown target_run_id: {base_run_id}")
2880
+ base_run = loaded
2881
+ ensure_namespaces(base_run.vars)
2882
+
2883
+ scope = str(payload.get("scope") or "run").strip().lower() or "run"
2884
+ if scope not in {"run", "session", "global", "all"}:
2885
+ return EffectOutcome.failed(f"Unknown memory_tag scope: {scope}")
2886
+
2136
2887
  span_id = payload.get("span_id")
2137
2888
  tags = payload.get("tags")
2138
2889
  if span_id is None:
@@ -2145,75 +2896,145 @@ class Runtime:
2145
2896
  clean_tags: Dict[str, str] = {}
2146
2897
  for k, v in tags.items():
2147
2898
  if isinstance(k, str) and isinstance(v, str) and k and v:
2899
+ if k == "kind":
2900
+ continue
2148
2901
  clean_tags[k] = v
2149
2902
  if not clean_tags:
2150
2903
  return EffectOutcome.failed("MEMORY_TAG requires at least one non-empty string tag")
2151
2904
 
2152
2905
  artifact_id: Optional[str] = None
2153
- target_index: Optional[int] = None
2906
+ index_hint: Optional[int] = None
2154
2907
 
2155
2908
  if isinstance(span_id, int):
2156
- idx = span_id - 1
2157
- if idx < 0 or idx >= len(spans):
2158
- return EffectOutcome.failed(f"Unknown span index: {span_id}")
2159
- span = spans[idx]
2160
- if not isinstance(span, dict):
2161
- return EffectOutcome.failed(f"Invalid span record at index {span_id}")
2162
- artifact_id = str(span.get("artifact_id") or "").strip() or None
2163
- target_index = idx
2909
+ index_hint = span_id
2164
2910
  elif isinstance(span_id, str):
2165
2911
  s = span_id.strip()
2166
2912
  if not s:
2167
2913
  return EffectOutcome.failed("MEMORY_TAG requires a non-empty span_id")
2168
2914
  if s.isdigit():
2169
- idx = int(s) - 1
2170
- if idx < 0 or idx >= len(spans):
2171
- return EffectOutcome.failed(f"Unknown span index: {s}")
2172
- span = spans[idx]
2173
- if not isinstance(span, dict):
2174
- return EffectOutcome.failed(f"Invalid span record at index {s}")
2175
- artifact_id = str(span.get("artifact_id") or "").strip() or None
2176
- target_index = idx
2915
+ index_hint = int(s)
2177
2916
  else:
2178
2917
  artifact_id = s
2179
2918
  else:
2180
2919
  return EffectOutcome.failed("MEMORY_TAG requires span_id as str or int")
2181
2920
 
2182
- if not artifact_id:
2183
- return EffectOutcome.failed("Could not resolve span_id to an artifact_id")
2921
+ if scope == "all" and index_hint is not None:
2922
+ return EffectOutcome.failed("memory_tag scope='all' requires span_id as artifact id (no indices)")
2184
2923
 
2185
- if target_index is None:
2186
- for i, span in enumerate(spans):
2924
+ def _ensure_spans(target_run: RunState) -> list[dict[str, Any]]:
2925
+ ensure_namespaces(target_run.vars)
2926
+ target_runtime_ns = target_run.vars.get("_runtime")
2927
+ if not isinstance(target_runtime_ns, dict):
2928
+ target_runtime_ns = {}
2929
+ target_run.vars["_runtime"] = target_runtime_ns
2930
+ spans_any = target_runtime_ns.get("memory_spans")
2931
+ if not isinstance(spans_any, list):
2932
+ spans_any = []
2933
+ target_runtime_ns["memory_spans"] = spans_any
2934
+ return spans_any # type: ignore[return-value]
2935
+
2936
+ def _resolve_target_index(spans_list: list[Any], *, artifact_id_value: str, index_value: Optional[int]) -> Optional[int]:
2937
+ if index_value is not None:
2938
+ idx = int(index_value) - 1
2939
+ if idx < 0 or idx >= len(spans_list):
2940
+ return None
2941
+ span = spans_list[idx]
2942
+ if not isinstance(span, dict):
2943
+ return None
2944
+ return idx
2945
+ for i, span in enumerate(spans_list):
2187
2946
  if not isinstance(span, dict):
2188
2947
  continue
2189
- if str(span.get("artifact_id") or "") == artifact_id:
2190
- target_index = i
2191
- break
2948
+ if str(span.get("artifact_id") or "") == artifact_id_value:
2949
+ return i
2950
+ return None
2192
2951
 
2193
- if target_index is None:
2194
- return EffectOutcome.failed(f"Unknown span_id: {artifact_id}")
2952
+ def _apply_tags(target_run: RunState, spans_list: list[Any]) -> Optional[dict[str, Any]]:
2953
+ artifact_id_local = artifact_id
2954
+ target_index_local: Optional[int] = None
2195
2955
 
2196
- target = spans[target_index]
2197
- if not isinstance(target, dict):
2198
- return EffectOutcome.failed(f"Invalid span record at index {target_index + 1}")
2956
+ # Resolve index->artifact id when an index hint is used.
2957
+ if index_hint is not None:
2958
+ idx = int(index_hint) - 1
2959
+ if idx < 0 or idx >= len(spans_list):
2960
+ return None
2961
+ span = spans_list[idx]
2962
+ if not isinstance(span, dict):
2963
+ return None
2964
+ resolved = str(span.get("artifact_id") or "").strip()
2965
+ if not resolved:
2966
+ return None
2967
+ artifact_id_local = resolved
2968
+ target_index_local = idx
2969
+
2970
+ if not artifact_id_local:
2971
+ return None
2199
2972
 
2200
- existing_tags = target.get("tags")
2201
- if not isinstance(existing_tags, dict):
2202
- existing_tags = {}
2973
+ if target_index_local is None:
2974
+ target_index_local = _resolve_target_index(
2975
+ spans_list, artifact_id_value=str(artifact_id_local), index_value=None
2976
+ )
2977
+ if target_index_local is None:
2978
+ return None
2203
2979
 
2204
- if merge:
2205
- merged_tags = dict(existing_tags)
2206
- merged_tags.update(clean_tags)
2207
- else:
2208
- merged_tags = dict(clean_tags)
2980
+ target = spans_list[target_index_local]
2981
+ if not isinstance(target, dict):
2982
+ return None
2209
2983
 
2210
- target["tags"] = merged_tags
2211
- target["tagged_at"] = utc_now_iso()
2212
- if run.actor_id:
2213
- target["tagged_by"] = str(run.actor_id)
2984
+ existing_tags = target.get("tags")
2985
+ if not isinstance(existing_tags, dict):
2986
+ existing_tags = {}
2987
+
2988
+ if merge:
2989
+ merged_tags = dict(existing_tags)
2990
+ merged_tags.update(clean_tags)
2991
+ else:
2992
+ merged_tags = dict(clean_tags)
2993
+
2994
+ target["tags"] = merged_tags
2995
+ target["tagged_at"] = utc_now_iso()
2996
+ if run.actor_id:
2997
+ target["tagged_by"] = str(run.actor_id)
2998
+ return {"run_id": target_run.run_id, "artifact_id": str(artifact_id_local), "tags": merged_tags}
2999
+
3000
+ # Resolve which run(s) to tag.
3001
+ runs_to_tag: list[RunState] = []
3002
+ if scope == "all":
3003
+ root = self._resolve_scope_owner_run(base_run, scope="session")
3004
+ global_run = self._resolve_scope_owner_run(base_run, scope="global")
3005
+ seen_ids: set[str] = set()
3006
+ for r in (base_run, root, global_run):
3007
+ if r.run_id in seen_ids:
3008
+ continue
3009
+ seen_ids.add(r.run_id)
3010
+ runs_to_tag.append(r)
3011
+ else:
3012
+ try:
3013
+ runs_to_tag = [self._resolve_scope_owner_run(base_run, scope=scope)]
3014
+ except Exception as e:
3015
+ return EffectOutcome.failed(str(e))
2214
3016
 
2215
- rendered_tags = json.dumps(merged_tags, ensure_ascii=False, sort_keys=True)
2216
- text = f"Tagged span_id={artifact_id} tags={rendered_tags}"
3017
+ applied: list[dict[str, Any]] = []
3018
+ for target_run in runs_to_tag:
3019
+ spans_list = _ensure_spans(target_run)
3020
+ entry = _apply_tags(target_run, spans_list)
3021
+ if entry is None:
3022
+ continue
3023
+ applied.append(entry)
3024
+ if target_run is not run:
3025
+ target_run.updated_at = utc_now_iso()
3026
+ self._run_store.save(target_run)
3027
+
3028
+ if not applied:
3029
+ if artifact_id:
3030
+ return EffectOutcome.failed(f"Unknown span_id: {artifact_id}")
3031
+ if index_hint is not None:
3032
+ return EffectOutcome.failed(f"Unknown span index: {index_hint}")
3033
+ return EffectOutcome.failed("Could not resolve span_id")
3034
+
3035
+ rendered_tags = json.dumps(applied[0].get("tags") or {}, ensure_ascii=False, sort_keys=True)
3036
+ rendered_runs = ",".join([str(x.get("run_id") or "") for x in applied if x.get("run_id")])
3037
+ text = f"Tagged span_id={applied[0].get('artifact_id')} scope={scope} runs=[{rendered_runs}] tags={rendered_tags}"
2217
3038
 
2218
3039
  result = {
2219
3040
  "mode": "executed",
@@ -2224,6 +3045,7 @@ class Runtime:
2224
3045
  "success": True,
2225
3046
  "output": text,
2226
3047
  "error": None,
3048
+ "meta": {"applied": applied},
2227
3049
  }
2228
3050
  ],
2229
3051
  }
@@ -2699,7 +3521,13 @@ class Runtime:
2699
3521
 
2700
3522
  preview = note_text
2701
3523
  if len(preview) > 160:
2702
- preview = preview[:157] + "…"
3524
+ #[WARNING:TRUNCATION] bounded memory_note preview for spans listing
3525
+ marker = "… (truncated)"
3526
+ keep = max(0, 160 - len(marker))
3527
+ if keep <= 0:
3528
+ preview = marker[:160].rstrip()
3529
+ else:
3530
+ preview = preview[:keep].rstrip() + marker
2703
3531
 
2704
3532
  span_record: Dict[str, Any] = {
2705
3533
  "kind": "memory_note",
@@ -2820,21 +3648,67 @@ class Runtime:
2820
3648
  payload = dict(effect.payload or {})
2821
3649
  target_run_id = str(payload.get("target_run_id") or run.run_id).strip() or run.run_id
2822
3650
 
3651
+ # Recall effort policy (optional; no silent fallback).
3652
+ recall_level_raw = payload.get("recall_level")
3653
+ if recall_level_raw is None:
3654
+ recall_level_raw = payload.get("recallLevel")
3655
+ try:
3656
+ from ..memory.recall_levels import parse_recall_level, policy_for
3657
+
3658
+ recall_level = parse_recall_level(recall_level_raw)
3659
+ except Exception as e:
3660
+ return EffectOutcome.failed(str(e))
3661
+
3662
+ recall_warnings: list[str] = []
3663
+ recall_effort: dict[str, Any] = {}
3664
+
2823
3665
  # Normalize span_ids (accept legacy `span_id` too).
2824
3666
  raw_span_ids = payload.get("span_ids")
2825
3667
  if raw_span_ids is None:
2826
3668
  raw_span_ids = payload.get("span_id")
3669
+ if raw_span_ids is None:
3670
+ return EffectOutcome.failed("MEMORY_REHYDRATE requires payload.span_ids (or legacy span_id)")
2827
3671
  span_ids: list[Any] = []
2828
3672
  if isinstance(raw_span_ids, list):
2829
3673
  span_ids = list(raw_span_ids)
2830
3674
  elif raw_span_ids is not None:
2831
3675
  span_ids = [raw_span_ids]
2832
3676
  if not span_ids:
2833
- return EffectOutcome.failed("MEMORY_REHYDRATE requires payload.span_ids (non-empty list)")
3677
+ # Empty rehydrate is a valid no-op (common when recall returns no spans).
3678
+ return EffectOutcome.completed(result={"inserted": 0, "skipped": 0, "artifacts": []})
2834
3679
 
2835
3680
  placement = str(payload.get("placement") or "after_summary").strip() or "after_summary"
2836
3681
  dedup_by = str(payload.get("dedup_by") or "message_id").strip() or "message_id"
2837
3682
  max_messages = payload.get("max_messages")
3683
+ max_messages_provided = "max_messages" in payload
3684
+
3685
+ if recall_level is not None:
3686
+ pol = policy_for(recall_level)
3687
+ raw_max = max_messages
3688
+ parsed: Optional[int] = None
3689
+ if raw_max is not None and not isinstance(raw_max, bool):
3690
+ try:
3691
+ parsed = int(float(raw_max))
3692
+ except Exception:
3693
+ parsed = None
3694
+ if not max_messages_provided or parsed is None:
3695
+ parsed = pol.rehydrate.max_messages_default
3696
+ if parsed < 1:
3697
+ recall_warnings.append(
3698
+ f"recall_level={recall_level.value}: max_messages must be >=1; using {pol.rehydrate.max_messages_default}"
3699
+ )
3700
+ parsed = pol.rehydrate.max_messages_default
3701
+ if parsed > pol.rehydrate.max_messages_max:
3702
+ recall_warnings.append(
3703
+ f"recall_level={recall_level.value}: clamped max_messages from {parsed} to {pol.rehydrate.max_messages_max}"
3704
+ )
3705
+ parsed = pol.rehydrate.max_messages_max
3706
+
3707
+ max_messages = parsed
3708
+ recall_effort = {
3709
+ "recall_level": recall_level.value,
3710
+ "applied": {"max_messages": int(parsed)},
3711
+ }
2838
3712
 
2839
3713
  # Load the target run (may be different from current).
2840
3714
  target_run = run
@@ -2922,6 +3796,8 @@ class Runtime:
2922
3796
  "inserted": out.get("inserted", 0),
2923
3797
  "skipped": out.get("skipped", 0),
2924
3798
  "artifacts": artifacts_out,
3799
+ "effort": recall_effort if recall_effort else None,
3800
+ "warnings": list(recall_warnings) if recall_warnings else None,
2925
3801
  }
2926
3802
  )
2927
3803