PyPI - fabri - Versions diffs - 0.1.0__py3-none-any.whl - Mend

fabri 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63) hide show

fabri/__init__.py +35 -0
fabri/admin.py +73 -0
fabri/cli.py +182 -0
fabri/config.py +76 -0
fabri/core/__init__.py +0 -0
fabri/core/agent.py +229 -0
fabri/core/decompose.py +65 -0
fabri/core/llm.py +254 -0
fabri/core/logging_setup.py +42 -0
fabri/core/outcome.py +8 -0
fabri/memory/__init__.py +0 -0
fabri/memory/compress.py +34 -0
fabri/memory/embeddings.py +17 -0
fabri/memory/pruning.py +76 -0
fabri/memory/schema.py +50 -0
fabri/memory/store.py +83 -0
fabri/orchestrator/__init__.py +0 -0
fabri/orchestrator/pipeline.py +56 -0
fabri/orchestrator/retrieval.py +45 -0
fabri/orchestrator/traces.py +22 -0
fabri/paths.py +27 -0
fabri/runtime.py +68 -0
fabri/scaffold.py +104 -0
fabri/tools/__init__.py +0 -0
fabri/tools/agent_runner_tool.py +49 -0
fabri/tools/agent_tool.py +34 -0
fabri/tools/examples/bash.json +12 -0
fabri/tools/examples/bash.py +54 -0
fabri/tools/examples/broken_tool.json +8 -0
fabri/tools/examples/broken_tool.py +7 -0
fabri/tools/examples/echo_tool.json +8 -0
fabri/tools/examples/echo_tool.py +5 -0
fabri/tools/examples/edit_file.json +17 -0
fabri/tools/examples/edit_file.py +48 -0
fabri/tools/examples/example_go_tool/go.mod +3 -0
fabri/tools/examples/example_go_tool/main.go +28 -0
fabri/tools/examples/fetch_url.json +12 -0
fabri/tools/examples/fetch_url.py +40 -0
fabri/tools/examples/grep.json +16 -0
fabri/tools/examples/grep.py +62 -0
fabri/tools/examples/list_dir.json +8 -0
fabri/tools/examples/list_dir.py +35 -0
fabri/tools/examples/mcp_tool.json +16 -0
fabri/tools/examples/mcp_tool.py +37 -0
fabri/tools/examples/python_exec.json +12 -0
fabri/tools/examples/python_exec.py +52 -0
fabri/tools/examples/read_file.json +8 -0
fabri/tools/examples/read_file.py +31 -0
fabri/tools/examples/sum_tool.json +8 -0
fabri/tools/examples/web_search.json +8 -0
fabri/tools/examples/web_search.py +37 -0
fabri/tools/examples/write_file.json +12 -0
fabri/tools/examples/write_file.py +30 -0
fabri/tools/manifest_schema.py +34 -0
fabri/tools/registry.py +34 -0
fabri/tools/runner.py +71 -0
fabri/toon.py +354 -0
fabri-0.1.0.dist-info/METADATA +243 -0
fabri-0.1.0.dist-info/RECORD +63 -0
fabri-0.1.0.dist-info/WHEEL +5 -0
fabri-0.1.0.dist-info/entry_points.txt +2 -0
fabri-0.1.0.dist-info/licenses/LICENSE +201 -0
fabri-0.1.0.dist-info/top_level.txt +1 -0

fabri/__init__.py ADDED Viewed

@@ -0,0 +1,35 @@
+from fabri.admin import AdminAuthError, describe_config, memory_summary, render_dashboard, require_admin
+from fabri.config import DEFAULT_CONFIG, load_config
+from fabri.core.agent import AgentProtocolError, run_agent
+from fabri.core.llm import AnthropicLLMBackend, LLMBackend, LLMError, OpenAILLMBackend, ScriptedLLMBackend
+from fabri.core.outcome import Outcome
+from fabri.memory.store import QdrantMemoryStore
+from fabri.orchestrator.pipeline import process_trace
+from fabri.runtime import build_llm, build_tool_defs, build_tools
+from fabri.tools.agent_tool import make_agent_tool_manifest
+from fabri.tools.registry import ToolRegistry
+# Public API of the package: exactly the names re-exported by the imports
+# above, kept in ASCII sort order (uppercase-leading names first).
+__all__ = [
+    "AdminAuthError",
+    "AgentProtocolError",
+    "AnthropicLLMBackend",
+    "DEFAULT_CONFIG",
+    "LLMBackend",
+    "LLMError",
+    "OpenAILLMBackend",
+    "Outcome",
+    "QdrantMemoryStore",
+    "ScriptedLLMBackend",
+    "ToolRegistry",
+    "build_llm",
+    "build_tool_defs",
+    "build_tools",
+    "describe_config",
+    "load_config",
+    "make_agent_tool_manifest",
+    "memory_summary",
+    "process_trace",
+    "render_dashboard",
+    "require_admin",
+    "run_agent",
+]

fabri/admin.py ADDED Viewed

@@ -0,0 +1,73 @@
+"""Admin-only surface: config inspection and a dashboard summary, usable from
+either the CLI (`cli.py admin ...`) or directly as a library call. There is no
+real auth backend yet -- FABRI_ADMIN_TOKEN is a placeholder seam, not a
+security boundary. Every admin entry point funnels through require_admin() so
+real auth (SSO, an API gateway, whatever the deployment needs) has exactly one
+place to be wired in later, instead of being scattered across call sites."""
+import os
+from fabri.memory.store import QdrantMemoryStore
+from fabri.tools.agent_tool import AGENT_RUNNER_SCRIPT
+from fabri.tools.registry import ToolRegistry
+ADMIN_TOKEN_ENV = "FABRI_ADMIN_TOKEN"
+class AdminAuthError(RuntimeError):
+    pass
+def require_admin(token: str | None) -> None:
+    """If FABRI_ADMIN_TOKEN is unset, the gate is open -- there's no auth
+    backend yet, so refusing to run at all would just be theater. Set it to
+    start enforcing a shared-secret check; swap this function's body for real
+    auth whenever that's available, since every admin command already calls
+    it before doing anything."""
+    expected = os.environ.get(ADMIN_TOKEN_ENV)
+    if expected is None:
+        return
+    if token != expected:
+        raise AdminAuthError(f"admin token required (pass --admin-token, must match ${ADMIN_TOKEN_ENV})")
+def describe_config(config: dict, tools: ToolRegistry) -> dict:
+    """Merged config plus the resolved tool registry, in a shape safe to
+    print or serve: which tools are plain subprocess tools vs. another agent
+    wired in via tools.agents (agent-as-tool, see tools/agent_tool.py)."""
+    tool_rows = [
+        {
+            "name": t.name,
+            "description": t.description,
+            "is_agent_tool": str(AGENT_RUNNER_SCRIPT) in t.command,
+            "command": t.command,
+        }
+        for t in tools.list()
+    ]
+    return {
+        "agent": config["agent"],
+        "llm": config["llm"],
+        "sandbox_root": config["tools"]["sandbox_root"],
+        "decompose": config["tools"]["decompose"],
+        "tools": tool_rows,
+    }
+def memory_summary(store: QdrantMemoryStore) -> dict:
+    return {"tactical": store.count(kind="tactical"), "strategic": store.count(kind="strategic")}
+def render_dashboard(config: dict, tools: ToolRegistry, store: QdrantMemoryStore) -> str:
+    desc = describe_config(config, tools)
+    mem = memory_summary(store)
+    lines = [
+        f"agent:    {desc['agent']['name']}  (max_steps={desc['agent']['max_steps']})",
+        f"llm:      {desc['llm']['provider']}/{desc['llm']['model']}",
+        f"sandbox:  {desc['sandbox_root']}",
+        f"decompose: {'on' if desc['decompose']['enabled'] else 'off'}",
+        f"memory:   {mem['tactical']} tactical / {mem['strategic']} strategic guidelines",
+        "tools:",
+    ]
+    for t in desc["tools"]:
+        kind = "agent" if t["is_agent_tool"] else "tool"
+        lines.append(f"  - [{kind}] {t['name']}: {t['description'].strip().splitlines()[0]}")
+    return "\n".join(lines)

fabri/cli.py ADDED Viewed

@@ -0,0 +1,182 @@
+import argparse
+import json
+import os
+import sys
+import uuid
+from fabri.admin import AdminAuthError, describe_config, render_dashboard, require_admin
+from fabri.config import load_config
+from fabri.core.agent import run_agent
+from fabri.core.logging_setup import configure_logging
+from fabri.memory.store import QdrantMemoryStore
+from fabri.orchestrator.pipeline import process_trace
+from fabri.runtime import build_llm, build_tool_defs, build_tools
+from fabri.scaffold import next_steps, scaffold
+def cmd_init(args: argparse.Namespace) -> None:
+    result = scaffold(args.dir, force=args.force)
+    where = "current directory" if args.dir in (".", "") else args.dir
+    if result["created"]:
+        print(f"Scaffolded a starter fabri project in {where}:")
+        for rel in result["created"]:
+            print(f"  + {rel}")
+    if result["skipped"]:
+        print("\nLeft existing files untouched (pass --force to overwrite):")
+        for rel in result["skipped"]:
+            print(f"  . {rel}")
+    print("\n" + next_steps(args.dir))
+def _require_api_key(api_key_env: str) -> None:
+    if not os.environ.get(api_key_env):
+        print(
+            f"{api_key_env} is not set. Export it before running the live agent, "
+            f"e.g.: export {api_key_env}=sk-...",
+            file=sys.stderr,
+        )
+        sys.exit(1)
+def cmd_run(args: argparse.Namespace) -> None:
+    """Handle `fabri run`: run the agent on `args.task`, print its result as
+    JSON, then synthesize guidelines from the session's trace and print any
+    that were produced. Exits early (via _require_api_key) when the configured
+    API-key environment variable is not set."""
+    config = load_config(args.config)
+    _require_api_key(config["llm"]["api_key_env"])
+    # Reuse the caller-supplied session id when given; otherwise mint a UUID.
+    session_id = args.session_id or str(uuid.uuid4())
+    configure_logging(session_id, verbose=args.verbose)
+    mem_cfg = config["memory"]
+    store = QdrantMemoryStore(url=mem_cfg["qdrant_url"], collection=mem_cfg["collection"])
+    tools_cfg = config["tools"]
+    tools = build_tools(tools_cfg)
+    decompose_cfg = tools_cfg["decompose"]
+    llm = build_llm(config, build_tool_defs(tools, decompose_cfg))
+    result = run_agent(
+        args.task,
+        llm,
+        tools,
+        store,
+        session_id=session_id,
+        max_steps=config["agent"]["max_steps"],
+        top_k=mem_cfg["top_k"],
+        max_subquestions=decompose_cfg["max_subquestions"],
+        system_prompt=config["agent"].get("system_prompt", ""),
+        system_prompt_prefix=config["agent"].get("system_prompt_prefix", ""),
+        result_format=tools_cfg.get("result_format", "toon"),
+        output_format=config["agent"].get("output_format", "json"),
+    )
+    print(json.dumps(result, indent=2))
+    # A second backend built with an empty tool-definition list; it is only
+    # handed to process_trace() below.
+    compress_llm = build_llm(config, [])
+    entries = process_trace(
+        session_id,
+        store,
+        compress_llm,
+        guideline_max_tokens=mem_cfg["guideline_max_tokens"],
+        similarity_threshold=mem_cfg["similarity_threshold"],
+        promotion_threshold_sessions=mem_cfg["promotion_threshold_sessions"],
+    )
+    if entries:
+        print(f"\nSynthesized {len(entries)} guideline(s) from this run:")
+        for e in entries:
+            print(f"  [{e.kind}] {e.text}")
+def cmd_ingest_traces(args: argparse.Namespace) -> None:
+    config = load_config(args.config)
+    _require_api_key(config["llm"]["api_key_env"])
+    configure_logging(args.session_id, verbose=args.verbose)
+    mem_cfg = config["memory"]
+    store = QdrantMemoryStore(url=mem_cfg["qdrant_url"], collection=mem_cfg["collection"])
+    llm = build_llm(config, [])
+    entries = process_trace(
+        args.session_id,
+        store,
+        llm,
+        guideline_max_tokens=mem_cfg["guideline_max_tokens"],
+        similarity_threshold=mem_cfg["similarity_threshold"],
+        promotion_threshold_sessions=mem_cfg["promotion_threshold_sessions"],
+    )
+    print(json.dumps([e.to_payload() for e in entries], indent=2))
+def cmd_inspect_memory(args: argparse.Namespace) -> None:
+    config = load_config(args.config)
+    mem_cfg = config["memory"]
+    store = QdrantMemoryStore(url=mem_cfg["qdrant_url"], collection=mem_cfg["collection"])
+    print(f"tactical: {store.count(kind='tactical')}")
+    print(f"strategic: {store.count(kind='strategic')}")
+    if args.query:
+        for entry, score in store.query(args.query, top_k=args.top_k):
+            print(f"  [{entry.kind}] ({score:.2f}) {entry.text}")
+def cmd_admin_config(args: argparse.Namespace) -> None:
+    try:
+        require_admin(args.admin_token)
+    except AdminAuthError as e:
+        print(str(e), file=sys.stderr)
+        sys.exit(1)
+    config = load_config(args.config)
+    tools = build_tools(config["tools"])
+    print(json.dumps(describe_config(config, tools), indent=2))
+def cmd_admin_dashboard(args: argparse.Namespace) -> None:
+    try:
+        require_admin(args.admin_token)
+    except AdminAuthError as e:
+        print(str(e), file=sys.stderr)
+        sys.exit(1)
+    config = load_config(args.config)
+    tools = build_tools(config["tools"])
+    mem_cfg = config["memory"]
+    store = QdrantMemoryStore(url=mem_cfg["qdrant_url"], collection=mem_cfg["collection"])
+    print(render_dashboard(config, tools, store))
+def main() -> None:
+    """CLI entry point: build the `fabri` argument parser, parse the command
+    line and dispatch to the handler registered for the chosen sub-command."""
+    parser = argparse.ArgumentParser(prog="fabri")
+    # Global options live on the top-level parser, so they are given before
+    # the sub-command name.
+    parser.add_argument("--verbose", action="store_true", help="Log at DEBUG level to the console")
+    parser.add_argument("--config", dest="config", default=None, help="Path to an agent.yaml config")
+    sub = parser.add_subparsers(dest="command", required=True)
+    # init: scaffold a starter project.
+    p_init = sub.add_parser("init", help="Scaffold a starter fabri project (agent.yaml, tools, docker-compose)")
+    p_init.add_argument("dir", nargs="?", default=".", help="Target directory (default: current)")
+    p_init.add_argument("--force", action="store_true", help="Overwrite existing files")
+    p_init.set_defaults(func=cmd_init)
+    # run: execute the agent on a task.
+    p_run = sub.add_parser("run", help="Run the agent on a task")
+    p_run.add_argument("task")
+    p_run.add_argument("--session-id", dest="session_id", default=None)
+    p_run.set_defaults(func=cmd_run)
+    # ingest-traces: synthesize guidelines from an existing session's trace.
+    p_ingest = sub.add_parser("ingest-traces", help="Synthesize guidelines from a session's trace")
+    p_ingest.add_argument("session_id")
+    p_ingest.set_defaults(func=cmd_ingest_traces)
+    # inspect-memory: counts, plus an optional similarity query.
+    p_inspect = sub.add_parser("inspect-memory", help="Inspect stored memory, optionally querying it")
+    p_inspect.add_argument("query", nargs="?", default=None)
+    p_inspect.add_argument("--top-k", dest="top_k", type=int, default=5)
+    p_inspect.set_defaults(func=cmd_inspect_memory)
+    # admin: config/dashboard inspection. Gated by require_admin() -- a stub
+    # shared-secret check (FABRI_ADMIN_TOKEN), not real auth. See admin.py.
+    p_admin = sub.add_parser("admin", help="Admin-only: inspect a config and its resolved tools/memory")
+    p_admin.add_argument("--admin-token", dest="admin_token", default=None)
+    admin_sub = p_admin.add_subparsers(dest="admin_command", required=True)
+    p_admin_config = admin_sub.add_parser("config", help="Print the merged config + resolved tool registry as JSON")
+    p_admin_config.set_defaults(func=cmd_admin_config)
+    p_admin_dash = admin_sub.add_parser("dashboard", help="Human-readable summary: agent, tools, memory counts")
+    p_admin_dash.set_defaults(func=cmd_admin_dashboard)
+    args = parser.parse_args()
+    # Every sub-parser registered its handler as `func` via set_defaults().
+    args.func(args)
+if __name__ == "__main__":
+    main()

fabri/config.py ADDED Viewed

@@ -0,0 +1,76 @@
+from pathlib import Path
+import yaml
+from fabri.core.decompose import DEFAULT_MAX_SUBQUESTIONS
+from fabri.memory.compress import DEFAULT_MAX_TOKENS
+from fabri.memory.pruning import PROMOTION_THRESHOLD_SESSIONS, SIMILARITY_THRESHOLD
+from fabri.memory.store import COLLECTION_NAME
+from fabri.orchestrator.retrieval import DEFAULT_TOP_K
+# Absolute path of the `tools/examples` directory that sits next to this module.
+DEFAULT_TOOLS_DIR = Path(__file__).resolve().parent / "tools" / "examples"
+# Framework defaults. load_config() merges a user's agent.yaml on top of this
+# mapping, so any field the user omits falls back to the value given here.
+DEFAULT_CONFIG = {
+    "agent": {
+        "name": "default",
+        "max_steps": 10,
+        # If `system_prompt` is set, it REPLACES the framework's generic
+        # boilerplate ("You are an autonomous agent..."). If `system_prompt_prefix`
+        # is set, it is prepended to whatever follows. Both empty = original
+        # behavior. Consuming projects use these to inject domain-specific
+        # identity, format contracts, few-shots, etc.
+        "system_prompt": "",
+        "system_prompt_prefix": "",
+        # Format the model is asked to PRODUCE structured output in (decompose).
+        # "json" is the reliable default; "toon" is opt-in (always json-fallback).
+        # Native tool-call arguments are always provider JSON regardless.
+        "output_format": "json",
+    },
+    "llm": {
+        "provider": "anthropic",
+        "model": "claude-sonnet-4-6",
+        "max_tokens": 1024,
+        # Name of the environment variable that holds the API key (not the key itself).
+        "api_key_env": "ANTHROPIC_API_KEY",
+    },
+    "tools": {
+        "manifest_dir": str(DEFAULT_TOOLS_DIR),
+        # NOTE(review): presumably None means "no allow-list, load every
+        # manifest" -- confirm against runtime.build_tools.
+        "enabled": None,
+        "sandbox_root": ".",
+        "agents": [],  # other agent.yaml configs exposed as tools -- see tools/agent_tool.py
+        # How tool results are serialized INTO the model's context. "toon" (default)
+        # saves input tokens; the framework encodes this end, so there's no model
+        # reliability risk. Set "json" to opt out.
+        "result_format": "toon",
+        "decompose": {"enabled": False, "max_subquestions": DEFAULT_MAX_SUBQUESTIONS},
+    },
+    "memory": {
+        "collection": COLLECTION_NAME,
+        "qdrant_url": "http://localhost:6333",
+        "top_k": DEFAULT_TOP_K,
+        "similarity_threshold": SIMILARITY_THRESHOLD,
+        "promotion_threshold_sessions": PROMOTION_THRESHOLD_SESSIONS,
+        "guideline_max_tokens": DEFAULT_MAX_TOKENS,
+    },
+}
+def _deep_merge(base: dict, override: dict) -> dict:
+    merged = dict(base)
+    for key, value in override.items():
+        if isinstance(value, dict) and isinstance(merged.get(key), dict):
+            merged[key] = _deep_merge(merged[key], value)
+        else:
+            merged[key] = value
+    return merged
+def load_config(path: str | None) -> dict:
+    """Load an agent.yaml config, merged on top of DEFAULT_CONFIG so omitted
+    fields fall back to today's hardcoded behavior unchanged. `path=None`
+    returns the framework defaults as-is -- the same shape a consuming
+    project's own agent.yaml would produce, so callers don't special-case it."""
+    if path is None:
+        return DEFAULT_CONFIG
+    with open(path) as f:
+        user_config = yaml.safe_load(f) or {}
+    return _deep_merge(DEFAULT_CONFIG, user_config)

fabri/core/__init__.py ADDED Viewed

File without changes

fabri/core/agent.py ADDED Viewed

@@ -0,0 +1,229 @@
+import json
+import time
+import uuid
+from fabri.core.decompose import DEFAULT_MAX_SUBQUESTIONS, decompose
+from fabri.core.llm import LLMBackend, LLMError, ToolCall
+from fabri.core.logging_setup import get_logger
+from fabri.toon import encode as toon_encode
+from fabri.core.outcome import Outcome
+from fabri.memory.store import QdrantMemoryStore
+from fabri.orchestrator.retrieval import DEFAULT_TOP_K, retrieve_context
+from fabri.orchestrator.traces import log_event
+from fabri.tools.registry import ToolRegistry
+# Default cap on the number of LLM turns in one run_agent() call.
+MAX_STEPS = 10
+# Tool name that _dispatch_tool_calls() routes to decompose() instead of the
+# ToolRegistry.
+DECOMPOSE_TOOL_NAME = "decompose"
+logger = get_logger()
+class AgentProtocolError(RuntimeError):
+    """Raised when an LLMBackend returns no tool calls and no usable final text
+    (None or empty) -- a malformed response that would otherwise silently burn
+    every remaining step before declaring INCOMPLETE with no diagnostic of why.
+    (An unrecoverable *provider* error is a different thing: core.llm.LLMError,
+    which the loop maps to Outcome.FAILED rather than raising.)"""
+# Identity line used by build_system_prompt() when no `system_prompt` is given.
+DEFAULT_AGENT_IDENTITY = "You are an autonomous agent. Use tools when needed, and stop once the task is done."
+# Added to the system prompt only when result_format == "toon", so the model
+# knows how to read TOON-encoded tool results.
+TOON_RESULT_NOTE = (
+    "Tool results are given to you in TOON, a compact format: objects are `key: value` "
+    "lines; arrays are `name[N]: v1,v2,...`, or a table `name[N]{f1,f2}:` followed by one "
+    "comma-separated row per element. Read it as structured data; keep calling tools and "
+    "answering normally."
+)
+def build_system_prompt(
+    context_block: str,
+    tool_descriptions: str,
+    *,
+    system_prompt: str = "",
+    system_prompt_prefix: str = "",
+    result_format: str = "json",
+) -> str:
+    # `system_prompt` (when set) replaces the framework's generic identity line
+    # entirely -- domain agents use this to inject "You are the story_agent..."
+    # with format pointers and few-shots. `system_prompt_prefix` (when set) is
+    # prepended verbatim; useful for global notes that apply across many configs.
+    # Both empty -> identical to pre-patch behavior.
+    identity = system_prompt or DEFAULT_AGENT_IDENTITY
+    parts = [
+        system_prompt_prefix,
+        identity,
+        f"Available tools:\n{tool_descriptions}" if tool_descriptions else "",
+        TOON_RESULT_NOTE if result_format == "toon" else "",
+        context_block,
+    ]
+    return "\n\n".join(p for p in parts if p)
+def _encode_result(result: dict, result_format: str) -> str:
+    """Serialize a tool result for the model. TOON saves input tokens; we never
+    let an encode error break the loop -- fall back to JSON."""
+    if result_format == "toon":
+        try:
+            return toon_encode(result)
+        except Exception:  # pragma: no cover - defensive, encode handles all JSON shapes
+            logger.warning("toon encode failed for a tool result; falling back to JSON")
+    return json.dumps(result)
+def run_agent(
+    task: str,
+    llm: LLMBackend,
+    tools: ToolRegistry,
+    store: QdrantMemoryStore,
+    session_id: str | None = None,
+    max_steps: int = MAX_STEPS,
+    top_k: int = DEFAULT_TOP_K,
+    max_subquestions: int = DEFAULT_MAX_SUBQUESTIONS,
+    system_prompt: str = "",
+    system_prompt_prefix: str = "",
+    result_format: str = "toon",
+    output_format: str = "json",
+) -> dict:
+    """Drive the tool-use loop for `task` for at most `max_steps` LLM turns.
+    Returns a dict with `session_id`, `success`, `final_text` (None unless a
+    final answer was produced) and `outcome` (the Outcome's value). An
+    LLMError ends the run with Outcome.FAILED instead of propagating; a
+    response with neither tool calls nor final text raises AgentProtocolError.
+    Each stage is also recorded through log_event() under the session id."""
+    # result_format: how tool results are serialized INTO the model's context
+    #   (toon = fewer input tokens; we control this end so it's reliability-free).
+    # output_format: the format the model is asked to PRODUCE structured output in
+    #   (decompose). Defaults to json for reliability; toon is opt-in and always
+    #   falls back to json parsing. Native tool-call args are always provider JSON.
+    session_id = session_id or str(uuid.uuid4())
+    logger.info("agent run starting: task=%r session_id=%s", task, session_id)
+    context_block = retrieve_context(store, task, top_k=top_k, tool_names=[t.name for t in tools.list()])
+    tool_descriptions = "\n".join(f"- {t.name}: {t.description}" for t in tools.list())
+    system = build_system_prompt(
+        context_block,
+        tool_descriptions,
+        system_prompt=system_prompt,
+        system_prompt_prefix=system_prompt_prefix,
+        result_format=result_format,
+    )
+    log_event(session_id, {"type": "start", "task": task, "context_block": context_block})
+    messages = [{"role": "user", "content": task}]
+    final_text = None
+    success = False
+    failed = False
+    error_reason = None
+    had_tool_failure = False
+    for step_num in range(max_steps):
+        logger.debug("step %d: calling llm", step_num)
+        t0 = time.monotonic()
+        try:
+            response = llm.step(system, messages)
+            if response.tool_calls:
+                had_tool_failure |= _dispatch_tool_calls(
+                    response.tool_calls, tools, llm, task, max_subquestions,
+                    session_id, messages, step_num, result_format, output_format,
+                )
+                # The tool turn was appended to `messages`; ask the model again.
+                continue
+        except LLMError as e:
+            # Unrecoverable provider problem (API error, rate limit, truncated
+            # response), including one raised by a decompose() sub-call. End the
+            # run as FAILED rather than crashing the caller with a raw traceback.
+            failed = True
+            error_reason = str(e)
+            logger.error("step %d: unrecoverable llm error: %s", step_num, e)
+            log_event(session_id, {"type": "error", "reason": error_reason, "outcome": Outcome.FAILED.value})
+            break
+        # Only reached for responses without tool calls (tool turns `continue` above).
+        logger.debug("step %d: llm responded in %.2fs", step_num, time.monotonic() - t0)
+        if response.final_text:
+            final_text = response.final_text
+            success = True
+            logger.info("step %d: final answer produced", step_num)
+            break
+        # No tool calls and no usable final text (empty or structurally
+        # malformed): raising beats silently burning every remaining step and
+        # then reporting an empty answer as success.
+        reason = "llm response had no tool calls and no final text"
+        logger.error("step %d: %s", step_num, reason)
+        log_event(session_id, {"type": "error", "reason": reason, "outcome": Outcome.FAILED.value})
+        raise AgentProtocolError(reason)
+    outcome = _classify_outcome(success, had_tool_failure, failed)
+    logger.info("agent run finished: outcome=%s session_id=%s", outcome.value, session_id)
+    # Exactly one closing trace event: final, failed, or incomplete (the loop
+    # ran out of steps without a final answer).
+    if success:
+        log_event(session_id, {"type": "final", "text": final_text, "outcome": outcome.value})
+    elif failed:
+        log_event(session_id, {"type": "failed", "reason": error_reason, "outcome": outcome.value})
+    else:
+        log_event(session_id, {"type": "incomplete", "reason": "max steps reached", "outcome": outcome.value})
+    return {"session_id": session_id, "success": success, "final_text": final_text, "outcome": outcome.value}
+def _dispatch_tool_calls(
+    calls: list[ToolCall],
+    tools: ToolRegistry,
+    llm: LLMBackend,
+    default_task: str,
+    max_subquestions: int,
+    session_id: str,
+    messages: list[dict],
+    step_num: int,
+    result_format: str = "toon",
+    output_format: str = "json",
+) -> bool:
+    """Run every tool call the model emitted this turn (a model may emit
+    several in parallel), then append exactly one assistant turn echoing all the
+    tool_use blocks and one user turn with all the matching tool_result blocks --
+    the Anthropic API rejects a tool_use that isn't paired with a tool_result.
+    `messages` is extended in place; `default_task` is the task handed to
+    decompose() when the call's args carry no "task" key.
+    Returns whether any call failed."""
+    had_failure = False
+    # Structured tool_use/tool_result blocks are only emitted when every call
+    # in the turn carries an id; otherwise the whole turn uses plain strings.
+    real_ids = all(c.id is not None for c in calls)
+    assistant_blocks, result_blocks = [], []
+    simple_calls, simple_results = [], []
+    for call in calls:
+        logger.info("step %d: dispatching tool %s args=%s", step_num, call.name, call.args)
+        t0 = time.monotonic()
+        # The decompose tool is served by decompose() directly; every other
+        # name goes through the registry.
+        if call.name == DECOMPOSE_TOOL_NAME:
+            result = decompose(
+                llm, call.args.get("task", default_task),
+                max_subquestions=max_subquestions, output_format=output_format,
+            )
+        else:
+            result = tools.invoke(call.name, call.args)
+        elapsed = time.monotonic() - t0
+        logger.info("step %d: tool %s returned ok=%s in %.2fs", step_num, call.name, result.get("ok"), elapsed)
+        if not result.get("ok"):
+            had_failure = True
+            logger.warning("step %d: tool %s failed: %s", step_num, call.name, result.get("error"))
+        log_event(session_id, {"type": "tool_call", "name": call.name, "args": call.args, "result": result})
+        # The trace keeps the raw dict; only the copy entering the model's context
+        # is TOON-encoded (or JSON), so token savings don't cost us a readable log.
+        encoded = _encode_result(result, result_format)
+        # Both message shapes are built for every call; which pair is appended
+        # is decided once after the loop.
+        assistant_blocks.append({"type": "tool_use", "id": call.id, "name": call.name, "input": call.args})
+        result_blocks.append({"type": "tool_result", "tool_use_id": call.id, "content": encoded})
+        simple_calls.append(f"[tool_call:{call.name}]")
+        simple_results.append(f"[tool_result] {encoded}")
+    if real_ids:
+        # Real provider tool-use: echo the assistant's tool_use blocks verbatim,
+        # then one user turn carrying every correlated tool_result.
+        messages.append({"role": "assistant", "content": assistant_blocks})
+        messages.append({"role": "user", "content": result_blocks})
+    else:
+        # ScriptedLLMBackend / id-less path: plain strings are enough.
+        messages.append({"role": "assistant", "content": " ".join(simple_calls)})
+        messages.append({"role": "user", "content": " ".join(simple_results)})
+    return had_failure
+def _classify_outcome(success: bool, had_tool_failure: bool, failed: bool) -> Outcome:
+    if failed:
+        return Outcome.FAILED
+    if not success:
+        return Outcome.INCOMPLETE
+    return Outcome.SUCCESS_WITH_RECOVERY if had_tool_failure else Outcome.SUCCESS

fabri/core/decompose.py ADDED Viewed

@@ -0,0 +1,65 @@
+import json
+from fabri import toon
+from fabri.core.llm import LLMBackend
+DEFAULT_MAX_SUBQUESTIONS = 5
+def decompose(
+    llm: LLMBackend,
+    task: str,
+    max_subquestions: int = DEFAULT_MAX_SUBQUESTIONS,
+    output_format: str = "json",
+) -> dict:
+    """Ask the LLM (a separate step() call, not a recursive run_agent) to break a
+    research task into concrete sub-questions. Returns the same {ok, result}
+    shape tools.invoke() returns, so the caller's message-append and trace
+    logging stay unmodified -- this is structured planning, not a sub-agent.
+    `output_format` is the format the model is asked to emit. "json" is the
+    reliable default; "toon" is opt-in and saves a few output tokens, but we
+    always accept either on parse so a model that ignores the instruction (or
+    emits slightly-off TOON) still works."""
+    if output_format == "toon":
+        shape = "a TOON array of strings, e.g. `[3]: first question,second question,third`"
+    else:
+        shape = 'a JSON list of strings, e.g. ["first question", "second question"]'
+    prompt = (
+        f"Break this task into at most {max_subquestions} concrete, separately "
+        f"answerable sub-questions. Return ONLY {shape}.\n\nTask: {task}"
+    )
+    response = llm.step(
+        "You decompose research tasks into concrete sub-questions.",
+        [{"role": "user", "content": prompt}],
+    )
+    text = (response.final_text or "").strip()
+    subquestions = _parse_string_list(text, prefer=output_format)
+    if subquestions is None:
+        return {"ok": False, "error": f"decompose: malformed response: {text!r}"}
+    return {"ok": True, "result": {"subquestions": subquestions[:max_subquestions]}}
+def _parse_string_list(text: str, prefer: str) -> list | None:
+    """Parse a list of strings from the model, trying the preferred format first
+    and falling back to the other -- a model may answer in either."""
+    parsers = [_try_toon, _try_json] if prefer == "toon" else [_try_json, _try_toon]
+    for parse in parsers:
+        value = parse(text)
+        if isinstance(value, list):
+            return value
+    return None
+def _try_json(text: str):
+    try:
+        return json.loads(text)
+    except (json.JSONDecodeError, ValueError):
+        return None
+def _try_toon(text: str):
+    try:
+        return toon.decode(text)
+    except Exception:
+        return None