fabri 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. fabri/__init__.py +35 -0
  2. fabri/admin.py +73 -0
  3. fabri/cli.py +182 -0
  4. fabri/config.py +76 -0
  5. fabri/core/__init__.py +0 -0
  6. fabri/core/agent.py +229 -0
  7. fabri/core/decompose.py +65 -0
  8. fabri/core/llm.py +254 -0
  9. fabri/core/logging_setup.py +42 -0
  10. fabri/core/outcome.py +8 -0
  11. fabri/memory/__init__.py +0 -0
  12. fabri/memory/compress.py +34 -0
  13. fabri/memory/embeddings.py +17 -0
  14. fabri/memory/pruning.py +76 -0
  15. fabri/memory/schema.py +50 -0
  16. fabri/memory/store.py +83 -0
  17. fabri/orchestrator/__init__.py +0 -0
  18. fabri/orchestrator/pipeline.py +56 -0
  19. fabri/orchestrator/retrieval.py +45 -0
  20. fabri/orchestrator/traces.py +22 -0
  21. fabri/paths.py +27 -0
  22. fabri/runtime.py +68 -0
  23. fabri/scaffold.py +104 -0
  24. fabri/tools/__init__.py +0 -0
  25. fabri/tools/agent_runner_tool.py +49 -0
  26. fabri/tools/agent_tool.py +34 -0
  27. fabri/tools/examples/bash.json +12 -0
  28. fabri/tools/examples/bash.py +54 -0
  29. fabri/tools/examples/broken_tool.json +8 -0
  30. fabri/tools/examples/broken_tool.py +7 -0
  31. fabri/tools/examples/echo_tool.json +8 -0
  32. fabri/tools/examples/echo_tool.py +5 -0
  33. fabri/tools/examples/edit_file.json +17 -0
  34. fabri/tools/examples/edit_file.py +48 -0
  35. fabri/tools/examples/example_go_tool/go.mod +3 -0
  36. fabri/tools/examples/example_go_tool/main.go +28 -0
  37. fabri/tools/examples/fetch_url.json +12 -0
  38. fabri/tools/examples/fetch_url.py +40 -0
  39. fabri/tools/examples/grep.json +16 -0
  40. fabri/tools/examples/grep.py +62 -0
  41. fabri/tools/examples/list_dir.json +8 -0
  42. fabri/tools/examples/list_dir.py +35 -0
  43. fabri/tools/examples/mcp_tool.json +16 -0
  44. fabri/tools/examples/mcp_tool.py +37 -0
  45. fabri/tools/examples/python_exec.json +12 -0
  46. fabri/tools/examples/python_exec.py +52 -0
  47. fabri/tools/examples/read_file.json +8 -0
  48. fabri/tools/examples/read_file.py +31 -0
  49. fabri/tools/examples/sum_tool.json +8 -0
  50. fabri/tools/examples/web_search.json +8 -0
  51. fabri/tools/examples/web_search.py +37 -0
  52. fabri/tools/examples/write_file.json +12 -0
  53. fabri/tools/examples/write_file.py +30 -0
  54. fabri/tools/manifest_schema.py +34 -0
  55. fabri/tools/registry.py +34 -0
  56. fabri/tools/runner.py +71 -0
  57. fabri/toon.py +354 -0
  58. fabri-0.1.0.dist-info/METADATA +243 -0
  59. fabri-0.1.0.dist-info/RECORD +63 -0
  60. fabri-0.1.0.dist-info/WHEEL +5 -0
  61. fabri-0.1.0.dist-info/entry_points.txt +2 -0
  62. fabri-0.1.0.dist-info/licenses/LICENSE +201 -0
  63. fabri-0.1.0.dist-info/top_level.txt +1 -0
fabri/__init__.py ADDED
@@ -0,0 +1,35 @@
1
+ from fabri.admin import AdminAuthError, describe_config, memory_summary, render_dashboard, require_admin
2
+ from fabri.config import DEFAULT_CONFIG, load_config
3
+ from fabri.core.agent import AgentProtocolError, run_agent
4
+ from fabri.core.llm import AnthropicLLMBackend, LLMBackend, LLMError, OpenAILLMBackend, ScriptedLLMBackend
5
+ from fabri.core.outcome import Outcome
6
+ from fabri.memory.store import QdrantMemoryStore
7
+ from fabri.orchestrator.pipeline import process_trace
8
+ from fabri.runtime import build_llm, build_tool_defs, build_tools
9
+ from fabri.tools.agent_tool import make_agent_tool_manifest
10
+ from fabri.tools.registry import ToolRegistry
11
+
12
# Public API of the package: every name re-exported by the imports above,
# listed explicitly so `from fabri import *` exposes exactly this set.
__all__ = [
    "AdminAuthError",
    "AgentProtocolError",
    "AnthropicLLMBackend",
    "DEFAULT_CONFIG",
    "LLMBackend",
    "LLMError",
    "OpenAILLMBackend",
    "Outcome",
    "QdrantMemoryStore",
    "ScriptedLLMBackend",
    "ToolRegistry",
    "build_llm",
    "build_tool_defs",
    "build_tools",
    "describe_config",
    "load_config",
    "make_agent_tool_manifest",
    "memory_summary",
    "process_trace",
    "render_dashboard",
    "require_admin",
    "run_agent",
]
fabri/admin.py ADDED
@@ -0,0 +1,73 @@
1
+ """Admin-only surface: config inspection and a dashboard summary, usable from
2
+ either the CLI (`cli.py admin ...`) or directly as a library call. There is no
3
+ real auth backend yet -- FABRI_ADMIN_TOKEN is a placeholder seam, not a
4
+ security boundary. Every admin entry point funnels through require_admin() so
5
+ real auth (SSO, an API gateway, whatever the deployment needs) has exactly one
6
+ place to be wired in later, instead of being scattered across call sites."""
7
+ import os
8
+
9
+ from fabri.memory.store import QdrantMemoryStore
10
+ from fabri.tools.agent_tool import AGENT_RUNNER_SCRIPT
11
+ from fabri.tools.registry import ToolRegistry
12
+
13
+ ADMIN_TOKEN_ENV = "FABRI_ADMIN_TOKEN"
14
+
15
+
16
+ class AdminAuthError(RuntimeError):
17
+ pass
18
+
19
+
20
+ def require_admin(token: str | None) -> None:
21
+ """If FABRI_ADMIN_TOKEN is unset, the gate is open -- there's no auth
22
+ backend yet, so refusing to run at all would just be theater. Set it to
23
+ start enforcing a shared-secret check; swap this function's body for real
24
+ auth whenever that's available, since every admin command already calls
25
+ it before doing anything."""
26
+ expected = os.environ.get(ADMIN_TOKEN_ENV)
27
+ if expected is None:
28
+ return
29
+ if token != expected:
30
+ raise AdminAuthError(f"admin token required (pass --admin-token, must match ${ADMIN_TOKEN_ENV})")
31
+
32
+
33
def describe_config(config: dict, tools: ToolRegistry) -> dict:
    """Summarize the merged config and the resolved tool registry as a dict.

    Every registered tool becomes one row carrying its name, description and
    command, plus `is_agent_tool`: true when the agent-runner script path
    appears in the tool's command, i.e. the tool is another agent wired in via
    tools.agents (agent-as-tool, see tools/agent_tool.py) rather than a plain
    subprocess tool.
    """
    runner_marker = str(AGENT_RUNNER_SCRIPT)
    tool_rows = []
    for tool in tools.list():
        row = {
            "name": tool.name,
            "description": tool.description,
            "is_agent_tool": runner_marker in tool.command,
            "command": tool.command,
        }
        tool_rows.append(row)

    tools_section = config["tools"]
    summary = {"agent": config["agent"], "llm": config["llm"]}
    summary["sandbox_root"] = tools_section["sandbox_root"]
    summary["decompose"] = tools_section["decompose"]
    summary["tools"] = tool_rows
    return summary
53
+
54
+
55
def memory_summary(store: QdrantMemoryStore) -> dict:
    """Return the store's entry counts keyed by kind: tactical and strategic."""
    tactical_count = store.count(kind="tactical")
    strategic_count = store.count(kind="strategic")
    return {"tactical": tactical_count, "strategic": strategic_count}
57
+
58
+
59
def render_dashboard(config: dict, tools: ToolRegistry, store: QdrantMemoryStore) -> str:
    """Render a human-readable, multi-line summary of agent, LLM, memory and tools.

    Args:
        config: The merged config dict (as produced by load_config).
        tools: The resolved tool registry.
        store: The memory store whose tactical/strategic counts are shown.

    Returns:
        One newline-joined string: a header section followed by one line per
        tool, each showing the first line of that tool's description.
    """
    desc = describe_config(config, tools)
    mem = memory_summary(store)
    lines = [
        f"agent: {desc['agent']['name']} (max_steps={desc['agent']['max_steps']})",
        f"llm: {desc['llm']['provider']}/{desc['llm']['model']}",
        f"sandbox: {desc['sandbox_root']}",
        f"decompose: {'on' if desc['decompose']['enabled'] else 'off'}",
        f"memory: {mem['tactical']} tactical / {mem['strategic']} strategic guidelines",
        "tools:",
    ]
    for t in desc["tools"]:
        kind = "agent" if t["is_agent_tool"] else "tool"
        # An empty or whitespace-only description has no first line; indexing
        # [0] on the empty splitlines() result would raise IndexError and take
        # the whole dashboard down, so fall back to an empty summary instead.
        description_lines = (t["description"] or "").strip().splitlines()
        first_line = description_lines[0] if description_lines else ""
        lines.append(f" - [{kind}] {t['name']}: {first_line}")
    return "\n".join(lines)
fabri/cli.py ADDED
@@ -0,0 +1,182 @@
1
+ import argparse
2
+ import json
3
+ import os
4
+ import sys
5
+ import uuid
6
+
7
+ from fabri.admin import AdminAuthError, describe_config, render_dashboard, require_admin
8
+ from fabri.config import load_config
9
+ from fabri.core.agent import run_agent
10
+ from fabri.core.logging_setup import configure_logging
11
+ from fabri.memory.store import QdrantMemoryStore
12
+ from fabri.orchestrator.pipeline import process_trace
13
+ from fabri.runtime import build_llm, build_tool_defs, build_tools
14
+ from fabri.scaffold import next_steps, scaffold
15
+
16
+
17
def cmd_init(args: argparse.Namespace) -> None:
    """Handle `fabri init`: scaffold a starter project and report what happened.

    Prints the files that were created, then the files that were left alone
    because they already existed, and finally the next-steps text.
    """
    outcome = scaffold(args.dir, force=args.force)
    if args.dir in (".", ""):
        where = "current directory"
    else:
        where = args.dir

    if outcome["created"]:
        print(f"Scaffolded a starter fabri project in {where}:")
        for created_path in outcome["created"]:
            print(f" + {created_path}")

    if outcome["skipped"]:
        print("\nLeft existing files untouched (pass --force to overwrite):")
        for skipped_path in outcome["skipped"]:
            print(f" . {skipped_path}")

    print("\n" + next_steps(args.dir))
29
+
30
+
31
+ def _require_api_key(api_key_env: str) -> None:
32
+ if not os.environ.get(api_key_env):
33
+ print(
34
+ f"{api_key_env} is not set. Export it before running the live agent, "
35
+ f"e.g.: export {api_key_env}=sk-...",
36
+ file=sys.stderr,
37
+ )
38
+ sys.exit(1)
39
+
40
+
41
def cmd_run(args: argparse.Namespace) -> None:
    """Handle `fabri run`: run the agent on a task, then ingest its trace.

    Prints the agent result as JSON, then runs process_trace() over the same
    session and lists any guidelines it returned.
    """
    config = load_config(args.config)
    _require_api_key(config["llm"]["api_key_env"])

    # Reuse the caller's session id when given; otherwise mint a fresh one.
    session_id = args.session_id if args.session_id else str(uuid.uuid4())
    configure_logging(session_id, verbose=args.verbose)

    memory_cfg = config["memory"]
    store = QdrantMemoryStore(url=memory_cfg["qdrant_url"], collection=memory_cfg["collection"])

    tools_cfg = config["tools"]
    tools = build_tools(tools_cfg)

    decompose_cfg = tools_cfg["decompose"]
    tool_defs = build_tool_defs(tools, decompose_cfg)
    agent_llm = build_llm(config, tool_defs)

    agent_cfg = config["agent"]
    result = run_agent(
        args.task,
        agent_llm,
        tools,
        store,
        session_id=session_id,
        max_steps=agent_cfg["max_steps"],
        top_k=memory_cfg["top_k"],
        max_subquestions=decompose_cfg["max_subquestions"],
        system_prompt=agent_cfg.get("system_prompt", ""),
        system_prompt_prefix=agent_cfg.get("system_prompt_prefix", ""),
        result_format=tools_cfg.get("result_format", "toon"),
        output_format=agent_cfg.get("output_format", "json"),
    )
    print(json.dumps(result, indent=2))

    # The compression pass gets its own backend built with no tool definitions.
    compress_llm = build_llm(config, [])
    entries = process_trace(
        session_id,
        store,
        compress_llm,
        guideline_max_tokens=memory_cfg["guideline_max_tokens"],
        similarity_threshold=memory_cfg["similarity_threshold"],
        promotion_threshold_sessions=memory_cfg["promotion_threshold_sessions"],
    )
    if not entries:
        return
    print(f"\nSynthesized {len(entries)} guideline(s) from this run:")
    for entry in entries:
        print(f" [{entry.kind}] {entry.text}")
85
+
86
+
87
def cmd_ingest_traces(args: argparse.Namespace) -> None:
    """Handle `fabri ingest-traces`: synthesize guidelines from one session's trace.

    Prints the payloads of the entries returned by process_trace() as JSON.
    """
    config = load_config(args.config)
    _require_api_key(config["llm"]["api_key_env"])
    configure_logging(args.session_id, verbose=args.verbose)

    memory_cfg = config["memory"]
    store = QdrantMemoryStore(url=memory_cfg["qdrant_url"], collection=memory_cfg["collection"])
    # No tool definitions: this backend is only used by the trace pipeline.
    llm = build_llm(config, [])

    entries = process_trace(
        args.session_id,
        store,
        llm,
        guideline_max_tokens=memory_cfg["guideline_max_tokens"],
        similarity_threshold=memory_cfg["similarity_threshold"],
        promotion_threshold_sessions=memory_cfg["promotion_threshold_sessions"],
    )
    payloads = [entry.to_payload() for entry in entries]
    print(json.dumps(payloads, indent=2))
103
+
104
+
105
def cmd_inspect_memory(args: argparse.Namespace) -> None:
    """Handle `fabri inspect-memory`: print per-kind counts and optional query hits."""
    memory_cfg = load_config(args.config)["memory"]
    store = QdrantMemoryStore(url=memory_cfg["qdrant_url"], collection=memory_cfg["collection"])

    tactical_count = store.count(kind="tactical")
    print(f"tactical: {tactical_count}")
    strategic_count = store.count(kind="strategic")
    print(f"strategic: {strategic_count}")

    if not args.query:
        return
    for hit, score in store.query(args.query, top_k=args.top_k):
        print(f" [{hit.kind}] ({score:.2f}) {hit.text}")
114
+
115
+
116
def cmd_admin_config(args: argparse.Namespace) -> None:
    """Handle `fabri admin config`: print the merged config and tool registry as JSON.

    The admin gate runs first; a rejected token is reported on stderr and the
    process exits with status 1.
    """
    try:
        require_admin(args.admin_token)
    except AdminAuthError as auth_error:
        print(str(auth_error), file=sys.stderr)
        sys.exit(1)

    config = load_config(args.config)
    registry = build_tools(config["tools"])
    description = describe_config(config, registry)
    print(json.dumps(description, indent=2))
125
+
126
+
127
def cmd_admin_dashboard(args: argparse.Namespace) -> None:
    """Handle `fabri admin dashboard`: print the human-readable summary.

    The admin gate runs first; a rejected token is reported on stderr and the
    process exits with status 1.
    """
    try:
        require_admin(args.admin_token)
    except AdminAuthError as auth_error:
        print(str(auth_error), file=sys.stderr)
        sys.exit(1)

    config = load_config(args.config)
    registry = build_tools(config["tools"])
    memory_cfg = config["memory"]
    store = QdrantMemoryStore(url=memory_cfg["qdrant_url"], collection=memory_cfg["collection"])
    dashboard = render_dashboard(config, registry, store)
    print(dashboard)
138
+
139
+
140
def main() -> None:
    """Build the `fabri` argument parser, parse argv and dispatch to the handler.

    Each subcommand registers its handler through set_defaults(func=...), so
    dispatch is a single call on the parsed namespace.
    """
    root = argparse.ArgumentParser(prog="fabri")
    root.add_argument("--verbose", action="store_true", help="Log at DEBUG level to the console")
    root.add_argument("--config", dest="config", default=None, help="Path to an agent.yaml config")
    commands = root.add_subparsers(dest="command", required=True)

    init_parser = commands.add_parser(
        "init", help="Scaffold a starter fabri project (agent.yaml, tools, docker-compose)"
    )
    init_parser.add_argument("dir", nargs="?", default=".", help="Target directory (default: current)")
    init_parser.add_argument("--force", action="store_true", help="Overwrite existing files")
    init_parser.set_defaults(func=cmd_init)

    run_parser = commands.add_parser("run", help="Run the agent on a task")
    run_parser.add_argument("task")
    run_parser.add_argument("--session-id", dest="session_id", default=None)
    run_parser.set_defaults(func=cmd_run)

    ingest_parser = commands.add_parser(
        "ingest-traces", help="Synthesize guidelines from a session's trace"
    )
    ingest_parser.add_argument("session_id")
    ingest_parser.set_defaults(func=cmd_ingest_traces)

    inspect_parser = commands.add_parser(
        "inspect-memory", help="Inspect stored memory, optionally querying it"
    )
    inspect_parser.add_argument("query", nargs="?", default=None)
    inspect_parser.add_argument("--top-k", dest="top_k", type=int, default=5)
    inspect_parser.set_defaults(func=cmd_inspect_memory)

    # admin: config/dashboard inspection. Gated by require_admin() -- a stub
    # shared-secret check (FABRI_ADMIN_TOKEN), not real auth. See admin.py.
    admin_parser = commands.add_parser(
        "admin", help="Admin-only: inspect a config and its resolved tools/memory"
    )
    admin_parser.add_argument("--admin-token", dest="admin_token", default=None)
    admin_commands = admin_parser.add_subparsers(dest="admin_command", required=True)

    admin_config_parser = admin_commands.add_parser(
        "config", help="Print the merged config + resolved tool registry as JSON"
    )
    admin_config_parser.set_defaults(func=cmd_admin_config)

    admin_dashboard_parser = admin_commands.add_parser(
        "dashboard", help="Human-readable summary: agent, tools, memory counts"
    )
    admin_dashboard_parser.set_defaults(func=cmd_admin_dashboard)

    parsed = root.parse_args()
    parsed.func(parsed)


if __name__ == "__main__":
    main()
fabri/config.py ADDED
@@ -0,0 +1,76 @@
1
+ from pathlib import Path
2
+
3
+ import yaml
4
+
5
+ from fabri.core.decompose import DEFAULT_MAX_SUBQUESTIONS
6
+ from fabri.memory.compress import DEFAULT_MAX_TOKENS
7
+ from fabri.memory.pruning import PROMOTION_THRESHOLD_SESSIONS, SIMILARITY_THRESHOLD
8
+ from fabri.memory.store import COLLECTION_NAME
9
+ from fabri.orchestrator.retrieval import DEFAULT_TOP_K
10
+
11
# Directory of example tool manifests that sits next to this module
# (<package>/tools/examples); used below as the default `tools.manifest_dir`.
DEFAULT_TOOLS_DIR = Path(__file__).resolve().parent / "tools" / "examples"

# Framework defaults. load_config() merges a user's agent.yaml on top of this
# mapping, so any key the user omits falls back to the value given here.
DEFAULT_CONFIG = {
    "agent": {
        "name": "default",
        "max_steps": 10,
        # If `system_prompt` is set, it REPLACES the framework's generic
        # boilerplate ("You are an autonomous agent..."). If `system_prompt_prefix`
        # is set, it is prepended to whatever follows. Both empty = original
        # behavior. Consuming projects use these to inject domain-specific
        # identity, format contracts, few-shots, etc.
        "system_prompt": "",
        "system_prompt_prefix": "",
        # Format the model is asked to PRODUCE structured output in (decompose).
        # "json" is the reliable default; "toon" is opt-in (always json-fallback).
        # Native tool-call arguments are always provider JSON regardless.
        "output_format": "json",
    },
    "llm": {
        "provider": "anthropic",
        "model": "claude-sonnet-4-6",
        "max_tokens": 1024,
        # Name of the environment variable that holds the API key, not the key itself.
        "api_key_env": "ANTHROPIC_API_KEY",
    },
    "tools": {
        "manifest_dir": str(DEFAULT_TOOLS_DIR),
        "enabled": None,
        "sandbox_root": ".",
        "agents": [],  # other agent.yaml configs exposed as tools -- see tools/agent_tool.py
        # How tool results are serialized INTO the model's context. "toon" (default)
        # saves input tokens; the framework encodes this end, so there's no model
        # reliability risk. Set "json" to opt out.
        "result_format": "toon",
        "decompose": {"enabled": False, "max_subquestions": DEFAULT_MAX_SUBQUESTIONS},
    },
    "memory": {
        "collection": COLLECTION_NAME,
        "qdrant_url": "http://localhost:6333",
        "top_k": DEFAULT_TOP_K,
        "similarity_threshold": SIMILARITY_THRESHOLD,
        "promotion_threshold_sessions": PROMOTION_THRESHOLD_SESSIONS,
        "guideline_max_tokens": DEFAULT_MAX_TOKENS,
    },
}
55
+
56
+
57
+ def _deep_merge(base: dict, override: dict) -> dict:
58
+ merged = dict(base)
59
+ for key, value in override.items():
60
+ if isinstance(value, dict) and isinstance(merged.get(key), dict):
61
+ merged[key] = _deep_merge(merged[key], value)
62
+ else:
63
+ merged[key] = value
64
+ return merged
65
+
66
+
67
def load_config(path: str | None) -> dict:
    """Load an agent.yaml config, merged on top of DEFAULT_CONFIG.

    Omitted fields fall back to the framework defaults. `path=None` returns
    the defaults alone -- the same shape a consuming project's own agent.yaml
    would produce, so callers don't special-case it.

    The returned dict is always an independent copy: callers may mutate it
    without altering the module-level DEFAULT_CONFIG.

    Args:
        path: Path to a YAML config file, or None for the defaults.

    Returns:
        The merged configuration dict.

    Raises:
        OSError: if `path` cannot be opened.
        ValueError: if the file's top-level YAML value is not a mapping.
    """
    import copy  # local import keeps this block self-contained

    # Copy first so neither return path can hand out (or alias into) the
    # shared module-level defaults.
    defaults = copy.deepcopy(DEFAULT_CONFIG)
    if path is None:
        return defaults
    # Explicit encoding: don't let the platform locale decide how YAML is read.
    with open(path, encoding="utf-8") as f:
        user_config = yaml.safe_load(f) or {}
    if not isinstance(user_config, dict):
        raise ValueError(
            f"{path}: top-level YAML value must be a mapping, got {type(user_config).__name__}"
        )
    return _deep_merge(defaults, user_config)
fabri/core/__init__.py ADDED
File without changes
fabri/core/agent.py ADDED
@@ -0,0 +1,229 @@
1
+ import json
2
+ import time
3
+ import uuid
4
+
5
+ from fabri.core.decompose import DEFAULT_MAX_SUBQUESTIONS, decompose
6
+ from fabri.core.llm import LLMBackend, LLMError, ToolCall
7
+ from fabri.core.logging_setup import get_logger
8
+ from fabri.toon import encode as toon_encode
9
+ from fabri.core.outcome import Outcome
10
+ from fabri.memory.store import QdrantMemoryStore
11
+ from fabri.orchestrator.retrieval import DEFAULT_TOP_K, retrieve_context
12
+ from fabri.orchestrator.traces import log_event
13
+ from fabri.tools.registry import ToolRegistry
14
+
15
# Default for run_agent()'s `max_steps` parameter.
MAX_STEPS = 10
# Tool-call name that _dispatch_tool_calls() routes to decompose() instead of
# invoking it through the tool registry.
DECOMPOSE_TOOL_NAME = "decompose"

# Module-level logger shared by every function in this file.
logger = get_logger()
19
+
20
+
21
class AgentProtocolError(RuntimeError):
    """Signals a malformed LLMBackend response: no tool calls and no usable
    final text (None or empty).

    Raising immediately gives a diagnostic at the point of failure; the
    alternative would be to silently burn every remaining step and end as
    INCOMPLETE with no hint of why. An unrecoverable *provider* error is a
    separate case -- core.llm.LLMError -- which the agent loop maps to
    Outcome.FAILED instead of raising.
    """
27
+
28
+
29
# Identity line build_system_prompt() falls back to when no `system_prompt`
# is supplied.
DEFAULT_AGENT_IDENTITY = "You are an autonomous agent. Use tools when needed, and stop once the task is done."

# Short explanation of the TOON layout; build_system_prompt() adds it to the
# system prompt only when result_format == "toon".
TOON_RESULT_NOTE = (
    "Tool results are given to you in TOON, a compact format: objects are `key: value` "
    "lines; arrays are `name[N]: v1,v2,...`, or a table `name[N]{f1,f2}:` followed by one "
    "comma-separated row per element. Read it as structured data; keep calling tools and "
    "answering normally."
)
37
+
38
+
39
def build_system_prompt(
    context_block: str,
    tool_descriptions: str,
    *,
    system_prompt: str = "",
    system_prompt_prefix: str = "",
    result_format: str = "json",
) -> str:
    """Assemble the system prompt from its optional sections.

    Section order: prefix, identity, tool list, TOON note, retrieved context.
    Empty sections are dropped and the rest are joined by blank lines.

    `system_prompt`, when set, replaces the framework's generic identity line
    entirely -- domain agents use it to inject their own identity, format
    pointers and few-shots. `system_prompt_prefix`, when set, is prepended
    verbatim, which suits global notes shared across many configs. With both
    empty the result is the same as before those options existed.
    """
    sections = []
    if system_prompt_prefix:
        sections.append(system_prompt_prefix)

    identity = system_prompt if system_prompt else DEFAULT_AGENT_IDENTITY
    if identity:
        sections.append(identity)

    if tool_descriptions:
        sections.append(f"Available tools:\n{tool_descriptions}")

    # The TOON explainer is only relevant when results are TOON-encoded.
    if result_format == "toon" and TOON_RESULT_NOTE:
        sections.append(TOON_RESULT_NOTE)

    if context_block:
        sections.append(context_block)

    return "\n\n".join(sections)
61
+
62
+
63
+ def _encode_result(result: dict, result_format: str) -> str:
64
+ """Serialize a tool result for the model. TOON saves input tokens; we never
65
+ let an encode error break the loop -- fall back to JSON."""
66
+ if result_format == "toon":
67
+ try:
68
+ return toon_encode(result)
69
+ except Exception: # pragma: no cover - defensive, encode handles all JSON shapes
70
+ logger.warning("toon encode failed for a tool result; falling back to JSON")
71
+ return json.dumps(result)
72
+
73
+
74
def run_agent(
    task: str,
    llm: LLMBackend,
    tools: ToolRegistry,
    store: QdrantMemoryStore,
    session_id: str | None = None,
    max_steps: int = MAX_STEPS,
    top_k: int = DEFAULT_TOP_K,
    max_subquestions: int = DEFAULT_MAX_SUBQUESTIONS,
    system_prompt: str = "",
    system_prompt_prefix: str = "",
    result_format: str = "toon",
    output_format: str = "json",
) -> dict:
    """Run the agent loop on `task` for at most `max_steps` LLM steps.

    Each step calls llm.step(). A response carrying tool calls is dispatched
    and the loop continues; a response carrying final text ends the run
    successfully; an LLMError ends the run as failed.

    Args:
        task: The user task; sent as the first user message.
        llm: Backend whose step(system, messages) produces each response.
        tools: Registry used to describe and invoke tools.
        store: Memory store queried once, up front, for the context block.
        session_id: Trace/session identifier; a fresh UUID when not given.
        max_steps: Upper bound on loop iterations.
        top_k: Passed to retrieve_context().
        max_subquestions: Passed through to decompose() tool calls.
        system_prompt: Replaces the default identity line when set.
        system_prompt_prefix: Prepended to the system prompt when set.
        result_format: Serialization of tool results for the model.
        output_format: Format requested from the model by decompose().

    Returns:
        A dict with keys `session_id`, `success`, `final_text` and `outcome`
        (the Outcome enum's value).

    Raises:
        AgentProtocolError: when a response has neither tool calls nor final
            text.
    """
    # result_format: how tool results are serialized INTO the model's context
    # (toon = fewer input tokens; we control this end so it's reliability-free).
    # output_format: the format the model is asked to PRODUCE structured output in
    # (decompose). Defaults to json for reliability; toon is opt-in and always
    # falls back to json parsing. Native tool-call args are always provider JSON.
    session_id = session_id or str(uuid.uuid4())
    logger.info("agent run starting: task=%r session_id=%s", task, session_id)

    # The system prompt is built once, before the loop, and reused for every step.
    context_block = retrieve_context(store, task, top_k=top_k, tool_names=[t.name for t in tools.list()])
    tool_descriptions = "\n".join(f"- {t.name}: {t.description}" for t in tools.list())
    system = build_system_prompt(
        context_block,
        tool_descriptions,
        system_prompt=system_prompt,
        system_prompt_prefix=system_prompt_prefix,
        result_format=result_format,
    )

    log_event(session_id, {"type": "start", "task": task, "context_block": context_block})

    messages = [{"role": "user", "content": task}]
    final_text = None
    success = False
    failed = False
    error_reason = None
    had_tool_failure = False

    for step_num in range(max_steps):
        logger.debug("step %d: calling llm", step_num)
        t0 = time.monotonic()
        try:
            response = llm.step(system, messages)
            if response.tool_calls:
                # _dispatch_tool_calls appends the tool turns to `messages`
                # in place and reports whether any call failed.
                had_tool_failure |= _dispatch_tool_calls(
                    response.tool_calls, tools, llm, task, max_subquestions,
                    session_id, messages, step_num, result_format, output_format,
                )
                continue
        except LLMError as e:
            # Unrecoverable provider problem (API error, rate limit, truncated
            # response), including one raised by a decompose() sub-call. End the
            # run as FAILED rather than crashing the caller with a raw traceback.
            failed = True
            error_reason = str(e)
            logger.error("step %d: unrecoverable llm error: %s", step_num, e)
            log_event(session_id, {"type": "error", "reason": error_reason, "outcome": Outcome.FAILED.value})
            break
        logger.debug("step %d: llm responded in %.2fs", step_num, time.monotonic() - t0)

        if response.final_text:
            final_text = response.final_text
            success = True
            logger.info("step %d: final answer produced", step_num)
            break

        # No tool calls and no usable final text (empty or structurally
        # malformed): raising beats silently burning every remaining step and
        # then reporting an empty answer as success.
        reason = "llm response had no tool calls and no final text"
        logger.error("step %d: %s", step_num, reason)
        log_event(session_id, {"type": "error", "reason": reason, "outcome": Outcome.FAILED.value})
        raise AgentProtocolError(reason)

    outcome = _classify_outcome(success, had_tool_failure, failed)
    logger.info("agent run finished: outcome=%s session_id=%s", outcome.value, session_id)

    # Exactly one closing trace event; falling out of the loop without success
    # or failure means the step budget ran out.
    if success:
        log_event(session_id, {"type": "final", "text": final_text, "outcome": outcome.value})
    elif failed:
        log_event(session_id, {"type": "failed", "reason": error_reason, "outcome": outcome.value})
    else:
        log_event(session_id, {"type": "incomplete", "reason": "max steps reached", "outcome": outcome.value})

    return {"session_id": session_id, "success": success, "final_text": final_text, "outcome": outcome.value}
162
+
163
+
164
def _dispatch_tool_calls(
    calls: list[ToolCall],
    tools: ToolRegistry,
    llm: LLMBackend,
    default_task: str,
    max_subquestions: int,
    session_id: str,
    messages: list[dict],
    step_num: int,
    result_format: str = "toon",
    output_format: str = "json",
) -> bool:
    """Execute all tool calls from one model turn and record them in `messages`.

    A model may emit several calls in parallel. After running every one, this
    appends exactly one assistant turn echoing all the tool_use blocks and one
    user turn carrying all the matching tool_result blocks -- the Anthropic API
    rejects a tool_use that isn't paired with a tool_result.

    Returns True when at least one call reported a non-ok result.
    """
    any_failed = False
    # Structured blocks are only usable when every call carries a provider id.
    all_have_ids = all(c.id is not None for c in calls)
    tool_use_blocks = []
    tool_result_blocks = []
    call_tags = []
    result_tags = []

    for call in calls:
        logger.info("step %d: dispatching tool %s args=%s", step_num, call.name, call.args)
        started = time.monotonic()
        if call.name != DECOMPOSE_TOOL_NAME:
            result = tools.invoke(call.name, call.args)
        else:
            # decompose is handled in-process rather than through the registry.
            result = decompose(
                llm,
                call.args.get("task", default_task),
                max_subquestions=max_subquestions,
                output_format=output_format,
            )
        elapsed = time.monotonic() - started
        logger.info("step %d: tool %s returned ok=%s in %.2fs", step_num, call.name, result.get("ok"), elapsed)
        if not result.get("ok"):
            any_failed = True
            logger.warning("step %d: tool %s failed: %s", step_num, call.name, result.get("error"))

        log_event(session_id, {"type": "tool_call", "name": call.name, "args": call.args, "result": result})

        # The trace keeps the raw dict; only the copy entering the model's context
        # is TOON-encoded (or JSON), so token savings don't cost us a readable log.
        encoded = _encode_result(result, result_format)
        tool_use_blocks.append({"type": "tool_use", "id": call.id, "name": call.name, "input": call.args})
        tool_result_blocks.append({"type": "tool_result", "tool_use_id": call.id, "content": encoded})
        call_tags.append(f"[tool_call:{call.name}]")
        result_tags.append(f"[tool_result] {encoded}")

    if all_have_ids:
        # Real provider tool-use: echo the assistant's tool_use blocks verbatim,
        # then one user turn carrying every correlated tool_result.
        assistant_content = tool_use_blocks
        user_content = tool_result_blocks
    else:
        # ScriptedLLMBackend / id-less path: plain strings are enough.
        assistant_content = " ".join(call_tags)
        user_content = " ".join(result_tags)
    messages.append({"role": "assistant", "content": assistant_content})
    messages.append({"role": "user", "content": user_content})
    return any_failed
222
+
223
+
224
def _classify_outcome(success: bool, had_tool_failure: bool, failed: bool) -> Outcome:
    """Map the run's three flags onto an Outcome.

    `failed` takes precedence over everything else. A successful run is
    SUCCESS_WITH_RECOVERY when any tool call failed along the way, SUCCESS
    otherwise. A run that neither failed nor succeeded is INCOMPLETE.
    """
    if failed:
        return Outcome.FAILED
    if success:
        if had_tool_failure:
            return Outcome.SUCCESS_WITH_RECOVERY
        return Outcome.SUCCESS
    return Outcome.INCOMPLETE
fabri/core/decompose.py ADDED
@@ -0,0 +1,65 @@
1
+ import json
2
+
3
+ from fabri import toon
4
+ from fabri.core.llm import LLMBackend
5
+
6
# Default cap on the number of sub-questions decompose() returns.
DEFAULT_MAX_SUBQUESTIONS = 5


def decompose(
    llm: LLMBackend,
    task: str,
    max_subquestions: int = DEFAULT_MAX_SUBQUESTIONS,
    output_format: str = "json",
) -> dict:
    """Ask the LLM to split a research task into concrete sub-questions.

    This is one separate step() call, not a recursive run_agent: structured
    planning rather than a sub-agent. The return value has the same
    {ok, result} shape tools.invoke() returns, so the caller's message-append
    and trace logging need no special case.

    `output_format` selects the format the model is asked to emit. "json" is
    the reliable default; "toon" is opt-in and saves a few output tokens.
    Either format is accepted on parse, so a model that ignores the
    instruction (or emits slightly-off TOON) still works.

    Returns:
        {"ok": True, "result": {"subquestions": [...]}} capped at
        `max_subquestions` entries, or {"ok": False, "error": ...} when the
        reply parses as neither format.
    """
    shape = (
        "a TOON array of strings, e.g. `[3]: first question,second question,third`"
        if output_format == "toon"
        else 'a JSON list of strings, e.g. ["first question", "second question"]'
    )
    instruction = (
        f"Break this task into at most {max_subquestions} concrete, separately "
        f"answerable sub-questions. Return ONLY {shape}.\n\nTask: {task}"
    )
    user_turn = {"role": "user", "content": instruction}
    reply = llm.step("You decompose research tasks into concrete sub-questions.", [user_turn])

    # A missing or empty final_text is treated as an empty reply.
    raw = reply.final_text
    text = raw.strip() if raw else ""

    parsed = _parse_string_list(text, prefer=output_format)
    if parsed is not None:
        return {"ok": True, "result": {"subquestions": parsed[:max_subquestions]}}
    return {"ok": False, "error": f"decompose: malformed response: {text!r}"}
41
+
42
+
43
def _parse_string_list(text: str, prefer: str) -> list | None:
    """Parse the model's reply into a list, trying the preferred format first.

    A model may answer in either format, so the other parser serves as a
    fallback. The first parser that yields a list wins; None is returned when
    neither does.
    """
    if prefer == "toon":
        attempts = (_try_toon, _try_json)
    else:
        attempts = (_try_json, _try_toon)
    # Generator keeps evaluation lazy: the fallback only runs if needed.
    candidates = (attempt(text) for attempt in attempts)
    return next((value for value in candidates if isinstance(value, list)), None)
52
+
53
+
54
+ def _try_json(text: str):
55
+ try:
56
+ return json.loads(text)
57
+ except (json.JSONDecodeError, ValueError):
58
+ return None
59
+
60
+
61
def _try_toon(text: str):
    """Decode `text` as TOON, returning None on any decode failure."""
    try:
        decoded = toon.decode(text)
    except Exception:
        # Best-effort by design: a failed decode just means "not TOON".
        return None
    return decoded