agentforge-py 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. agentforge/__init__.py +114 -0
  2. agentforge/_testing/__init__.py +19 -0
  3. agentforge/_testing/fake_llm.py +126 -0
  4. agentforge/_testing/fake_tool.py +122 -0
  5. agentforge/_tools/__init__.py +14 -0
  6. agentforge/_tools/calculator.py +102 -0
  7. agentforge/_tools/decorator.py +300 -0
  8. agentforge/_tools/file_read.py +112 -0
  9. agentforge/_tools/shell.py +134 -0
  10. agentforge/_tools/web_search.py +207 -0
  11. agentforge/agent.py +817 -0
  12. agentforge/auth.py +42 -0
  13. agentforge/cli/__init__.py +18 -0
  14. agentforge/cli/_build.py +323 -0
  15. agentforge/cli/_scaffold_state.py +250 -0
  16. agentforge/cli/_shared_scaffold.py +174 -0
  17. agentforge/cli/config_cmd.py +174 -0
  18. agentforge/cli/db_cmd.py +262 -0
  19. agentforge/cli/debug_cmd.py +168 -0
  20. agentforge/cli/docs_cmd.py +217 -0
  21. agentforge/cli/eval_cmd.py +181 -0
  22. agentforge/cli/health_cmd.py +139 -0
  23. agentforge/cli/list_modules.py +85 -0
  24. agentforge/cli/main.py +81 -0
  25. agentforge/cli/manifest_apply.py +368 -0
  26. agentforge/cli/module_cmd.py +247 -0
  27. agentforge/cli/new_cmd.py +171 -0
  28. agentforge/cli/run_cmd.py +234 -0
  29. agentforge/cli/upgrade_cmd.py +230 -0
  30. agentforge/config/__init__.py +45 -0
  31. agentforge/eval/__init__.py +18 -0
  32. agentforge/eval/consistency.py +107 -0
  33. agentforge/eval/coverage.py +100 -0
  34. agentforge/eval/format_compliance.py +107 -0
  35. agentforge/eval/regression.py +143 -0
  36. agentforge/findings.py +166 -0
  37. agentforge/guardrails/__init__.py +32 -0
  38. agentforge/guardrails/allowlist.py +49 -0
  39. agentforge/guardrails/capability_check.py +58 -0
  40. agentforge/guardrails/engine.py +289 -0
  41. agentforge/guardrails/pii_redact_basic.py +61 -0
  42. agentforge/guardrails/prompt_injection_basic.py +90 -0
  43. agentforge/memory/__init__.py +16 -0
  44. agentforge/memory/in_memory.py +130 -0
  45. agentforge/memory/in_memory_graph.py +262 -0
  46. agentforge/memory/in_memory_vector.py +167 -0
  47. agentforge/pipeline/__init__.py +26 -0
  48. agentforge/pipeline/engine.py +189 -0
  49. agentforge/pipeline/errors.py +19 -0
  50. agentforge/pipeline/tool.py +93 -0
  51. agentforge/py.typed +0 -0
  52. agentforge/recording.py +189 -0
  53. agentforge/renderers/__init__.py +28 -0
  54. agentforge/renderers/_defaults.py +32 -0
  55. agentforge/renderers/markdown.py +44 -0
  56. agentforge/renderers/patch_applier.py +46 -0
  57. agentforge/renderers/registry.py +108 -0
  58. agentforge/renderers/scorecard.py +59 -0
  59. agentforge/renderers/span_table.py +71 -0
  60. agentforge/replay.py +260 -0
  61. agentforge/resolver_register.py +41 -0
  62. agentforge/retrieval.py +410 -0
  63. agentforge/runtime.py +63 -0
  64. agentforge/strategies/__init__.py +27 -0
  65. agentforge/strategies/_base.py +280 -0
  66. agentforge/strategies/_plan.py +93 -0
  67. agentforge/strategies/multi_agent.py +541 -0
  68. agentforge/strategies/plan_execute.py +506 -0
  69. agentforge/strategies/react.py +237 -0
  70. agentforge/strategies/tot.py +472 -0
  71. agentforge/templates/_shared/.cursorrules +12 -0
  72. agentforge/templates/_shared/.github/copilot-instructions.md +13 -0
  73. agentforge/templates/_shared/.gitkeep +0 -0
  74. agentforge/templates/_shared/AGENTS.md.tmpl +123 -0
  75. agentforge/templates/_shared/CLAUDE.md +13 -0
  76. agentforge/templates/_shared/docs/runbooks/01-set-up-new-agent.md.tmpl +67 -0
  77. agentforge/templates/_shared/docs/runbooks/02-add-a-tool.md +67 -0
  78. agentforge/templates/_shared/docs/runbooks/03-add-a-pipeline-task.md +69 -0
  79. agentforge/templates/_shared/docs/runbooks/04-pick-reasoning-strategy.md +67 -0
  80. agentforge/templates/_shared/docs/runbooks/05-write-prompts.md +75 -0
  81. agentforge/templates/_shared/docs/runbooks/06-test-your-agent.md +75 -0
  82. agentforge/templates/_shared/docs/runbooks/07-debug-a-run.md +70 -0
  83. agentforge/templates/_shared/docs/runbooks/08-add-memory.md +75 -0
  84. agentforge/templates/_shared/docs/runbooks/09-add-mcp.md +78 -0
  85. agentforge/templates/_shared/docs/runbooks/10-add-evaluators.md +76 -0
  86. agentforge/templates/_shared/docs/runbooks/11-add-safety-guardrails.md +83 -0
  87. agentforge/templates/_shared/docs/runbooks/12-add-observability.md +77 -0
  88. agentforge/templates/_shared/docs/runbooks/13-configure-multi-provider.md +91 -0
  89. agentforge/templates/_shared/docs/runbooks/14-deploy-your-agent.md +70 -0
  90. agentforge/templates/_shared/docs/runbooks/15-upgrade-your-agent.md +67 -0
  91. agentforge/templates/_shared/docs/runbooks/16-configuration-reference.md +81 -0
  92. agentforge/templates/_shared/docs/runbooks/17-add-reranker.md +78 -0
  93. agentforge/templates/_shared/docs/runbooks/18-add-hybrid-search.md +78 -0
  94. agentforge/templates/_shared/docs/runbooks/19-add-graphrag.md +83 -0
  95. agentforge/templates/_shared/docs/runbooks/20-apply-schema-migrations.md +92 -0
  96. agentforge/templates/_shared/docs/runbooks/21-use-streaming-guardrails.md +82 -0
  97. agentforge/templates/_shared/docs/runbooks/README.md.tmpl +68 -0
  98. agentforge/templates/code-reviewer/.env.example +8 -0
  99. agentforge/templates/code-reviewer/.gitignore +7 -0
  100. agentforge/templates/code-reviewer/README.md +12 -0
  101. agentforge/templates/code-reviewer/agentforge.yaml +23 -0
  102. agentforge/templates/code-reviewer/copier.yml +34 -0
  103. agentforge/templates/code-reviewer/pyproject.toml +18 -0
  104. agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  105. agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  106. agentforge/templates/docs-qa/.env.example +8 -0
  107. agentforge/templates/docs-qa/.gitignore +7 -0
  108. agentforge/templates/docs-qa/README.md +14 -0
  109. agentforge/templates/docs-qa/agentforge.yaml +19 -0
  110. agentforge/templates/docs-qa/copier.yml +31 -0
  111. agentforge/templates/docs-qa/pyproject.toml +18 -0
  112. agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  113. agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  114. agentforge/templates/minimal/.env.example +11 -0
  115. agentforge/templates/minimal/.gitignore +10 -0
  116. agentforge/templates/minimal/README.md +28 -0
  117. agentforge/templates/minimal/agentforge.yaml +10 -0
  118. agentforge/templates/minimal/copier.yml +52 -0
  119. agentforge/templates/minimal/pyproject.toml +18 -0
  120. agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  121. agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/main.py +34 -0
  122. agentforge/templates/patch-bot/.env.example +8 -0
  123. agentforge/templates/patch-bot/.gitignore +7 -0
  124. agentforge/templates/patch-bot/README.md +13 -0
  125. agentforge/templates/patch-bot/agentforge.yaml +15 -0
  126. agentforge/templates/patch-bot/copier.yml +31 -0
  127. agentforge/templates/patch-bot/pyproject.toml +18 -0
  128. agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  129. agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
  130. agentforge/templates/research/.env.example +8 -0
  131. agentforge/templates/research/.gitignore +7 -0
  132. agentforge/templates/research/README.md +14 -0
  133. agentforge/templates/research/agentforge.yaml +17 -0
  134. agentforge/templates/research/copier.yml +31 -0
  135. agentforge/templates/research/pyproject.toml +18 -0
  136. agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  137. agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/main.py +31 -0
  138. agentforge/templates/triage/.env.example +8 -0
  139. agentforge/templates/triage/.gitignore +7 -0
  140. agentforge/templates/triage/README.md +14 -0
  141. agentforge/templates/triage/agentforge.yaml +25 -0
  142. agentforge/templates/triage/copier.yml +31 -0
  143. agentforge/templates/triage/pyproject.toml +18 -0
  144. agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
  145. agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/main.py +30 -0
  146. agentforge/testing/__init__.py +69 -0
  147. agentforge/testing/conformance.py +40 -0
  148. agentforge/testing/factory.py +89 -0
  149. agentforge/testing/fixtures.py +42 -0
  150. agentforge/testing/llm.py +235 -0
  151. agentforge/testing/recording.py +177 -0
  152. agentforge/tools/__init__.py +41 -0
  153. agentforge_py-0.2.1.dist-info/METADATA +158 -0
  154. agentforge_py-0.2.1.dist-info/RECORD +157 -0
  155. agentforge_py-0.2.1.dist-info/WHEEL +4 -0
  156. agentforge_py-0.2.1.dist-info/entry_points.txt +2 -0
  157. agentforge_py-0.2.1.dist-info/licenses/LICENSE +202 -0
@@ -0,0 +1,168 @@
1
+ """`agentforge debug` — interactive replay REPL (feat-017 chunk 6).
2
+
3
+ agentforge debug --replay <RUN_ID> [--path agentforge.yaml]
4
+
5
+ Loads `category="__step"` claims for the run, exposes a `cmd.Cmd`
6
+ prompt with:
7
+
8
+ step / s advance to the next step
9
+ back / b rewind one step
10
+ state print the current step's payload
11
+ inspect FIELD print payload[FIELD] (dotted-path supported)
12
+ steps list all step kinds + indices
13
+ quit / q exit
14
+
15
+ No external dependencies — uses stdlib `cmd.Cmd`.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import asyncio
22
+ import cmd
23
+ import json
24
+ import sys
25
+ from pathlib import Path
26
+ from typing import IO, Any
27
+
28
+ from agentforge.cli._build import build_memory_from_config
29
+ from agentforge.recording import STEP_CATEGORY
30
+
31
+
32
def register_debug_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Attach the `debug` subcommand and its options to the parent subparser action."""
    debug_parser = sub.add_parser(
        "debug",
        help="Interactive REPL to step through a recorded run.",
    )
    # One row per flag: (option string, keyword arguments for add_argument).
    option_specs = (
        ("--replay", {"required": True, "metavar": "RUN_ID"}),
        ("--path", {"type": Path, "default": None}),
        ("--env", {"default": None}),
        ("--override", {"action": "append", "default": []}),
    )
    for flag, kwargs in option_specs:
        debug_parser.add_argument(flag, **kwargs)
    debug_parser.set_defaults(_handler=_debug_handler)
42
+
43
+
44
def _debug_handler(args: argparse.Namespace) -> int:
    """Synchronous entry point: run the async `_dispatch` to completion and return its exit code."""
    exit_code: int = asyncio.run(_dispatch(args))
    return exit_code
46
+
47
+
48
async def _dispatch(args: argparse.Namespace) -> int:
    """Load the recorded steps for `args.replay` and hand them to the replay REPL.

    Returns 1 (after writing a message to stderr) when no memory backend is
    configured or when the query for the run returns nothing; returns 0 once
    the REPL loop exits.
    """
    from agentforge_core.config.loader import load_config  # noqa: PLC0415

    overrides = list(args.override) or None
    config = load_config(args.path, env=args.env, overrides=overrides)

    memory = build_memory_from_config(config)
    if memory is None:
        sys.stderr.write("agentforge debug: modules.memory must be configured.\n")
        return 1

    recorded = await memory.query(category=STEP_CATEGORY, run_id=args.replay, limit=10_000)
    if not recorded:
        sys.stderr.write(f"agentforge debug: no steps recorded for run_id={args.replay!r}.\n")
        return 1

    # The REPL works on the payloads only, not on the stored claim objects.
    payloads = [claim.payload for claim in recorded]
    _ReplayREPL(payloads).cmdloop()
    return 0
63
+
64
+
65
+ class _ReplayREPL(cmd.Cmd):
66
+ """Interactive replay stepper. Output is plain text — no Rich."""
67
+
68
+ intro = "agentforge debug — recorded-run replay. Type 'help' for commands."
69
+ prompt = "(agentforge) "
70
+
71
+ def __init__(
72
+ self,
73
+ steps: list[dict[str, Any]],
74
+ *,
75
+ stdin: IO[str] | None = None,
76
+ stdout: IO[str] | None = None,
77
+ ) -> None:
78
+ super().__init__(stdin=stdin, stdout=stdout)
79
+ self._steps = steps
80
+ self._cursor = 0
81
+
82
+ def do_step(self, arg: str) -> bool:
83
+ del arg
84
+ if self._cursor >= len(self._steps):
85
+ self._w("END of recording.\n")
86
+ return False
87
+ self._w(_format_step(self._cursor, self._steps[self._cursor]))
88
+ self._cursor += 1
89
+ return False
90
+
91
+ do_s = do_step
92
+
93
+ def do_back(self, arg: str) -> bool:
94
+ del arg
95
+ if self._cursor <= 0:
96
+ self._w("at start.\n")
97
+ return False
98
+ self._cursor -= 1
99
+ self._w(_format_step(self._cursor, self._steps[self._cursor]))
100
+ return False
101
+
102
+ do_b = do_back
103
+
104
+ def do_state(self, arg: str) -> bool:
105
+ del arg
106
+ if self._cursor == 0:
107
+ self._w("no step entered yet.\n")
108
+ return False
109
+ idx = self._cursor - 1
110
+ self._w(json.dumps(self._steps[idx], indent=2) + "\n")
111
+ return False
112
+
113
+ def do_inspect(self, arg: str) -> bool:
114
+ if self._cursor == 0:
115
+ self._w("no step entered yet.\n")
116
+ return False
117
+ idx = self._cursor - 1
118
+ payload: Any = self._steps[idx]
119
+ for part in arg.split("."):
120
+ if not part:
121
+ continue
122
+ if isinstance(payload, dict) and part in payload:
123
+ payload = payload[part]
124
+ else:
125
+ self._w(f"no such field: {arg}\n")
126
+ return False
127
+ self._w(json.dumps(payload, indent=2) + "\n")
128
+ return False
129
+
130
+ def do_steps(self, arg: str) -> bool:
131
+ del arg
132
+ for i, s in enumerate(self._steps):
133
+ self._w(f" {i:3d} {s['kind']:<8} iter={s['iteration']}\n")
134
+ return False
135
+
136
+ def do_quit(self, arg: str) -> bool:
137
+ del arg
138
+ return True
139
+
140
+ do_q = do_quit
141
+ do_EOF = do_quit # noqa: N815 — cmd.Cmd protocol uses this exact name
142
+
143
+ def _w(self, text: str) -> None:
144
+ out = self.stdout or sys.stdout
145
+ out.write(text)
146
+ out.flush()
147
+
148
+
149
+ _CONTENT_PREVIEW_LEN = 80
150
+ _CONTENT_TRUNCATE_AT = 77
151
+
152
+
153
+ def _format_step(idx: int, payload: dict[str, Any]) -> str:
154
+ line = f"[{idx:3d}] kind={payload.get('kind')} iter={payload.get('iteration')}"
155
+ content = payload.get("content")
156
+ if isinstance(content, str):
157
+ preview = (
158
+ content
159
+ if len(content) <= _CONTENT_PREVIEW_LEN
160
+ else content[:_CONTENT_TRUNCATE_AT] + "..."
161
+ )
162
+ line += f" content={preview!r}"
163
+ elif content is not None:
164
+ line += f" content={type(content).__name__}"
165
+ return line + "\n"
166
+
167
+
168
+ __all__ = ["register_debug_cmd"]
@@ -0,0 +1,217 @@
1
+ """`agentforge docs` — open and audit runbooks (feat-019 chunk 7).
2
+
3
+ Four modes:
4
+
5
+ - `docs` — list every numbered runbook (stem and path) in
6
+ sorted order.
7
+ - `docs <topic>` — open by name. Matches filename stem (e.g.
8
+ `02-add-a-tool`), bare number (`2`), or alias (`add-tool`,
9
+ `add-mcp`).
10
+ - `docs --check` — diff local runbook content against the
11
+ framework's current bundle; report drift; suggest
12
+ `agentforge upgrade`.
13
+ - `docs --serve` — local HTTP browser of the runbook tree (127.0.0.1:8765).
14
+
15
+ The runbooks live under `docs/runbooks/` relative to the
16
+ project's working directory (overrideable via
17
+ `agentforge.yaml > docs.runbooks_path`). The framework's bundled
18
+ copies live inside the `agentforge` wheel — `docs check`
19
+ compares the two.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import argparse
25
+ import http.server
26
+ import os
27
+ import re
28
+ import socketserver
29
+ import subprocess # nosec B404 — opens user-chosen $EDITOR; argv list, no shell
30
+ import sys
31
+ from importlib import resources
32
+ from pathlib import Path
33
+
34
+ from agentforge.cli._scaffold_state import _strip_marker_for_hash, hash_content
35
+
36
+
37
def register_docs_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Register `agentforge docs` (topic, --path, --check, --serve) on the subparser action."""
    docs_parser = sub.add_parser(
        "docs",
        help="Open / list / audit project runbooks.",
        description="Open and audit AgentForge runbooks shipped into this project.",
    )

    topic_help = (
        "Runbook to open. Matches filename stem (02-add-a-tool), "
        "bare number (2), or alias (add-tool / add-memory)."
    )
    docs_parser.add_argument("topic", nargs="?", default=None, help=topic_help)
    docs_parser.add_argument(
        "--path",
        type=Path,
        default=None,
        help="Override the runbooks directory (default: ./docs/runbooks).",
    )

    # The two mode switches are plain boolean flags with the same shape.
    mode_flags = (
        ("--check", "Compare local runbooks against the framework's bundle; report drift."),
        ("--serve", "Start a local HTTP browser of the runbook tree on port 8765."),
    )
    for flag, help_text in mode_flags:
        docs_parser.add_argument(flag, action="store_true", help=help_text)

    docs_parser.set_defaults(_handler=_run_docs)
70
+
71
+
72
+ def _run_docs(args: argparse.Namespace) -> int:
73
+ runbooks_dir = args.path if args.path is not None else Path.cwd() / "docs" / "runbooks"
74
+ if not runbooks_dir.exists():
75
+ sys.stderr.write(
76
+ f"agentforge docs: {runbooks_dir} does not exist. "
77
+ "Scaffold via `agentforge new` to install runbooks.\n"
78
+ )
79
+ return 1
80
+ if args.check:
81
+ return _do_check(runbooks_dir)
82
+ if args.serve:
83
+ return _do_serve(runbooks_dir)
84
+ if args.topic is None:
85
+ return _do_list(runbooks_dir)
86
+ return _do_open(runbooks_dir, args.topic)
87
+
88
+
89
def _do_list(runbooks_dir: Path) -> int:
    """Print one line per runbook found by `_scan`: its stem, then its path. Always returns 0."""
    rows = [f" {entry.stem:<40} {entry}" for entry in _scan(runbooks_dir)]
    for row in rows:
        print(row)
    return 0
94
+
95
+
96
def _do_open(runbooks_dir: Path, topic: str) -> int:
    """Resolve `topic` to a single runbook and open it via $EDITOR, else print it.

    Returns 1 when no runbook matches or the editor cannot be launched;
    otherwise the editor's exit status, or 0 after printing to stdout.
    """
    import shlex  # noqa: PLC0415 — local so this block carries its own dependency

    match = _resolve_topic(runbooks_dir, topic)
    if match is None:
        sys.stderr.write(
            f"agentforge docs: no runbook matches {topic!r}. Try `agentforge docs` to list.\n"
        )
        return 1

    editor = os.environ.get("EDITOR", "").strip()
    if editor:
        try:
            # $EDITOR commonly carries arguments ("code --wait", "emacs -nw");
            # passing the whole string as argv[0] makes the launch fail.
            argv = shlex.split(editor, posix=os.name != "nt")
            return subprocess.run(  # noqa: S603 # nosec B603 — $EDITOR is user's own
                [*argv, str(match)],
                check=False,
            ).returncode
        except (OSError, ValueError) as exc:
            # OSError: editor binary missing / not executable.
            # ValueError: unbalanced quotes in $EDITOR.
            sys.stderr.write(f"agentforge docs: cannot launch $EDITOR {editor!r}: {exc}\n")
            return 1

    # No EDITOR — print to stdout so the developer can pipe to less.
    sys.stdout.write(match.read_text(encoding="utf-8"))
    return 0
113
+
114
+
115
def _do_check(runbooks_dir: Path) -> int:
    """Compare each local runbook's content hash with its bundled counterpart.

    Prints a drift report and returns 1 when any local runbook has no bundled
    counterpart or hashes differently; prints an in-sync message and returns 0
    otherwise. Returns 1 early when the framework bundle cannot be located.
    """
    bundle_root = _bundled_runbooks_dir()
    if bundle_root is None:
        sys.stderr.write(
            "agentforge docs check: framework bundle not found — "
            "running from a non-standard install?\n"
        )
        return 1

    report: list[str] = []
    for local_path in _scan(runbooks_dir):
        rel = local_path.relative_to(runbooks_dir)

        # Bundled file may carry `.tmpl` suffix; the plain name wins if both exist.
        shipped = None
        for option in (bundle_root / rel, bundle_root / (str(rel) + ".tmpl")):
            if option.exists():
                shipped = option
                break
        if shipped is None:
            report.append(f" +local {rel}")
            continue

        # Only the local side is passed through `_strip_marker_for_hash`.
        # NOTE(review): a `.tmpl` counterpart is hashed as-is — confirm that a
        # rendered local copy is expected to hash equal to the raw template.
        ours = hash_content(_strip_marker_for_hash(local_path.read_text(encoding="utf-8")))
        theirs = hash_content(shipped.read_text(encoding="utf-8"))
        if ours != theirs:
            report.append(f" ~drift {rel}")

    if not report:
        print("All runbooks in sync with framework bundle.")
        return 0

    print("Runbook drift detected:")
    for entry in report:
        print(entry)
    print("\nRun `agentforge upgrade` to merge framework updates.")
    return 1
145
+
146
+
147
def _do_serve(runbooks_dir: Path, port: int = 8765) -> int:
    """Serve the runbooks directory over HTTP on 127.0.0.1 until Ctrl-C.

    Returns 1 when the address cannot be bound (e.g. port already in use),
    0 after a clean stop.
    """
    import functools  # noqa: PLC0415 — local so this block carries its own dependency

    # Bind the handler to the directory explicitly instead of os.chdir():
    # the process-wide working directory is left untouched.
    handler_cls = functools.partial(
        http.server.SimpleHTTPRequestHandler,
        directory=str(runbooks_dir),
    )
    try:
        # ThreadingHTTPServer enables address reuse, so a quick restart does
        # not fail while the previous socket lingers in TIME_WAIT.
        httpd = http.server.ThreadingHTTPServer(("127.0.0.1", port), handler_cls)
    except OSError as exc:
        sys.stderr.write(f"agentforge docs: cannot bind 127.0.0.1:{port}: {exc}\n")
        return 1

    with httpd:
        sys.stdout.write(
            f"agentforge docs: serving {runbooks_dir} at http://127.0.0.1:{port}/\n"
            "Press Ctrl-C to stop.\n"
        )
        sys.stdout.flush()
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            sys.stdout.write("\nstopped.\n")
    return 0
164
+
165
+
166
+ def _scan(runbooks_dir: Path) -> list[Path]:
167
+ """Walk the runbooks directory and return numbered runbooks in order."""
168
+ return sorted(p for p in runbooks_dir.glob("*.md") if _RUNBOOK_RE.match(p.name))
169
+
170
+
171
+ _RUNBOOK_RE = re.compile(r"^\d{2}-[a-z0-9-]+\.md$")
172
+
173
+
174
+ def _resolve_topic(runbooks_dir: Path, topic: str) -> Path | None:
175
+ """Resolve `topic` to a runbook path.
176
+
177
+ Match precedence:
178
+ 1. Exact filename stem (`02-add-a-tool`).
179
+ 2. Bare number (`2` → `02-...`).
180
+ 3. Alias (`add-tool` → matches any runbook whose body
181
+ (after the leading number) contains the alias).
182
+ """
183
+ if topic.endswith(".md"):
184
+ candidate = runbooks_dir / topic
185
+ if candidate.exists():
186
+ return candidate
187
+ candidate = runbooks_dir / f"{topic}.md"
188
+ if candidate.exists():
189
+ return candidate
190
+ if topic.isdigit():
191
+ num = f"{int(topic):02d}-"
192
+ for p in _scan(runbooks_dir):
193
+ if p.name.startswith(num):
194
+ return p
195
+ for p in _scan(runbooks_dir):
196
+ # Drop the `NN-` prefix when comparing aliases.
197
+ body = p.stem.split("-", 1)[1] if "-" in p.stem else p.stem
198
+ if topic in body:
199
+ return p
200
+ return None
201
+
202
+
203
+ def _bundled_runbooks_dir() -> Path | None:
204
+ """Return the on-disk path of the framework's bundled runbooks."""
205
+ try:
206
+ traversable = resources.files("agentforge.templates").joinpath(
207
+ "_shared", "docs", "runbooks"
208
+ )
209
+ except ModuleNotFoundError:
210
+ return None
211
+ with resources.as_file(traversable) as path:
212
+ if not path.exists() or not path.is_dir():
213
+ return None
214
+ return Path(path)
215
+
216
+
217
+ __all__ = ["register_docs_cmd"]
@@ -0,0 +1,181 @@
1
+ """`agentforge eval` — run an agent against JSONL fixtures (feat-017 chunk 5).
2
+
3
+ Each fixture line:
4
+
5
+ {"task": "...", "expected": "...", "metadata": {...}}
6
+
7
+ The command builds the agent once, iterates fixtures, runs the agent
8
+ on each, aggregates per-evaluator scores, and threshold-checks the
9
+ mean. Output formats: rich (default), json, junit. Exit 5 on
10
+ threshold failure.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import asyncio
17
+ import json
18
+ import sys
19
+ from pathlib import Path
20
+ from typing import Any
21
+ from xml.etree import (
22
+ ElementTree as ET, # nosec B405 — output only; never parses untrusted XML
23
+ )
24
+
25
+ from pydantic import ValidationError
26
+
27
+ from agentforge.cli._build import load_and_build
28
+ from agentforge.cli.run_cmd import (
29
+ EXIT_CONFIG_INVALID,
30
+ EXIT_GENERIC,
31
+ EXIT_OK,
32
+ )
33
+
34
EXIT_THRESHOLD = 5


def register_eval_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Attach the `eval` subcommand and its options to the parent subparser action."""
    eval_parser = sub.add_parser(
        "eval",
        help="Run an agent against JSONL fixtures and apply evaluators.",
    )
    # One row per flag: (option string, keyword arguments for add_argument).
    option_specs = (
        ("--fixtures", {"type": Path, "required": True, "help": "Path to JSONL fixtures."}),
        (
            "--threshold",
            {
                "type": float,
                "default": None,
                "help": "Minimum mean score across all evaluators. Exit 5 if below.",
            },
        ),
        ("--output-format", {"choices": ("rich", "json", "junit"), "default": "rich"}),
        ("--path", {"type": Path, "default": None}),
        ("--env", {"default": None}),
        ("--override", {"action": "append", "default": []}),
    )
    for flag, kwargs in option_specs:
        eval_parser.add_argument(flag, **kwargs)
    eval_parser.set_defaults(_handler=_eval_handler)
58
+
59
+
60
def _eval_handler(args: argparse.Namespace) -> int:
    """Synchronous entry point: run the async `_dispatch` to completion and return its exit code."""
    exit_code: int = asyncio.run(_dispatch(args))
    return exit_code
62
+
63
+
64
async def _dispatch(args: argparse.Namespace) -> int:
    """Run the agent over every fixture, emit a report, and return the exit code.

    Returns EXIT_GENERIC when the fixtures cannot be read or are malformed,
    EXIT_CONFIG_INVALID when the config fails validation, EXIT_THRESHOLD when
    a threshold is given and the mean score is below it, EXIT_OK otherwise.
    """
    try:
        fixtures = _load_fixtures(args.fixtures)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and also UnicodeDecodeError
        # (fixtures file not valid UTF-8), which previously escaped as a traceback.
        sys.stderr.write(f"agentforge eval: failed to read fixtures: {exc}\n")
        return EXIT_GENERIC

    # Validate up front so a bad fixture is reported before the agent is
    # built and before any earlier fixture has been run.
    for position, fix in enumerate(fixtures, start=1):
        if not isinstance(fix, dict) or "task" not in fix:
            sys.stderr.write(
                f"agentforge eval: fixture #{position} must be a JSON object with a 'task' key.\n"
            )
            return EXIT_GENERIC

    try:
        agent = await load_and_build(
            path=args.path,
            env=args.env,
            overrides=list(args.override) or None,
        )
    except ValidationError as exc:
        sys.stderr.write(f"agentforge eval: config invalid:\n{exc}\n")
        return EXIT_CONFIG_INVALID

    results: list[dict[str, Any]] = []
    for fix in fixtures:
        run_result = await agent.run(fix["task"])
        results.append(
            {
                "task": fix["task"],
                "expected": fix.get("expected"),
                "output": run_result.output,
                "scores": [score.model_dump(mode="json") for score in run_result.eval_scores],
                "run_id": run_result.run_id,
            }
        )

    mean = _mean_score(results)
    fail = args.threshold is not None and mean < args.threshold

    _emit(results, mean, args.threshold, args.output_format, fail=fail)
    return EXIT_THRESHOLD if fail else EXIT_OK
99
+
100
+
101
+ def _load_fixtures(path: Path) -> list[dict[str, Any]]:
102
+ fixtures: list[dict[str, Any]] = []
103
+ for line in path.read_text(encoding="utf-8").splitlines():
104
+ stripped = line.strip()
105
+ if not stripped:
106
+ continue
107
+ fixtures.append(json.loads(stripped))
108
+ return fixtures
109
+
110
+
111
+ def _mean_score(results: list[dict[str, Any]]) -> float:
112
+ values: list[float] = []
113
+ for r in results:
114
+ for s in r["scores"]:
115
+ score = s.get("score")
116
+ if isinstance(score, int | float):
117
+ values.append(float(score))
118
+ return sum(values) / len(values) if values else 0.0
119
+
120
+
121
+ def _emit(
122
+ results: list[dict[str, Any]],
123
+ mean: float,
124
+ threshold: float | None,
125
+ fmt: str,
126
+ *,
127
+ fail: bool,
128
+ ) -> None:
129
+ if fmt == "json":
130
+ print(
131
+ json.dumps(
132
+ {
133
+ "fixtures": len(results),
134
+ "mean_score": mean,
135
+ "threshold": threshold,
136
+ "passed": not fail,
137
+ "results": results,
138
+ },
139
+ indent=2,
140
+ )
141
+ )
142
+ return
143
+ if fmt == "junit":
144
+ print(_to_junit(results, mean, fail=fail))
145
+ return
146
+ # Rich-or-plain summary.
147
+ print(f"fixtures: {len(results)}")
148
+ print(f"mean_score: {mean:.4f}")
149
+ if threshold is not None:
150
+ print(f"threshold: {threshold:.4f} → {'FAIL' if fail else 'PASS'}")
151
+
152
+
153
+ def _to_junit(results: list[dict[str, Any]], mean: float, *, fail: bool) -> str:
154
+ suite = ET.Element(
155
+ "testsuite",
156
+ attrib={
157
+ "name": "agentforge-eval",
158
+ "tests": str(len(results)),
159
+ "failures": "1" if fail else "0",
160
+ },
161
+ )
162
+ for i, r in enumerate(results):
163
+ case = ET.SubElement(
164
+ suite,
165
+ "testcase",
166
+ attrib={"name": f"fixture[{i}]", "classname": "agentforge.eval"},
167
+ )
168
+ for score in r["scores"]:
169
+ if score.get("score", 1.0) < 1.0:
170
+ f = ET.SubElement(case, "failure", attrib={"type": "score"})
171
+ f.text = json.dumps(score)
172
+ if fail:
173
+ f = ET.SubElement(
174
+ suite,
175
+ "system-err",
176
+ )
177
+ f.text = f"mean_score {mean:.4f} below threshold"
178
+ return ET.tostring(suite, encoding="unicode")
179
+
180
+
181
+ __all__ = ["register_eval_cmd"]
@@ -0,0 +1,139 @@
1
+ """`agentforge health` — preflight checks (feat-017 chunk 8).
2
+
3
+ Renamed from the spec's `agentforge status` to avoid colliding with
4
+ the feat-011 scaffolding-state `agentforge status`. Checks:
5
+
6
+ 1. Config loads + validates.
7
+ 2. Every installed module resolvable via `Resolver.list_installed`.
8
+ 3. The backend declared under `modules.memory` is reachable (instantiate,
9
+ then `init_schema()`/`close()` when present); graph/retriever are not probed yet.
10
+ 4. Provider construction is exercised as a no-API probe.
11
+
12
+ Exit codes: 0 all OK, 1 any FAIL, 2 config invalid.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import argparse
18
+ import asyncio
19
+ import json
20
+ import sys
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ from agentforge_core.config.loader import load_config
25
+ from agentforge_core.config.schema import AgentForgeConfig
26
+ from agentforge_core.production.exceptions import ModuleError
27
+ from agentforge_core.resolver import Resolver
28
+ from pydantic import ValidationError
29
+
30
+ from agentforge.cli._build import (
31
+ build_memory_from_config,
32
+ )
33
+
34
+
35
def register_health_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Attach the `health` subcommand and its options to the parent subparser action."""
    health_parser = sub.add_parser(
        "health",
        help="Preflight: config valid, modules loadable, backends reachable.",
    )
    # One row per flag: (option string, keyword arguments for add_argument).
    option_specs = (
        ("--path", {"type": Path, "default": None}),
        ("--env", {"default": None}),
        ("--override", {"action": "append", "default": []}),
        ("--output-format", {"choices": ("rich", "plain", "json"), "default": "plain"}),
    )
    for flag, kwargs in option_specs:
        health_parser.add_argument(flag, **kwargs)
    health_parser.set_defaults(_handler=_health_handler)
49
+
50
+
51
def _health_handler(args: argparse.Namespace) -> int:
    """Synchronous entry point: run the async `_dispatch` to completion and return its exit code."""
    exit_code: int = asyncio.run(_dispatch(args))
    return exit_code
53
+
54
+
55
async def _dispatch(args: argparse.Namespace) -> int:
    """Run all preflight checks, emit the report, and return the exit code.

    Returns 2 when loading the config raises ValidationError or ModuleError
    (only the failed config check is emitted), 1 when any check failed,
    0 when every check passed.
    """
    try:
        config = load_config(args.path, env=args.env, overrides=list(args.override) or None)
    except (ValidationError, ModuleError) as exc:
        config_failure = {"name": "config", "kind": "config", "ok": False, "detail": str(exc)}
        _emit([config_failure], args)
        return 2

    checks: list[dict[str, Any]] = [
        {"name": "config", "kind": "config", "ok": True, "detail": "valid"}
    ]
    checks += _check_modules()
    checks += await _check_backends(config)

    any_failed = any(not check["ok"] for check in checks)
    _emit(checks, args)
    return 1 if any_failed else 0
74
+
75
+
76
def _check_modules() -> list[dict[str, Any]]:
    """Try to resolve every module listed by `Resolver.list_installed`.

    Returns one check row per module: `ok` is False with the ModuleError text
    as detail when resolution raises, True with "resolvable" otherwise.
    """
    resolver = Resolver.global_()
    report: list[dict[str, Any]] = []
    for info in resolver.list_installed():
        try:
            resolver.resolve(info.category, info.name)
        except ModuleError as exc:
            ok, detail = False, str(exc)
        else:
            ok, detail = True, "resolvable"
        report.append(
            {
                "name": f"{info.category}:{info.name}",
                "kind": "module",
                "ok": ok,
                "detail": detail,
            }
        )
    return report
102
+
103
+
104
+ async def _check_backends(config: AgentForgeConfig) -> list[dict[str, Any]]:
105
+ """For each configured backend, attempt to instantiate + close."""
106
+ out: list[dict[str, Any]] = []
107
+
108
+ if config.modules.memory is not None:
109
+ out.append(await _probe("memory", lambda: build_memory_from_config(config)))
110
+ return out
111
+
112
+
113
+ async def _probe(label: str, factory: Any) -> dict[str, Any]:
114
+ try:
115
+ instance = factory()
116
+ if instance is None:
117
+ return {"name": label, "kind": "backend", "ok": True, "detail": "none configured"}
118
+ init = getattr(instance, "init_schema", None)
119
+ if callable(init):
120
+ await init()
121
+ close = getattr(instance, "close", None)
122
+ if callable(close):
123
+ await close()
124
+ except (ModuleError, OSError) as exc:
125
+ return {"name": label, "kind": "backend", "ok": False, "detail": str(exc)}
126
+ return {"name": label, "kind": "backend", "ok": True, "detail": "reachable"}
127
+
128
+
129
+ def _emit(checks: list[dict[str, Any]], args: argparse.Namespace) -> None:
130
+ if args.output_format == "json":
131
+ ok = all(c["ok"] for c in checks)
132
+ print(json.dumps({"checks": checks, "ok": ok}, indent=2))
133
+ return
134
+ for c in checks:
135
+ status = "OK " if c["ok"] else "FAIL"
136
+ sys.stdout.write(f"{status} {c['kind']:<8} {c['name']:<32} {c['detail']}\n")
137
+
138
+
139
+ __all__ = ["register_health_cmd"]