agentforge-py 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agentforge/__init__.py +114 -0
- agentforge/_testing/__init__.py +19 -0
- agentforge/_testing/fake_llm.py +126 -0
- agentforge/_testing/fake_tool.py +122 -0
- agentforge/_tools/__init__.py +14 -0
- agentforge/_tools/calculator.py +102 -0
- agentforge/_tools/decorator.py +300 -0
- agentforge/_tools/file_read.py +112 -0
- agentforge/_tools/shell.py +134 -0
- agentforge/_tools/web_search.py +207 -0
- agentforge/agent.py +817 -0
- agentforge/auth.py +42 -0
- agentforge/cli/__init__.py +18 -0
- agentforge/cli/_build.py +323 -0
- agentforge/cli/_scaffold_state.py +250 -0
- agentforge/cli/_shared_scaffold.py +174 -0
- agentforge/cli/config_cmd.py +174 -0
- agentforge/cli/db_cmd.py +262 -0
- agentforge/cli/debug_cmd.py +168 -0
- agentforge/cli/docs_cmd.py +217 -0
- agentforge/cli/eval_cmd.py +181 -0
- agentforge/cli/health_cmd.py +139 -0
- agentforge/cli/list_modules.py +85 -0
- agentforge/cli/main.py +81 -0
- agentforge/cli/manifest_apply.py +368 -0
- agentforge/cli/module_cmd.py +247 -0
- agentforge/cli/new_cmd.py +171 -0
- agentforge/cli/run_cmd.py +234 -0
- agentforge/cli/upgrade_cmd.py +230 -0
- agentforge/config/__init__.py +45 -0
- agentforge/eval/__init__.py +18 -0
- agentforge/eval/consistency.py +107 -0
- agentforge/eval/coverage.py +100 -0
- agentforge/eval/format_compliance.py +107 -0
- agentforge/eval/regression.py +143 -0
- agentforge/findings.py +166 -0
- agentforge/guardrails/__init__.py +32 -0
- agentforge/guardrails/allowlist.py +49 -0
- agentforge/guardrails/capability_check.py +58 -0
- agentforge/guardrails/engine.py +289 -0
- agentforge/guardrails/pii_redact_basic.py +61 -0
- agentforge/guardrails/prompt_injection_basic.py +90 -0
- agentforge/memory/__init__.py +16 -0
- agentforge/memory/in_memory.py +130 -0
- agentforge/memory/in_memory_graph.py +262 -0
- agentforge/memory/in_memory_vector.py +167 -0
- agentforge/pipeline/__init__.py +26 -0
- agentforge/pipeline/engine.py +189 -0
- agentforge/pipeline/errors.py +19 -0
- agentforge/pipeline/tool.py +93 -0
- agentforge/py.typed +0 -0
- agentforge/recording.py +189 -0
- agentforge/renderers/__init__.py +28 -0
- agentforge/renderers/_defaults.py +32 -0
- agentforge/renderers/markdown.py +44 -0
- agentforge/renderers/patch_applier.py +46 -0
- agentforge/renderers/registry.py +108 -0
- agentforge/renderers/scorecard.py +59 -0
- agentforge/renderers/span_table.py +71 -0
- agentforge/replay.py +260 -0
- agentforge/resolver_register.py +41 -0
- agentforge/retrieval.py +410 -0
- agentforge/runtime.py +63 -0
- agentforge/strategies/__init__.py +27 -0
- agentforge/strategies/_base.py +280 -0
- agentforge/strategies/_plan.py +93 -0
- agentforge/strategies/multi_agent.py +541 -0
- agentforge/strategies/plan_execute.py +506 -0
- agentforge/strategies/react.py +237 -0
- agentforge/strategies/tot.py +472 -0
- agentforge/templates/_shared/.cursorrules +12 -0
- agentforge/templates/_shared/.github/copilot-instructions.md +13 -0
- agentforge/templates/_shared/.gitkeep +0 -0
- agentforge/templates/_shared/AGENTS.md.tmpl +123 -0
- agentforge/templates/_shared/CLAUDE.md +13 -0
- agentforge/templates/_shared/docs/runbooks/01-set-up-new-agent.md.tmpl +67 -0
- agentforge/templates/_shared/docs/runbooks/02-add-a-tool.md +67 -0
- agentforge/templates/_shared/docs/runbooks/03-add-a-pipeline-task.md +69 -0
- agentforge/templates/_shared/docs/runbooks/04-pick-reasoning-strategy.md +67 -0
- agentforge/templates/_shared/docs/runbooks/05-write-prompts.md +75 -0
- agentforge/templates/_shared/docs/runbooks/06-test-your-agent.md +75 -0
- agentforge/templates/_shared/docs/runbooks/07-debug-a-run.md +70 -0
- agentforge/templates/_shared/docs/runbooks/08-add-memory.md +75 -0
- agentforge/templates/_shared/docs/runbooks/09-add-mcp.md +78 -0
- agentforge/templates/_shared/docs/runbooks/10-add-evaluators.md +76 -0
- agentforge/templates/_shared/docs/runbooks/11-add-safety-guardrails.md +83 -0
- agentforge/templates/_shared/docs/runbooks/12-add-observability.md +77 -0
- agentforge/templates/_shared/docs/runbooks/13-configure-multi-provider.md +91 -0
- agentforge/templates/_shared/docs/runbooks/14-deploy-your-agent.md +70 -0
- agentforge/templates/_shared/docs/runbooks/15-upgrade-your-agent.md +67 -0
- agentforge/templates/_shared/docs/runbooks/16-configuration-reference.md +81 -0
- agentforge/templates/_shared/docs/runbooks/17-add-reranker.md +78 -0
- agentforge/templates/_shared/docs/runbooks/18-add-hybrid-search.md +78 -0
- agentforge/templates/_shared/docs/runbooks/19-add-graphrag.md +83 -0
- agentforge/templates/_shared/docs/runbooks/20-apply-schema-migrations.md +92 -0
- agentforge/templates/_shared/docs/runbooks/21-use-streaming-guardrails.md +82 -0
- agentforge/templates/_shared/docs/runbooks/README.md.tmpl +68 -0
- agentforge/templates/code-reviewer/.env.example +8 -0
- agentforge/templates/code-reviewer/.gitignore +7 -0
- agentforge/templates/code-reviewer/README.md +12 -0
- agentforge/templates/code-reviewer/agentforge.yaml +23 -0
- agentforge/templates/code-reviewer/copier.yml +34 -0
- agentforge/templates/code-reviewer/pyproject.toml +18 -0
- agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/code-reviewer/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
- agentforge/templates/docs-qa/.env.example +8 -0
- agentforge/templates/docs-qa/.gitignore +7 -0
- agentforge/templates/docs-qa/README.md +14 -0
- agentforge/templates/docs-qa/agentforge.yaml +19 -0
- agentforge/templates/docs-qa/copier.yml +31 -0
- agentforge/templates/docs-qa/pyproject.toml +18 -0
- agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/docs-qa/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
- agentforge/templates/minimal/.env.example +11 -0
- agentforge/templates/minimal/.gitignore +10 -0
- agentforge/templates/minimal/README.md +28 -0
- agentforge/templates/minimal/agentforge.yaml +10 -0
- agentforge/templates/minimal/copier.yml +52 -0
- agentforge/templates/minimal/pyproject.toml +18 -0
- agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/minimal/src/{{project_slug.replace('-', '_')}}/main.py +34 -0
- agentforge/templates/patch-bot/.env.example +8 -0
- agentforge/templates/patch-bot/.gitignore +7 -0
- agentforge/templates/patch-bot/README.md +13 -0
- agentforge/templates/patch-bot/agentforge.yaml +15 -0
- agentforge/templates/patch-bot/copier.yml +31 -0
- agentforge/templates/patch-bot/pyproject.toml +18 -0
- agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/patch-bot/src/{{project_slug.replace('-', '_')}}/main.py +32 -0
- agentforge/templates/research/.env.example +8 -0
- agentforge/templates/research/.gitignore +7 -0
- agentforge/templates/research/README.md +14 -0
- agentforge/templates/research/agentforge.yaml +17 -0
- agentforge/templates/research/copier.yml +31 -0
- agentforge/templates/research/pyproject.toml +18 -0
- agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/research/src/{{project_slug.replace('-', '_')}}/main.py +31 -0
- agentforge/templates/triage/.env.example +8 -0
- agentforge/templates/triage/.gitignore +7 -0
- agentforge/templates/triage/README.md +14 -0
- agentforge/templates/triage/agentforge.yaml +25 -0
- agentforge/templates/triage/copier.yml +31 -0
- agentforge/templates/triage/pyproject.toml +18 -0
- agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/__init__.py +5 -0
- agentforge/templates/triage/src/{{project_slug.replace('-', '_')}}/main.py +30 -0
- agentforge/testing/__init__.py +69 -0
- agentforge/testing/conformance.py +40 -0
- agentforge/testing/factory.py +89 -0
- agentforge/testing/fixtures.py +42 -0
- agentforge/testing/llm.py +235 -0
- agentforge/testing/recording.py +177 -0
- agentforge/tools/__init__.py +41 -0
- agentforge_py-0.2.1.dist-info/METADATA +158 -0
- agentforge_py-0.2.1.dist-info/RECORD +157 -0
- agentforge_py-0.2.1.dist-info/WHEEL +4 -0
- agentforge_py-0.2.1.dist-info/entry_points.txt +2 -0
- agentforge_py-0.2.1.dist-info/licenses/LICENSE +202 -0
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""`agentforge debug` — interactive replay REPL (feat-017 chunk 6).
|
|
2
|
+
|
|
3
|
+
agentforge debug --replay <RUN_ID> [--path agentforge.yaml]
|
|
4
|
+
|
|
5
|
+
Loads `category="__step"` claims for the run, exposes a `cmd.Cmd`
|
|
6
|
+
prompt with:
|
|
7
|
+
|
|
8
|
+
step / s advance to the next step
|
|
9
|
+
back / b rewind one step
|
|
10
|
+
state print the current step's payload
|
|
11
|
+
inspect FIELD print payload[FIELD] (dotted-path supported)
|
|
12
|
+
steps list all step kinds + indices
|
|
13
|
+
quit / q exit
|
|
14
|
+
|
|
15
|
+
No external dependencies — uses stdlib `cmd.Cmd`.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import asyncio
|
|
22
|
+
import cmd
|
|
23
|
+
import json
|
|
24
|
+
import sys
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import IO, Any
|
|
27
|
+
|
|
28
|
+
from agentforge.cli._build import build_memory_from_config
|
|
29
|
+
from agentforge.recording import STEP_CATEGORY
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def register_debug_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Attach the `debug` subcommand to the parent subparser action.

    Args:
        sub: The subparsers action the `debug` parser is added to.

    The created parser stores `_debug_handler` under the `_handler` default.
    """
    debug_parser = sub.add_parser(
        "debug",
        help="Interactive REPL to step through a recorded run.",
    )
    # (flag, keyword arguments) pairs, registered in this order.
    option_table = (
        ("--replay", {"required": True, "metavar": "RUN_ID"}),
        ("--path", {"type": Path, "default": None}),
        ("--env", {"default": None}),
        ("--override", {"action": "append", "default": []}),
    )
    for flag, options in option_table:
        debug_parser.add_argument(flag, **options)
    debug_parser.set_defaults(_handler=_debug_handler)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _debug_handler(args: argparse.Namespace) -> int:
    """Run the async `_dispatch` coroutine to completion and return its exit code."""
    exit_code = asyncio.run(_dispatch(args))
    return exit_code
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
async def _dispatch(args: argparse.Namespace) -> int:
    """Load the recorded steps for `args.replay` and run the replay REPL.

    Returns:
        0 after the REPL exits; 1 (with a message on stderr) when no memory
        module is configured or the run has no recorded steps.
    """
    from agentforge_core.config.loader import load_config  # noqa: PLC0415

    overrides = list(args.override) or None
    config = load_config(args.path, env=args.env, overrides=overrides)

    memory = build_memory_from_config(config)
    if memory is None:
        sys.stderr.write("agentforge debug: modules.memory must be configured.\n")
        return 1

    recorded = await memory.query(category=STEP_CATEGORY, run_id=args.replay, limit=10_000)
    if recorded:
        payloads = [claim.payload for claim in recorded]
        _ReplayREPL(payloads).cmdloop()
        return 0

    sys.stderr.write(f"agentforge debug: no steps recorded for run_id={args.replay!r}.\n")
    return 1
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class _ReplayREPL(cmd.Cmd):
    """Interactive replay stepper. Output is plain text — no Rich.

    `_cursor` is the index of the step that `step` will print next; `state`
    and `inspect` operate on the step just before it (`_cursor - 1`).
    """

    intro = "agentforge debug — recorded-run replay. Type 'help' for commands."
    prompt = "(agentforge) "

    def __init__(
        self,
        steps: list[dict[str, Any]],
        *,
        stdin: IO[str] | None = None,
        stdout: IO[str] | None = None,
    ) -> None:
        super().__init__(stdin=stdin, stdout=stdout)
        self._steps = steps
        self._cursor = 0

    def do_step(self, arg: str) -> bool:
        """Advance to the next step and print its one-line summary."""
        del arg
        if self._cursor >= len(self._steps):
            self._w("END of recording.\n")
            return False
        self._w(_format_step(self._cursor, self._steps[self._cursor]))
        self._cursor += 1
        return False

    do_s = do_step

    def do_back(self, arg: str) -> bool:
        """Rewind one step and print the summary of the step rewound over."""
        del arg
        if self._cursor <= 0:
            self._w("at start.\n")
            return False
        self._cursor -= 1
        self._w(_format_step(self._cursor, self._steps[self._cursor]))
        return False

    do_b = do_back

    def do_state(self, arg: str) -> bool:
        """Print the current step's payload as indented JSON."""
        del arg
        if self._cursor == 0:
            self._w("no step entered yet.\n")
            return False
        idx = self._cursor - 1
        self._w(json.dumps(self._steps[idx], indent=2) + "\n")
        return False

    def do_inspect(self, arg: str) -> bool:
        """Print payload[FIELD]; dotted paths may name dict keys or list indices."""
        if self._cursor == 0:
            self._w("no step entered yet.\n")
            return False
        idx = self._cursor - 1
        payload: Any = self._steps[idx]
        for part in arg.split("."):
            if not part:
                # Empty segments (blank argument, doubled dots) are ignored.
                continue
            if isinstance(payload, dict) and part in payload:
                payload = payload[part]
            elif isinstance(payload, list) and part.isdecimal() and int(part) < len(payload):
                # A numeric segment indexes into a list, e.g. `calls.0.name`.
                payload = payload[int(part)]
            else:
                self._w(f"no such field: {arg}\n")
                return False
        self._w(json.dumps(payload, indent=2) + "\n")
        return False

    def do_steps(self, arg: str) -> bool:
        """List every recorded step with its index, kind and iteration."""
        del arg
        for i, s in enumerate(self._steps):
            # Use .get (as _format_step does) so a payload missing `kind` or
            # `iteration` is still listed; str() keeps `:<8` valid for None.
            kind = str(s.get("kind"))
            self._w(f" {i:3d} {kind:<8} iter={s.get('iteration')}\n")
        return False

    def do_quit(self, arg: str) -> bool:
        """Exit the REPL."""
        del arg
        # Returning True tells cmd.Cmd.cmdloop to stop.
        return True

    do_q = do_quit
    do_EOF = do_quit  # noqa: N815 — cmd.Cmd protocol uses this exact name

    def _w(self, text: str) -> None:
        """Write `text` to the REPL's stdout (falling back to sys.stdout) and flush."""
        out = self.stdout or sys.stdout
        out.write(text)
        out.flush()
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
_CONTENT_PREVIEW_LEN = 80
_CONTENT_TRUNCATE_AT = 77


def _format_step(idx: int, payload: dict[str, Any]) -> str:
    """Render one step as a single newline-terminated summary line.

    The line always carries the index, `kind` and `iteration` (read with
    `.get`, so missing keys render as `None`). String content longer than
    `_CONTENT_PREVIEW_LEN` is cut to `_CONTENT_TRUNCATE_AT` characters plus
    "..."; non-string, non-None content is shown by its type name only.
    """
    pieces = [f"[{idx:3d}] kind={payload.get('kind')} iter={payload.get('iteration')}"]
    content = payload.get("content")
    if isinstance(content, str):
        shown = content
        if len(shown) > _CONTENT_PREVIEW_LEN:
            shown = shown[:_CONTENT_TRUNCATE_AT] + "..."
        pieces.append(f" content={shown!r}")
    elif content is not None:
        pieces.append(f" content={type(content).__name__}")
    pieces.append("\n")
    return "".join(pieces)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
__all__ = ["register_debug_cmd"]
|
|
@@ -0,0 +1,217 @@
|
|
|
1
|
+
"""`agentforge docs` — open and audit runbooks (feat-019 chunk 7).
|
|
2
|
+
|
|
3
|
+
Four subcommands:
|
|
4
|
+
|
|
5
|
+
- `docs` — interactive picker (lists every runbook, prompts for
|
|
6
|
+
the one to open).
|
|
7
|
+
- `docs <topic>` — open by name. Matches filename stem (e.g.
|
|
8
|
+
`02-add-a-tool`), bare number (`2`), or alias (`add-tool`,
|
|
9
|
+
`add-mcp`).
|
|
10
|
+
- `docs --check` — diff local runbook content against the
|
|
11
|
+
framework's current bundle; report drift; suggest
|
|
12
|
+
`agentforge upgrade`.
|
|
13
|
+
- `docs --serve` — local HTTP browser of the runbook tree.
|
|
14
|
+
|
|
15
|
+
The runbooks live under `docs/runbooks/` relative to the
|
|
16
|
+
project's working directory (overrideable via
|
|
17
|
+
`agentforge.yaml > docs.runbooks_path`). The framework's bundled
|
|
18
|
+
copies live inside the `agentforge` wheel — `docs check`
|
|
19
|
+
compares the two.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
from __future__ import annotations
|
|
23
|
+
|
|
24
|
+
import argparse
|
|
25
|
+
import http.server
|
|
26
|
+
import os
|
|
27
|
+
import re
|
|
28
|
+
import socketserver
|
|
29
|
+
import subprocess # nosec B404 — opens user-chosen $EDITOR; argv list, no shell
|
|
30
|
+
import sys
|
|
31
|
+
from importlib import resources
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
from agentforge.cli._scaffold_state import _strip_marker_for_hash, hash_content
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def register_docs_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Attach `agentforge docs` to the parent subparser action.

    Registers one optional positional (`topic`) plus `--path`, `--check` and
    `--serve`, and stores `_run_docs` under the `_handler` default.
    """
    topic_help = (
        "Runbook to open. Matches filename stem (02-add-a-tool), "
        "bare number (2), or alias (add-tool / add-memory)."
    )
    path_help = "Override the runbooks directory (default: ./docs/runbooks)."
    check_help = "Compare local runbooks against the framework's bundle; report drift."
    serve_help = "Start a local HTTP browser of the runbook tree on port 8765."

    docs_parser = sub.add_parser(
        "docs",
        help="Open / list / audit project runbooks.",
        description="Open and audit AgentForge runbooks shipped into this project.",
    )
    docs_parser.add_argument("topic", nargs="?", default=None, help=topic_help)
    docs_parser.add_argument("--path", type=Path, default=None, help=path_help)
    docs_parser.add_argument("--check", action="store_true", help=check_help)
    docs_parser.add_argument("--serve", action="store_true", help=serve_help)
    docs_parser.set_defaults(_handler=_run_docs)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def _run_docs(args: argparse.Namespace) -> int:
    """Route `agentforge docs` to the check / serve / list / open action.

    The runbooks directory is `args.path` when given, otherwise
    `<cwd>/docs/runbooks`. Returns 1 with a hint on stderr when that
    directory does not exist; otherwise returns the chosen action's result.
    """
    if args.path is None:
        runbooks_dir = Path.cwd() / "docs" / "runbooks"
    else:
        runbooks_dir = args.path

    if not runbooks_dir.exists():
        message = (
            f"agentforge docs: {runbooks_dir} does not exist. "
            "Scaffold via `agentforge new` to install runbooks.\n"
        )
        sys.stderr.write(message)
        return 1

    # Flags win over the positional topic: --check first, then --serve.
    if args.check:
        return _do_check(runbooks_dir)
    if args.serve:
        return _do_serve(runbooks_dir)
    if args.topic is not None:
        return _do_open(runbooks_dir, args.topic)
    return _do_list(runbooks_dir)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _do_list(runbooks_dir: Path) -> int:
    """Print one line per runbook found by `_scan` (stem, then path); return 0."""
    found = _scan(runbooks_dir)
    for entry in found:
        stem_column = f"{entry.stem:<40}"
        print(f" {stem_column} {entry}")
    return 0
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _do_open(runbooks_dir: Path, topic: str) -> int:
    """Resolve `topic` to a single runbook and open it via $EDITOR, else print it.

    Args:
        runbooks_dir: Directory searched by `_resolve_topic`.
        topic: Stem, bare number or alias identifying the runbook.

    Returns:
        1 when no runbook matches or the editor cannot be launched; the
        editor's exit status when $EDITOR is set; 0 after printing the
        runbook to stdout when $EDITOR is unset or empty.
    """
    import shlex  # noqa: PLC0415 — only needed on the $EDITOR path
    import shutil  # noqa: PLC0415

    match = _resolve_topic(runbooks_dir, topic)
    if match is None:
        sys.stderr.write(
            f"agentforge docs: no runbook matches {topic!r}. Try `agentforge docs` to list.\n"
        )
        return 1

    editor = os.environ.get("EDITOR")
    if not editor:
        # No EDITOR — print to stdout so the developer can pipe to less.
        sys.stdout.write(match.read_text(encoding="utf-8"))
        return 0

    # $EDITOR commonly carries flags ("code --wait", "emacsclient -t"), so it
    # has to be split into argv. When the whole value already names an
    # executable (e.g. a path containing spaces) it is kept as one token.
    if shutil.which(editor):
        argv = [editor]
    else:
        try:
            argv = shlex.split(editor, posix=os.name != "nt") or [editor]
        except ValueError:
            # Unbalanced quotes: fall back to treating the value as one token.
            argv = [editor]

    try:
        return subprocess.run(  # noqa: S603  # nosec B603 — $EDITOR is user's own
            [*argv, str(match)],
            check=False,
        ).returncode
    except OSError as exc:
        sys.stderr.write(f"agentforge docs: cannot launch $EDITOR {editor!r}: {exc}\n")
        return 1
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _do_check(runbooks_dir: Path) -> int:
    """Compare each local runbook's hash with its bundled counterpart.

    Returns:
        0 when every local runbook hashes equal to the bundled file;
        1 when the bundle cannot be located or any drift is reported.

    A local runbook with no bundled counterpart is reported as `+local`;
    one whose hash differs is reported as `~drift`.
    """
    bundled = _bundled_runbooks_dir()
    if bundled is None:
        sys.stderr.write(
            "agentforge docs check: framework bundle not found — "
            "running from a non-standard install?\n"
        )
        return 1

    drift: list[str] = []
    for local in _scan(runbooks_dir):
        rel = local.relative_to(runbooks_dir)

        # Bundled file may carry `.tmpl` suffix; check both, plain name first.
        bundled_path = None
        for candidate in (bundled / rel, bundled / f"{rel}.tmpl"):
            if candidate.exists():
                bundled_path = candidate
                break

        if bundled_path is None:
            drift.append(f" +local {rel}")
            continue

        # Only the local text goes through `_strip_marker_for_hash`.
        local_text = _strip_marker_for_hash(local.read_text(encoding="utf-8"))
        bundled_text = bundled_path.read_text(encoding="utf-8")
        if hash_content(local_text) != hash_content(bundled_text):
            drift.append(f" ~drift {rel}")

    if not drift:
        print("All runbooks in sync with framework bundle.")
        return 0

    print("Runbook drift detected:")
    print(*drift, sep="\n")
    print("\nRun `agentforge upgrade` to merge framework updates.")
    return 1
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _do_serve(runbooks_dir: Path, port: int = 8765) -> int:
    """Start a basic HTTP server over the runbooks directory.

    Args:
        runbooks_dir: Directory whose contents are served.
        port: TCP port to bind on 127.0.0.1.

    Returns:
        0 after the server is stopped with Ctrl-C; 1 when the address
        cannot be bound.
    """
    import functools  # noqa: PLC0415

    # Give the handler its directory explicitly instead of os.chdir(): the
    # process working directory stays untouched while serving.
    handler_cls = functools.partial(
        http.server.SimpleHTTPRequestHandler,
        directory=str(runbooks_dir.resolve()),
    )

    class _ReusableTCPServer(socketserver.TCPServer):
        # Allow rebinding right after a previous run released the port.
        allow_reuse_address = True

    try:
        httpd = _ReusableTCPServer(("127.0.0.1", port), handler_cls)
    except OSError as exc:
        sys.stderr.write(f"agentforge docs: cannot bind 127.0.0.1:{port}: {exc}\n")
        return 1

    with httpd:
        sys.stdout.write(
            f"agentforge docs: serving {runbooks_dir} at http://127.0.0.1:{port}/\n"
            "Press Ctrl-C to stop.\n"
        )
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            sys.stdout.write("\nstopped.\n")
    return 0
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
_RUNBOOK_RE = re.compile(r"^\d{2}-[a-z0-9-]+\.md$")


def _scan(runbooks_dir: Path) -> list[Path]:
    """Return the `NN-name.md` entries directly under `runbooks_dir`, sorted by path."""
    return sorted(p for p in runbooks_dir.glob("*.md") if _RUNBOOK_RE.match(p.name))


def _resolve_topic(runbooks_dir: Path, topic: str) -> Path | None:
    """Resolve `topic` to a runbook path.

    Match precedence:
    1. Exact filename, with or without the `.md` suffix (`02-add-a-tool`).
    2. Bare number (`2` → `02-...`).
    3. Alias as a substring of the name after the `NN-` prefix (`add-mcp`).
    4. Alias whose hyphen-separated words all appear, in order, in that
       name (`add-tool` → `02-add-a-tool`).

    Returns:
        The first matching path, or None when nothing matches.
    """
    if topic.endswith(".md"):
        candidate = runbooks_dir / topic
        if candidate.exists():
            return candidate
    candidate = runbooks_dir / f"{topic}.md"
    if candidate.exists():
        return candidate

    runbooks = _scan(runbooks_dir)

    # isdecimal(), not isdigit(): isdigit() also accepts characters such as
    # "²" for which int() raises ValueError.
    if topic.isdecimal():
        num = f"{int(topic):02d}-"
        for p in runbooks:
            if p.name.startswith(num):
                return p

    # Drop the `NN-` prefix when comparing aliases.
    bodies = [(p, p.stem.split("-", 1)[1] if "-" in p.stem else p.stem) for p in runbooks]
    for p, body in bodies:
        if topic in body:
            return p

    # Word-wise fallback so an alias may skip filler words in the filename.
    wanted = [word for word in topic.split("-") if word]
    if wanted:
        for p, body in bodies:
            remaining = iter(body.split("-"))
            # `word in remaining` consumes the iterator, enforcing word order.
            if all(word in remaining for word in wanted):
                return p
    return None
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _bundled_runbooks_dir() -> Path | None:
    """Return the on-disk path of the framework's bundled runbooks.

    Returns:
        The directory path, or None when the `agentforge.templates` package
        cannot be imported, the resource cannot be materialised on disk, or
        the result is not a directory.
    """
    import atexit  # noqa: PLC0415
    import contextlib  # noqa: PLC0415

    try:
        traversable = resources.files("agentforge.templates").joinpath(
            "_shared", "docs", "runbooks"
        )
    except ModuleNotFoundError:
        return None

    # `as_file` may hand back a temporary extraction that is deleted when its
    # context exits. The caller uses the path after this function returns, so
    # the context is kept open and closed at interpreter exit instead.
    keeper = contextlib.ExitStack()
    try:
        path = Path(keeper.enter_context(resources.as_file(traversable)))
    except OSError:
        keeper.close()
        return None
    if not path.is_dir():
        keeper.close()
        return None
    atexit.register(keeper.close)
    return path
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
__all__ = ["register_docs_cmd"]
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
"""`agentforge eval` — run an agent against JSONL fixtures (feat-017 chunk 5).
|
|
2
|
+
|
|
3
|
+
Each fixture line:
|
|
4
|
+
|
|
5
|
+
{"task": "...", "expected": "...", "metadata": {...}}
|
|
6
|
+
|
|
7
|
+
The command builds the agent once, iterates fixtures, runs the agent
|
|
8
|
+
on each, aggregates per-evaluator scores, and threshold-checks the
|
|
9
|
+
mean. Output formats: rich (default), json, junit. Exit 5 on
|
|
10
|
+
threshold failure.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import asyncio
|
|
17
|
+
import json
|
|
18
|
+
import sys
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
from typing import Any
|
|
21
|
+
from xml.etree import (
|
|
22
|
+
ElementTree as ET, # nosec B405 — output only; never parses untrusted XML
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
from pydantic import ValidationError
|
|
26
|
+
|
|
27
|
+
from agentforge.cli._build import load_and_build
|
|
28
|
+
from agentforge.cli.run_cmd import (
|
|
29
|
+
EXIT_CONFIG_INVALID,
|
|
30
|
+
EXIT_GENERIC,
|
|
31
|
+
EXIT_OK,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
EXIT_THRESHOLD = 5
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def register_eval_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Attach the `eval` subcommand and its options to the parent subparser action.

    The created parser stores `_eval_handler` under the `_handler` default.
    """
    eval_parser = sub.add_parser(
        "eval",
        help="Run an agent against JSONL fixtures and apply evaluators.",
    )
    add = eval_parser.add_argument

    add("--fixtures", type=Path, required=True, help="Path to JSONL fixtures.")
    add(
        "--threshold",
        type=float,
        default=None,
        help="Minimum mean score across all evaluators. Exit 5 if below.",
    )
    add("--output-format", choices=("rich", "json", "junit"), default="rich")
    add("--path", type=Path, default=None)
    add("--env", default=None)
    add("--override", action="append", default=[])

    eval_parser.set_defaults(_handler=_eval_handler)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _eval_handler(args: argparse.Namespace) -> int:
    """Run the async `_dispatch` coroutine to completion and return its exit code."""
    exit_code = asyncio.run(_dispatch(args))
    return exit_code
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
async def _dispatch(args: argparse.Namespace) -> int:
    """Run the agent over every fixture, emit a report and return the exit code.

    Returns:
        EXIT_GENERIC when the fixtures cannot be read, decoded or are
        malformed; EXIT_CONFIG_INVALID when building the agent raises a
        ValidationError; EXIT_THRESHOLD when a threshold is given and the
        mean score is below it; EXIT_OK otherwise.
    """
    try:
        fixtures = _load_fixtures(args.fixtures)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError alike.
        sys.stderr.write(f"agentforge eval: failed to read fixtures: {exc}\n")
        return EXIT_GENERIC

    # Reject malformed fixtures before the agent is built or run, so a bad
    # record fails with a message rather than a KeyError/TypeError part-way
    # through the loop below.
    for index, fix in enumerate(fixtures):
        if not isinstance(fix, dict) or "task" not in fix:
            sys.stderr.write(
                f"agentforge eval: fixture {index} must be a JSON object with a 'task' key.\n"
            )
            return EXIT_GENERIC

    try:
        agent = await load_and_build(
            path=args.path,
            env=args.env,
            overrides=list(args.override) or None,
        )
    except ValidationError as exc:
        sys.stderr.write(f"agentforge eval: config invalid:\n{exc}\n")
        return EXIT_CONFIG_INVALID

    results: list[dict[str, Any]] = []
    for fix in fixtures:
        run_result = await agent.run(fix["task"])
        results.append(
            {
                "task": fix["task"],
                "expected": fix.get("expected"),
                "output": run_result.output,
                "scores": [score.model_dump(mode="json") for score in run_result.eval_scores],
                "run_id": run_result.run_id,
            }
        )

    mean = _mean_score(results)
    fail = args.threshold is not None and mean < args.threshold

    _emit(results, mean, args.threshold, args.output_format, fail=fail)
    return EXIT_THRESHOLD if fail else EXIT_OK
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _load_fixtures(path: Path) -> list[dict[str, Any]]:
    """Parse a JSONL file into a list of decoded records, skipping blank lines.

    Args:
        path: UTF-8 encoded file holding one JSON value per line.

    Returns:
        The decoded records in file order.

    Raises:
        OSError: The file cannot be opened or read.
        json.JSONDecodeError: A line is not valid JSON; the message names
            the 1-based line of the fixtures file.
    """
    fixtures: list[dict[str, Any]] = []
    # Iterate the file object rather than `str.splitlines()`: splitlines also
    # breaks on U+2028/U+2029/U+0085, which may appear unescaped inside a JSON
    # string and would cut a valid record in two.
    with path.open(encoding="utf-8") as handle:
        for lineno, raw in enumerate(handle, start=1):
            stripped = raw.strip()
            if not stripped:
                continue
            try:
                fixtures.append(json.loads(stripped))
            except json.JSONDecodeError as exc:
                # Each line is decoded on its own, so the decoder always
                # reports "line 1"; add the real position in the file.
                raise json.JSONDecodeError(
                    f"{exc.msg} (fixtures line {lineno})", exc.doc, exc.pos
                ) from exc
    return fixtures
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _mean_score(results: list[dict[str, Any]]) -> float:
    """Average every numeric `score` found across all results' `scores` lists.

    Entries whose `score` is missing or not an int/float are ignored.
    Returns 0.0 when there is nothing numeric to average.
    """
    numeric = [
        float(value)
        for result in results
        for entry in result["scores"]
        if isinstance(value := entry.get("score"), (int, float))
    ]
    if not numeric:
        return 0.0
    return sum(numeric) / len(numeric)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _emit(
    results: list[dict[str, Any]],
    mean: float,
    threshold: float | None,
    fmt: str,
    *,
    fail: bool,
) -> None:
    """Print the evaluation report to stdout in the requested format.

    `fmt` of "json" prints one indented JSON document; "junit" prints the
    XML built by `_to_junit`; any other value prints a short plain-text
    summary, with a threshold line only when `threshold` is not None.
    """
    if fmt == "junit":
        print(_to_junit(results, mean, fail=fail))
        return

    if fmt == "json":
        document = {
            "fixtures": len(results),
            "mean_score": mean,
            "threshold": threshold,
            "passed": not fail,
            "results": results,
        }
        print(json.dumps(document, indent=2))
        return

    # Rich-or-plain summary.
    print(f"fixtures: {len(results)}")
    print(f"mean_score: {mean:.4f}")
    if threshold is None:
        return
    verdict = "FAIL" if fail else "PASS"
    print(f"threshold: {threshold:.4f} → {verdict}")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _to_junit(results: list[dict[str, Any]], mean: float, *, fail: bool) -> str:
    """Serialise evaluation results as a JUnit-style `<testsuite>` XML string.

    Args:
        results: One dict per fixture; each must carry a `scores` list of dicts.
        mean: Mean score, quoted in the `<system-err>` text when `fail` is set.
        fail: Whether the threshold check failed.

    Returns:
        The XML document as text. Each fixture becomes a `<testcase>`; every
        score entry with a numeric `score` below 1.0 adds a `<failure>` child
        holding that entry as JSON.
    """
    suite = ET.Element(
        "testsuite",
        attrib={
            "name": "agentforge-eval",
            "tests": str(len(results)),
            # Reflects the threshold verdict only, not the number of
            # <failure> elements emitted below.
            "failures": "1" if fail else "0",
        },
    )
    for i, r in enumerate(results):
        case = ET.SubElement(
            suite,
            "testcase",
            attrib={"name": f"fixture[{i}]", "classname": "agentforge.eval"},
        )
        for score in r["scores"]:
            value = score.get("score", 1.0)
            # Compare numeric scores only (as `_mean_score` does); a None or
            # non-numeric score would make `<` raise TypeError.
            if isinstance(value, (int, float)) and value < 1.0:
                failure = ET.SubElement(case, "failure", attrib={"type": "score"})
                failure.text = json.dumps(score)
    if fail:
        err = ET.SubElement(suite, "system-err")
        err.text = f"mean_score {mean:.4f} below threshold"
    return ET.tostring(suite, encoding="unicode")
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
__all__ = ["register_eval_cmd"]
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
"""`agentforge health` — preflight checks (feat-017 chunk 8).
|
|
2
|
+
|
|
3
|
+
Renamed from the spec's `agentforge status` to avoid colliding with
|
|
4
|
+
the feat-011 scaffolding-state `agentforge status`. Checks:
|
|
5
|
+
|
|
6
|
+
1. Config loads + validates.
|
|
7
|
+
2. Every installed module resolvable via `Resolver.list_installed`.
|
|
8
|
+
3. Every backend declared under `modules.{memory,graph,retriever}`
|
|
9
|
+
reachable (instantiate, `init_schema()`/`close()`).
|
|
10
|
+
4. Provider construction is exercised as a no-API probe.
|
|
11
|
+
|
|
12
|
+
Exit codes: 0 all OK, 1 any FAIL, 2 config invalid.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import argparse
|
|
18
|
+
import asyncio
|
|
19
|
+
import json
|
|
20
|
+
import sys
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
from typing import Any
|
|
23
|
+
|
|
24
|
+
from agentforge_core.config.loader import load_config
|
|
25
|
+
from agentforge_core.config.schema import AgentForgeConfig
|
|
26
|
+
from agentforge_core.production.exceptions import ModuleError
|
|
27
|
+
from agentforge_core.resolver import Resolver
|
|
28
|
+
from pydantic import ValidationError
|
|
29
|
+
|
|
30
|
+
from agentforge.cli._build import (
|
|
31
|
+
build_memory_from_config,
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def register_health_cmd(sub: argparse._SubParsersAction) -> None:  # type: ignore[type-arg]
    """Attach the `health` subcommand and its options to the parent subparser action.

    The created parser stores `_health_handler` under the `_handler` default.
    """
    health_parser = sub.add_parser(
        "health",
        help="Preflight: config valid, modules loadable, backends reachable.",
    )
    add = health_parser.add_argument
    add("--path", type=Path, default=None)
    add("--env", default=None)
    add("--override", action="append", default=[])
    add("--output-format", choices=("rich", "plain", "json"), default="plain")
    health_parser.set_defaults(_handler=_health_handler)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def _health_handler(args: argparse.Namespace) -> int:
    """Run the async `_dispatch` coroutine to completion and return its exit code."""
    exit_code = asyncio.run(_dispatch(args))
    return exit_code
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
async def _dispatch(args: argparse.Namespace) -> int:
    """Run all preflight checks, emit the report and return the exit code.

    Returns:
        2 when loading the config raises ValidationError or ModuleError
        (only the failed config check is emitted); otherwise 0 when every
        check passed and 1 when any failed.
    """
    try:
        config = load_config(args.path, env=args.env, overrides=list(args.override) or None)
    except (ValidationError, ModuleError) as exc:
        failed = {"name": "config", "kind": "config", "ok": False, "detail": str(exc)}
        _emit([failed], args)
        return 2

    checks: list[dict[str, Any]] = [
        {"name": "config", "kind": "config", "ok": True, "detail": "valid"}
    ]
    checks += _check_modules()
    checks += await _check_backends(config)

    all_ok = all(check["ok"] for check in checks)
    _emit(checks, args)
    if all_ok:
        return 0
    return 1
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _check_modules() -> list[dict[str, Any]]:
    """Try to resolve every module the global Resolver lists as installed.

    Returns:
        One check dict per module: `ok` is True with detail "resolvable",
        or False with the ModuleError text when `resolve` raised.
    """
    resolver = Resolver.global_()
    report: list[dict[str, Any]] = []
    for info in resolver.list_installed():
        try:
            resolver.resolve(info.category, info.name)
        except ModuleError as exc:
            ok, detail = False, str(exc)
        else:
            ok, detail = True, "resolvable"
        report.append(
            {
                "name": f"{info.category}:{info.name}",
                "kind": "module",
                "ok": ok,
                "detail": detail,
            }
        )
    return report
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
async def _check_backends(config: AgentForgeConfig) -> list[dict[str, Any]]:
    """Probe the configured memory backend, if any.

    Returns:
        An empty list when `config.modules.memory` is None; otherwise a
        one-element list holding the `_probe` result labelled "memory".
    """
    if config.modules.memory is None:
        return []
    # The lambda defers construction so `_probe` can catch build errors.
    memory_check = await _probe("memory", lambda: build_memory_from_config(config))
    return [memory_check]
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
async def _probe(label: str, factory: Any) -> dict[str, Any]:
    """Build a backend via `factory`, run its optional init/close hooks, report.

    Args:
        label: Name used for the check entry.
        factory: Zero-argument callable returning the backend or None.

    Returns:
        A check dict of kind "backend": ok with detail "none configured"
        when the factory returns None, ok with "reachable" when the hooks
        complete, or not ok with the error text when a ModuleError or
        OSError is raised.
    """
    import inspect  # noqa: PLC0415

    async def _invoke(hook: Any) -> None:
        # Hooks may be plain functions or coroutine functions; only await
        # what is actually awaitable.
        outcome = hook()
        if inspect.isawaitable(outcome):
            await outcome

    try:
        instance = factory()
        if instance is None:
            return {"name": label, "kind": "backend", "ok": True, "detail": "none configured"}
        try:
            init = getattr(instance, "init_schema", None)
            if callable(init):
                await _invoke(init)
        finally:
            # Close even when init_schema raised, so the backend is not leaked.
            close = getattr(instance, "close", None)
            if callable(close):
                await _invoke(close)
    except (ModuleError, OSError) as exc:
        return {"name": label, "kind": "backend", "ok": False, "detail": str(exc)}
    return {"name": label, "kind": "backend", "ok": True, "detail": "reachable"}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _emit(checks: list[dict[str, Any]], args: argparse.Namespace) -> None:
    """Write the check report to stdout.

    With `args.output_format == "json"` a single indented JSON document
    (`checks` plus an overall `ok`) is printed; any other format writes one
    aligned text line per check.
    """
    if args.output_format != "json":
        sys.stdout.writelines(
            f"{'OK ' if check['ok'] else 'FAIL'} {check['kind']:<8} "
            f"{check['name']:<32} {check['detail']}\n"
            for check in checks
        )
        return

    report = {"checks": checks, "ok": all(check["ok"] for check in checks)}
    print(json.dumps(report, indent=2))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
__all__ = ["register_health_cmd"]
|