agent-tool-forge 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/lib/agent-registry.js +170 -0
- package/lib/api-client.js +792 -0
- package/lib/api-loader.js +260 -0
- package/lib/auth.d.ts +25 -0
- package/lib/auth.js +158 -0
- package/lib/checks/check-adapter.js +172 -0
- package/lib/checks/compose.js +42 -0
- package/lib/checks/content-match.js +14 -0
- package/lib/checks/cost-budget.js +11 -0
- package/lib/checks/index.js +18 -0
- package/lib/checks/json-valid.js +15 -0
- package/lib/checks/latency.js +11 -0
- package/lib/checks/length-bounds.js +17 -0
- package/lib/checks/negative-match.js +14 -0
- package/lib/checks/no-hallucinated-numbers.js +63 -0
- package/lib/checks/non-empty.js +34 -0
- package/lib/checks/regex-match.js +12 -0
- package/lib/checks/run-checks.js +84 -0
- package/lib/checks/schema-match.js +26 -0
- package/lib/checks/tool-call-count.js +16 -0
- package/lib/checks/tool-selection.js +34 -0
- package/lib/checks/types.js +45 -0
- package/lib/comparison/compare.js +86 -0
- package/lib/comparison/format.js +104 -0
- package/lib/comparison/index.js +6 -0
- package/lib/comparison/statistics.js +59 -0
- package/lib/comparison/types.js +41 -0
- package/lib/config-schema.js +200 -0
- package/lib/config.d.ts +66 -0
- package/lib/conversation-store.d.ts +77 -0
- package/lib/conversation-store.js +443 -0
- package/lib/db.d.ts +6 -0
- package/lib/db.js +1112 -0
- package/lib/dep-check.js +99 -0
- package/lib/drift-background.js +61 -0
- package/lib/drift-monitor.js +187 -0
- package/lib/eval-runner.js +566 -0
- package/lib/fixtures/fixture-store.js +161 -0
- package/lib/fixtures/index.js +11 -0
- package/lib/forge-engine.js +982 -0
- package/lib/forge-eval-generator.js +417 -0
- package/lib/forge-file-writer.js +386 -0
- package/lib/forge-service-client.js +190 -0
- package/lib/forge-service.d.ts +4 -0
- package/lib/forge-service.js +655 -0
- package/lib/forge-verifier-generator.js +271 -0
- package/lib/handlers/admin.js +151 -0
- package/lib/handlers/agents.js +229 -0
- package/lib/handlers/chat-resume.js +334 -0
- package/lib/handlers/chat-sync.js +320 -0
- package/lib/handlers/chat.js +320 -0
- package/lib/handlers/conversations.js +92 -0
- package/lib/handlers/preferences.js +88 -0
- package/lib/handlers/tools-list.js +58 -0
- package/lib/hitl-engine.d.ts +60 -0
- package/lib/hitl-engine.js +261 -0
- package/lib/http-utils.js +92 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.js +141 -0
- package/lib/init.js +636 -0
- package/lib/manual-entry.js +59 -0
- package/lib/mcp-server.js +252 -0
- package/lib/output-groups.js +54 -0
- package/lib/postgres-store.d.ts +31 -0
- package/lib/postgres-store.js +465 -0
- package/lib/preference-store.d.ts +47 -0
- package/lib/preference-store.js +79 -0
- package/lib/prompt-store.d.ts +42 -0
- package/lib/prompt-store.js +60 -0
- package/lib/rate-limiter.d.ts +30 -0
- package/lib/rate-limiter.js +104 -0
- package/lib/react-engine.d.ts +110 -0
- package/lib/react-engine.js +337 -0
- package/lib/runner/cli.js +156 -0
- package/lib/runner/cost-estimator.js +71 -0
- package/lib/runner/gate.js +46 -0
- package/lib/runner/index.js +165 -0
- package/lib/sidecar.d.ts +83 -0
- package/lib/sidecar.js +161 -0
- package/lib/sse.d.ts +15 -0
- package/lib/sse.js +30 -0
- package/lib/tools-scanner.js +91 -0
- package/lib/tui.js +253 -0
- package/lib/verifier-report.js +78 -0
- package/lib/verifier-runner.js +338 -0
- package/lib/verifier-scanner.js +70 -0
- package/lib/verifier-worker-pool.js +196 -0
- package/lib/views/chat.js +340 -0
- package/lib/views/endpoints.js +203 -0
- package/lib/views/eval-run.js +206 -0
- package/lib/views/forge-agent.js +538 -0
- package/lib/views/forge.js +410 -0
- package/lib/views/main-menu.js +275 -0
- package/lib/views/mediation.js +381 -0
- package/lib/views/model-compare.js +430 -0
- package/lib/views/model-comparison.js +333 -0
- package/lib/views/onboarding.js +470 -0
- package/lib/views/performance.js +237 -0
- package/lib/views/run-evals.js +205 -0
- package/lib/views/settings.js +829 -0
- package/lib/views/tools-evals.js +514 -0
- package/lib/views/verifier-coverage.js +617 -0
- package/lib/workers/verifier-worker.js +52 -0
- package/package.json +123 -0
- package/widget/forge-chat.js +789 -0
|
@@ -0,0 +1,205 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Run Evals View — Browse and run eval files directly from the TUI.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import blessed from 'blessed';
|
|
6
|
+
import { existsSync, readdirSync } from 'node:fs';
|
|
7
|
+
import { resolve, basename } from 'node:path';
|
|
8
|
+
|
|
9
|
+
/**
 * List the eval files found directly inside the configured evals directory.
 *
 * The directory is `config.project.evalsDir` resolved against the current
 * working directory, falling back to `docs/examples` when it is unset or empty.
 * Only entries whose name ends in `.golden.json` or `.labeled.json` are kept;
 * sub-directories are skipped even when their name matches. The lookup is not
 * recursive.
 *
 * @param {object} [config] - forge configuration; only `project.evalsDir` is read
 * @returns {string[]} absolute paths sorted by file name, or `[]` when the
 *   directory is missing or cannot be read
 */
function findEvalFiles(config) {
  const evalSuffixes = ['.golden.json', '.labeled.json'];
  const evalsDir = resolve(process.cwd(), config?.project?.evalsDir || 'docs/examples');
  if (!existsSync(evalsDir)) return [];

  try {
    return readdirSync(evalsDir, { withFileTypes: true })
      // Drop real directories only, so symlinked eval files are still listed.
      .filter((entry) => !entry.isDirectory())
      .map((entry) => entry.name)
      .filter((name) => evalSuffixes.some((suffix) => name.endsWith(suffix)))
      // readdir order is filesystem-dependent; sort so the list is stable.
      .sort()
      .map((name) => resolve(evalsDir, name));
  } catch {
    // Best effort: an unreadable directory is presented as "no eval files".
    return [];
  }
}
|
|
25
|
+
|
|
26
|
+
/**
 * Escape blessed markup in dynamic text before it is placed in a widget that
 * has `tags: true`, so braces in file names or error messages are shown
 * literally instead of being interpreted as tags.
 *
 * @param {unknown} value - any value; it is stringified first
 * @returns {string} text that is safe to interpolate into tagged content
 */
function escapeTags(value) {
  return blessed.escape(String(value));
}

/**
 * Build the function that sends one eval message to the configured agent.
 *
 * @param {object} agentConfig - `config.agent`; reads `endpoint`, `method`,
 *   `headers`, `inputField` and `outputField`
 * @returns {(message: string) => Promise<{responseText: string, toolsCalled: Array, latencyMs: number}>}
 *   a function that rejects when the agent answers with a non-2xx status
 */
function createAgentFn(agentConfig) {
  const method = agentConfig.method ?? 'POST';
  const headers = { 'Content-Type': 'application/json', ...(agentConfig.headers ?? {}) };
  const inputField = agentConfig.inputField ?? 'message';
  const outputField = agentConfig.outputField ?? 'text';

  return async (message) => {
    const startedAt = Date.now();
    const res = await fetch(agentConfig.endpoint, {
      method,
      headers,
      body: JSON.stringify({ [inputField]: message }),
    });
    if (!res.ok) throw new Error(`Agent returned ${res.status}`);

    const data = await res.json();
    return {
      responseText: data[outputField] ?? '',
      toolsCalled: data.toolsCalled ?? [],
      latencyMs: Date.now() - startedAt,
    };
  };
}

/**
 * Turn an eval-suite summary into the tagged lines shown in the results pane.
 *
 * @param {object} summary - value resolved by `runEvalSuite`; reads `total`,
 *   `passed`, `failed`, `skipped`, `passRate`, `p95LatencyMs`,
 *   `gates.results` and `cases`
 * @returns {string[]} lines of blessed-tagged text
 */
function formatSummaryLines(summary) {
  const { total, passed, failed, skipped, passRate } = summary;
  // Derive the rate when the summary does not carry a usable one, so the
  // headline never renders as "NaN%".
  const rate = Number.isFinite(passRate) ? passRate : (total > 0 ? passed / total : 0);
  const pct = (rate * 100).toFixed(1);
  const passIcon = failed === 0 ? '{green-fg}✓{/green-fg}' : '{red-fg}✗{/red-fg}';

  const lines = [`${passIcon} {bold}${passed}/${total} passed (${pct}%){/bold}`];
  if (skipped > 0) lines.push(`{#888888-fg}${skipped} skipped{/#888888-fg}`);
  if (summary.p95LatencyMs > 0) lines.push(`p95 latency: ${summary.p95LatencyMs}ms`);
  lines.push('');

  const gateResults = summary.gates?.results ?? [];
  if (gateResults.length > 0) {
    lines.push('{bold}Gates:{/bold}');
    for (const gate of gateResults) {
      const gateIcon = gate.pass ? '{green-fg}✓{/green-fg}' : '{red-fg}✗{/red-fg}';
      lines.push(`${gateIcon} ${escapeTags(gate.gate)}: ${gate.actual} (≥ ${gate.threshold})`);
    }
    lines.push('');
  }

  // `cases` is read defensively, like `gates` above.
  const failingCases = (summary.cases ?? []).filter((c) => c.status === 'failed');
  if (failingCases.length > 0) {
    lines.push('{bold}Failures:{/bold}');
    for (const failure of failingCases) {
      const id = escapeTags(failure.id ?? '(unnamed)');
      const reason = escapeTags(failure.reason ?? '(no reason given)');
      lines.push(`{red-fg}✗{/red-fg} ${id}: ${reason}`);
    }
  } else if (failed === 0) {
    lines.push('{green-fg}All cases passed!{/green-fg}');
  }

  return lines;
}

/**
 * Create the "Run Evals" view: a list of eval files on the left, a results
 * pane on the right and a one-line status bar.
 *
 * Key bindings on the list: Enter runs the selected file, `r` runs it in
 * record mode, `p` runs it in replay mode, `b` navigates to the main menu.
 *
 * @param {object} deps
 * @param {object} deps.screen - blessed screen; only `render()` is called
 * @param {*} deps.content - accepted for interface compatibility; unused here
 * @param {object} deps.config - forge configuration (`project`, `agent`,
 *   `fixtures`, `gates`)
 * @param {(view: string) => void} deps.navigate - view switcher
 * @param {(text: string) => void} deps.setFooter - sets the footer hint text
 * @returns {object} the container box, with a `refresh()` method that reloads
 *   the eval file list
 */
export function createView({ screen, content, config, navigate, setFooter }) {
  const container = blessed.box({
    top: 0, left: 0, width: '100%', height: '100%', tags: true,
  });

  // Title bar; it attaches itself through `parent`, so no reference is kept.
  blessed.box({
    parent: container,
    top: 0, left: 0, width: '100%', height: 3,
    tags: true,
    border: { type: 'line' },
    style: { border: { fg: 'blue' } },
    align: 'center',
    valign: 'middle',
    content: ' {bold}{cyan-fg}▲ Run Evals{/cyan-fg}{/bold} ',
  });

  // Eval file list
  const list = blessed.list({
    parent: container,
    top: 3, left: 2,
    width: '50%-2', height: '100%-6',
    tags: true, keys: true, vi: true, mouse: true,
    border: { type: 'line' },
    style: {
      border: { fg: 'blue' },
      selected: { bg: '#1a3a5c', bold: true },
      item: { fg: 'white' },
    },
    label: ' Eval Files ',
  });

  // Results pane. `mouse: true` lets the wheel scroll it; without any input
  // binding a long failure list could not be scrolled at all.
  const resultsBox = blessed.box({
    parent: container,
    top: 3, left: '50%',
    width: '50%', height: '100%-6',
    tags: true, scrollable: true, alwaysScroll: true, mouse: true,
    border: { type: 'line' },
    style: { border: { fg: 'blue' } },
    label: ' Results ',
    padding: { left: 1, right: 1 },
  });

  const statusBar = blessed.box({
    parent: container,
    bottom: 2, left: 0, width: '100%', height: 1,
    tags: true,
    content: '',
  });

  setFooter(
    ' {cyan-fg}↑↓{/cyan-fg} select {cyan-fg}Enter{/cyan-fg} run {cyan-fg}r{/cyan-fg} record {cyan-fg}p{/cyan-fg} replay {cyan-fg}b{/cyan-fg} back'
  );

  let evalFiles = [];
  let running = false;

  /** Re-scan the evals directory and repopulate the list widget. */
  function loadFiles() {
    evalFiles = findEvalFiles(config);
    if (evalFiles.length === 0) {
      list.setItems([' {#888888-fg}No eval files found{/#888888-fg}']);
    } else {
      // The list parses tags, so file names are escaped before display.
      list.setItems(evalFiles.map((filePath) => ` ${escapeTags(basename(filePath))}`));
    }
    screen.render();
  }

  /**
   * Run the currently selected eval file and show its summary.
   *
   * @param {'normal'|'record'|'replay'} mode - fixture mode for this run
   * @returns {Promise<void>}
   */
  async function runSelected(mode) {
    // Ignore key presses while a run is in flight or when nothing is listed.
    if (running || evalFiles.length === 0) return;
    const idx = list.selected;
    if (idx < 0 || idx >= evalFiles.length) return;

    const evalPath = evalFiles[idx];
    const fileName = escapeTags(basename(evalPath));

    running = true;
    statusBar.setContent(` {yellow-fg}⟳ Running ${fileName}...{/yellow-fg}`);
    resultsBox.setContent('Running...');
    screen.render();

    try {
      const { runEvalSuite } = await import('../runner/index.js');
      const agentConfig = config?.agent ?? {};

      if (!agentConfig.endpoint) {
        resultsBox.setContent(
          '{red-fg}No agent.endpoint configured.{/red-fg}\n\n' +
          'Add to forge.config.json:\n' +
          '{\n "agent": {\n "endpoint": "http://localhost:8001/agent-api/chat-sync"\n }\n}'
        );
        statusBar.setContent(' {red-fg}✗ Configuration error{/red-fg}');
        // The `finally` block below clears `running` and re-renders.
        return;
      }

      const summary = await runEvalSuite(evalPath, createAgentFn(agentConfig), {
        record: mode === 'record',
        replay: mode === 'replay',
        fixturesDir: resolve(process.cwd(), config?.fixtures?.dir ?? '.forge-fixtures'),
        ttlDays: config?.fixtures?.ttlDays ?? 30,
        gates: config?.gates ?? {},
      });

      resultsBox.setContent(formatSummaryLines(summary).join('\n'));
      const verdict = summary.failed === 0
        ? '{green-fg}✓ Passed{/green-fg}'
        : '{red-fg}✗ Failed{/red-fg}';
      statusBar.setContent(` ${verdict} ${fileName}`);
    } catch (err) {
      // `err` may be a non-Error value; fall back to the value itself.
      resultsBox.setContent(`{red-fg}Error: ${escapeTags(err?.message ?? err)}{/red-fg}`);
      statusBar.setContent(' {red-fg}✗ Error{/red-fg}');
    } finally {
      running = false;
      screen.render();
    }
  }

  list.key('enter', () => runSelected('normal'));
  list.key('r', () => runSelected('record'));
  list.key('p', () => runSelected('replay'));
  list.key('b', () => navigate('main-menu'));

  container.refresh = () => {
    loadFiles();
  };

  loadFiles();
  list.focus();
  return container;
}
|