agent-tool-forge 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/lib/agent-registry.js +170 -0
- package/lib/api-client.js +792 -0
- package/lib/api-loader.js +260 -0
- package/lib/auth.d.ts +25 -0
- package/lib/auth.js +158 -0
- package/lib/checks/check-adapter.js +172 -0
- package/lib/checks/compose.js +42 -0
- package/lib/checks/content-match.js +14 -0
- package/lib/checks/cost-budget.js +11 -0
- package/lib/checks/index.js +18 -0
- package/lib/checks/json-valid.js +15 -0
- package/lib/checks/latency.js +11 -0
- package/lib/checks/length-bounds.js +17 -0
- package/lib/checks/negative-match.js +14 -0
- package/lib/checks/no-hallucinated-numbers.js +63 -0
- package/lib/checks/non-empty.js +34 -0
- package/lib/checks/regex-match.js +12 -0
- package/lib/checks/run-checks.js +84 -0
- package/lib/checks/schema-match.js +26 -0
- package/lib/checks/tool-call-count.js +16 -0
- package/lib/checks/tool-selection.js +34 -0
- package/lib/checks/types.js +45 -0
- package/lib/comparison/compare.js +86 -0
- package/lib/comparison/format.js +104 -0
- package/lib/comparison/index.js +6 -0
- package/lib/comparison/statistics.js +59 -0
- package/lib/comparison/types.js +41 -0
- package/lib/config-schema.js +200 -0
- package/lib/config.d.ts +66 -0
- package/lib/conversation-store.d.ts +77 -0
- package/lib/conversation-store.js +443 -0
- package/lib/db.d.ts +6 -0
- package/lib/db.js +1112 -0
- package/lib/dep-check.js +99 -0
- package/lib/drift-background.js +61 -0
- package/lib/drift-monitor.js +187 -0
- package/lib/eval-runner.js +566 -0
- package/lib/fixtures/fixture-store.js +161 -0
- package/lib/fixtures/index.js +11 -0
- package/lib/forge-engine.js +982 -0
- package/lib/forge-eval-generator.js +417 -0
- package/lib/forge-file-writer.js +386 -0
- package/lib/forge-service-client.js +190 -0
- package/lib/forge-service.d.ts +4 -0
- package/lib/forge-service.js +655 -0
- package/lib/forge-verifier-generator.js +271 -0
- package/lib/handlers/admin.js +151 -0
- package/lib/handlers/agents.js +229 -0
- package/lib/handlers/chat-resume.js +334 -0
- package/lib/handlers/chat-sync.js +320 -0
- package/lib/handlers/chat.js +320 -0
- package/lib/handlers/conversations.js +92 -0
- package/lib/handlers/preferences.js +88 -0
- package/lib/handlers/tools-list.js +58 -0
- package/lib/hitl-engine.d.ts +60 -0
- package/lib/hitl-engine.js +261 -0
- package/lib/http-utils.js +92 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.js +141 -0
- package/lib/init.js +636 -0
- package/lib/manual-entry.js +59 -0
- package/lib/mcp-server.js +252 -0
- package/lib/output-groups.js +54 -0
- package/lib/postgres-store.d.ts +31 -0
- package/lib/postgres-store.js +465 -0
- package/lib/preference-store.d.ts +47 -0
- package/lib/preference-store.js +79 -0
- package/lib/prompt-store.d.ts +42 -0
- package/lib/prompt-store.js +60 -0
- package/lib/rate-limiter.d.ts +30 -0
- package/lib/rate-limiter.js +104 -0
- package/lib/react-engine.d.ts +110 -0
- package/lib/react-engine.js +337 -0
- package/lib/runner/cli.js +156 -0
- package/lib/runner/cost-estimator.js +71 -0
- package/lib/runner/gate.js +46 -0
- package/lib/runner/index.js +165 -0
- package/lib/sidecar.d.ts +83 -0
- package/lib/sidecar.js +161 -0
- package/lib/sse.d.ts +15 -0
- package/lib/sse.js +30 -0
- package/lib/tools-scanner.js +91 -0
- package/lib/tui.js +253 -0
- package/lib/verifier-report.js +78 -0
- package/lib/verifier-runner.js +338 -0
- package/lib/verifier-scanner.js +70 -0
- package/lib/verifier-worker-pool.js +196 -0
- package/lib/views/chat.js +340 -0
- package/lib/views/endpoints.js +203 -0
- package/lib/views/eval-run.js +206 -0
- package/lib/views/forge-agent.js +538 -0
- package/lib/views/forge.js +410 -0
- package/lib/views/main-menu.js +275 -0
- package/lib/views/mediation.js +381 -0
- package/lib/views/model-compare.js +430 -0
- package/lib/views/model-comparison.js +333 -0
- package/lib/views/onboarding.js +470 -0
- package/lib/views/performance.js +237 -0
- package/lib/views/run-evals.js +205 -0
- package/lib/views/settings.js +829 -0
- package/lib/views/tools-evals.js +514 -0
- package/lib/views/verifier-coverage.js +617 -0
- package/lib/workers/verifier-worker.js +52 -0
- package/package.json +123 -0
- package/widget/forge-chat.js +789 -0
|
@@ -0,0 +1,514 @@
|
|
|
1
|
+
/**
|
|
2
|
+
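* Tools & Evals View — Table of tools showing category, lifecycle state, eval pass rate, and drift status (eval run counts and verifier coverage are computed per row but not rendered as columns).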
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import blessed from 'blessed';
|
|
6
|
+
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'fs';
|
|
7
|
+
import { resolve, dirname } from 'path';
|
|
8
|
+
import { getToolsWithMetadata } from '../tools-scanner.js';
|
|
9
|
+
import { getExistingVerifiers } from '../verifier-scanner.js';
|
|
10
|
+
import { inferOutputGroups, getVerifiersForGroups } from '../output-groups.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Build the row models backing the Tools & Evals table.
 *
 * Tools and verifiers are scanned from the config. When the database file
 * exists, each tool is joined by name with its eval summary row, registry row,
 * and drift alert.
 *
 * @param {object} [config] - Forge config; reads `project`, `verification`, and `dbPath`.
 * @returns {Promise<object[]>} One entry per scanned tool with `name`, `category`,
 *   `lifecycle`, `passRate`, `hasDrift`, `evalRuns`, `verifiers`, `_regRow`, `_driftAlert`.
 */
async function loadData(config) {
  const project = config?.project || {};
  const verification = config?.verification || {};

  const tools = getToolsWithMetadata(project);
  const verifiers = getExistingVerifiers(verification);

  // Maps rather than plain objects: a tool whose name collides with an
  // Object.prototype member (e.g. "toString", "constructor") must not pick up
  // an inherited value as its eval/registry/drift row.
  let evalByTool = new Map();
  let registryByTool = new Map();
  const driftByTool = new Map();

  try {
    const dbPath = resolve(process.cwd(), config?.dbPath || 'forge.db');
    if (existsSync(dbPath)) {
      const { getDb, getEvalSummary, getAllToolRegistry, getDriftAlerts } = await import('../db.js');
      const db = getDb(dbPath);

      evalByTool = new Map(getEvalSummary(db).map((row) => [row.tool_name, row]));
      registryByTool = new Map(getAllToolRegistry(db).map((row) => [row.tool_name, row]));

      // NOTE(review): a null tool filter presumably returns alerts for every tool — confirm in db.js.
      for (const alert of getDriftAlerts(db, null)) {
        // Later alerts for the same tool overwrite earlier ones.
        driftByTool.set(alert.tool_name, alert);
      }
    }
  } catch {
    // Deliberate best-effort: DB unavailable or schema mismatch — tools still
    // display, just without eval/registry/drift data gathered after the failure.
  }

  return tools.map((t) => {
    const groups = inferOutputGroups(t);
    const covering = getVerifiersForGroups(groups).filter((v) => verifiers.includes(v));
    const evalRow = evalByTool.get(t.name);
    const regRow = registryByTool.get(t.name);
    const driftAlert = driftByTool.get(t.name);

    const passRate = evalRow && evalRow.total_cases > 0
      ? `${Math.round((evalRow.passed / evalRow.total_cases) * 100)}%`
      : '—';

    return {
      name: t.name,
      category: (t.tags || []).join(',') || '—',
      lifecycle: regRow?.lifecycle_state || 'candidate',
      passRate,
      hasDrift: Boolean(driftAlert),
      evalRuns: evalRow ? String(evalRow.total_cases) : '0',
      verifiers: covering.length > 0 ? covering.join(', ') : '—',
      _regRow: regRow,
      _driftAlert: driftAlert || null
    };
  });
}
|
|
64
|
+
|
|
65
|
+
/**
 * Create the Tools & Evals view: a selectable table of tools plus a one-line
 * status bar. Pressing Enter on a tool row opens the per-tool action menu.
 *
 * The returned container carries an async `refresh()` method that reloads the
 * rows and re-renders the table; it is invoked once on creation.
 *
 * @param {object} ctx - View context supplied by the caller.
 *   Only `screen`, `config`, `navigate`, `setFooter`, `openPopup`, and
 *   `closePopup` are used here; the remaining fields are accepted but unused.
 * @returns {object} The blessed container box for this view.
 */
export function createView({ screen, content, config, navigate, setFooter, screenKey, openPopup, closePopup, startService }) {
  const headers = ['Name', 'Category', 'Lifecycle', 'Pass Rate', 'Drift'];

  const container = blessed.box({
    top: 0,
    left: 0,
    width: '100%',
    height: '100%',
    tags: true
  });

  const table = blessed.listtable({
    parent: container,
    top: 0,
    left: 0,
    width: '100%',
    height: '100%-1',
    tags: true,
    keys: true,
    vi: true,
    mouse: true,
    border: { type: 'line' },
    align: 'left',
    style: {
      header: { bold: true, fg: 'cyan' },
      cell: { selected: { bg: '#1a3a5c', fg: 'white' } }
    },
    pad: 1
  });

  const statusBar = blessed.box({
    parent: container,
    bottom: 0,
    left: 0,
    width: '100%',
    height: 1,
    tags: true
  });

  setFooter(' {cyan-fg}↑↓{/cyan-fg} navigate {cyan-fg}Enter{/cyan-fg} actions {cyan-fg}r{/cyan-fg} refresh {cyan-fg}b{/cyan-fg} back');

  // Row models for the table body; rowData[i] corresponds to table row i + 1
  // because table row 0 is the header.
  let rowData = [];

  const showStatus = (msg, isError) => {
    statusBar.setContent(isError ? ` {red-fg}${msg}{/red-fg}` : ` {green-fg}${msg}{/green-fg}`);
    screen.render();
  };

  table.key('enter', () => {
    const idx = table.selected;
    if (idx < 1 || !rowData[idx - 1]) return;
    showActionMenu(screen, rowData[idx - 1], navigate, config, openPopup, closePopup, showStatus);
  });

  container.refresh = async () => {
    try {
      rowData = await loadData(config);
      const rows = rowData.map((r) => {
        const driftBadge = r.hasDrift ? '{red-fg}⚠ drift{/red-fg}' : '{#555555-fg}—{/#555555-fg}';
        return [r.name, r.category, lifecycleBadgeFor(r.lifecycle), r.passRate, driftBadge];
      });
      table.setData([headers, ...rows]);
    } catch (err) {
      // Drop any stale or partially loaded rows: the single error row below is
      // selectable, and Enter must not open an action menu for a tool that is
      // no longer displayed.
      rowData = [];
      table.setData([headers, [`Error loading: ${err.message}`, '', '', '', '']]);
    }
    screen.render();
  };

  // Not awaited: createView is synchronous and refresh() handles its own load errors.
  container.refresh();
  table.focus();
  return container;
}
|
|
135
|
+
|
|
136
|
+
/**
 * Map a lifecycle state to its colour-tagged label for the table.
 *
 * @param {string} state - Lifecycle state name.
 * @returns {string} Tagged label; any state other than promoted/flagged/retired/swapped
 *   renders as the grey "candidate" badge.
 */
function lifecycleBadgeFor(state) {
  const badges = new Map([
    ['promoted', '{green-fg}promoted{/green-fg}'],
    ['flagged', '{yellow-fg}flagged{/yellow-fg}'],
    ['retired', '{#555555-fg}retired{/#555555-fg}'],
    ['swapped', '{#555555-fg}swapped{/#555555-fg}']
  ]);
  const known = badges.get(state);
  if (known !== undefined) {
    return known;
  }
  return '{#888888-fg}candidate{/#888888-fg}';
}
|
|
145
|
+
|
|
146
|
+
/**
 * Open the per-tool action menu as a centred popup list.
 *
 * Selecting an entry closes the menu and runs the matching action; Escape or
 * `q` closes it without doing anything. Errors thrown or rejected by an action
 * are reported through `setStatus` instead of escaping the async event handler
 * as an unhandled promise rejection.
 *
 * @param {object} screen - blessed screen the menu is attached to.
 * @param {object} tool - Row model of the selected tool; `tool.name` is used.
 * @param {Function} navigate - Switches to another view by name.
 * @param {object} config - Forge config; navigation targets are stashed on it
 *   (`_evalTarget`, `_forgeTarget`, `_mediationTarget`).
 * @param {Function} [openPopup] - Optional hook called when the popup opens.
 * @param {Function} [closePopup] - Optional hook called when the popup closes.
 * @param {Function} setStatus - `(message, isError)` status-bar reporter.
 */
function showActionMenu(screen, tool, navigate, config, openPopup, closePopup, setStatus) {
  const items = [
    `{cyan-fg}▸{/cyan-fg} Run evals {#888888-fg}(uses API key from .env){/#888888-fg}`,
    ` Compare models`,
    ` View eval results`,
    ` View tool file`,
    ` Generate evals (AI)`,
    ` Generate verifiers (AI)`,
    ` Re-forge tool`,
    ` Promote to registry`,
    ` View drift report`,
    ` Mediate (fast-track)`,
    ` — Cancel —`
  ];

  // Index-aligned with `items`. The trailing Cancel entry intentionally has no
  // handler, so selecting it only closes the menu.
  const actions = [
    () => {
      config._evalTarget = tool.name;
      navigate('eval-run');
    },
    () => compareModelsForTool(tool, config, screen, setStatus, navigate, openPopup, closePopup),
    () => navigate('performance'),
    () => setStatus(`Tool file: ${config?.project?.toolsDir || 'example/tools'}/${tool.name}.tool.*`, false),
    () => generateEvalsForTool(tool, config, screen, setStatus, openPopup, closePopup),
    () => generateVerifiersForTool(tool, config, screen, setStatus, openPopup, closePopup),
    () => {
      config._forgeTarget = { toolName: tool.name, spec: null };
      navigate('forge');
    },
    () => promoteToolToRegistry(tool, config, screen, setStatus, openPopup, closePopup),
    () => showDriftReport(tool, config, screen, openPopup, closePopup),
    () => {
      config._mediationTarget = tool.name;
      navigate('mediation');
    }
  ];

  const menu = blessed.list({
    parent: screen,
    border: 'line',
    height: items.length + 4,
    width: 54,
    top: 'center',
    left: 'center',
    label: ` ⚙ ${tool.name} `,
    tags: true,
    keys: true,
    vi: true,
    style: {
      border: { fg: 'blue' },
      selected: { bg: '#1a3a5c', fg: 'white' }
    },
    items
  });

  const dismiss = () => {
    closePopup?.();
    menu.destroy();
    screen.render();
  };

  openPopup?.();
  menu.on('select', async (_item, idx) => {
    dismiss();
    try {
      await actions[idx]?.();
    } catch (err) {
      // Nothing awaits this event handler's promise, so a failure must be
      // surfaced here rather than left as an unhandled rejection.
      setStatus(`Action failed: ${err?.message ?? err}`, true);
    }
  });

  menu.key(['escape', 'q'], () => dismiss());
  menu.focus();
  screen.render();
}
|
|
223
|
+
|
|
224
|
+
/**
 * Generate golden and labeled eval cases for a tool with the configured AI
 * model and write them to the paths returned by the generator.
 *
 * A progress popup is shown while the call runs. The outcome stays in the
 * popup briefly (2s on success, 3s on failure), then the popup is removed and
 * the status bar is updated. The returned promise settles before that delayed
 * teardown runs.
 *
 * @param {object} tool - Row model of the selected tool; `tool.name` is used.
 * @param {object} config - Forge config passed to model resolution and the generator.
 * @param {object} screen - blessed screen the popup is attached to.
 * @param {Function} setStatus - `(message, isError)` status-bar reporter.
 * @param {Function} [openPopup] - Optional hook called when the popup opens.
 * @param {Function} [closePopup] - Optional hook called when the popup closes.
 * @returns {Promise<void>}
 */
async function generateEvalsForTool(tool, config, screen, setStatus, openPopup, closePopup) {
  setStatus('Generating evals with AI…', false);

  const progressBox = blessed.box({
    parent: screen,
    border: 'line',
    top: 'center',
    left: 'center',
    width: 50,
    height: 5,
    label: ' Generating Evals ',
    tags: true,
    content: '\n {yellow-fg}⟳ Calling AI…{/yellow-fg}'
  });
  openPopup?.();
  screen.render();

  // Render the popup's final message, then tear it down after `delayMs` and
  // replace the in-progress status-bar text with the outcome.
  const finish = (delayMs, statusMessage, isError) => {
    screen.render();
    setTimeout(() => {
      closePopup?.();
      progressBox.destroy();
      screen.render();
      setStatus(statusMessage, isError);
    }, delayMs);
  };

  try {
    const { resolveModelConfig } = await import('../api-client.js');
    const { generateEvals } = await import('../forge-eval-generator.js');

    // Minimal KEY=VALUE parse of ./.env: skips blanks and # comments, strips
    // one leading and one trailing quote character from the value.
    const envPath = resolve(process.cwd(), '.env');
    const env = {};
    if (existsSync(envPath)) {
      for (const line of readFileSync(envPath, 'utf-8').split('\n')) {
        const t = line.trim();
        if (!t || t.startsWith('#')) continue;
        const eq = t.indexOf('=');
        if (eq === -1) continue;
        env[t.slice(0, eq).trim()] = t.slice(eq + 1).trim().replace(/^["']|["']$/g, '');
      }
    }

    const modelConfig = resolveModelConfig(config, env, 'eval');
    if (!modelConfig.apiKey) {
      throw new Error('No API key found. Add one in Settings → API Keys.');
    }

    // Only the tool name is known here, so the spec is otherwise empty.
    const spec = { name: tool.name, description: '', triggerPhrases: [], tags: [] };

    const result = await generateEvals({
      spec,
      allTools: [],
      projectConfig: config,
      projectRoot: process.cwd(),
      modelConfig
    });

    // Create the parent directory of BOTH output files; they are not
    // guaranteed to share one.
    for (const outPath of [result.goldenPath, result.labeledPath]) {
      mkdirSync(dirname(outPath), { recursive: true });
    }
    writeFileSync(result.goldenPath, JSON.stringify(result.goldenCases, null, 2), 'utf-8');
    writeFileSync(result.labeledPath, JSON.stringify(result.labeledCases, null, 2), 'utf-8');

    progressBox.setContent(`\n {green-fg}✓ Generated ${result.goldenCases.length} golden + ${result.labeledCases.length} labeled cases{/green-fg}`);
    finish(2000, `Evals written to ${result.goldenPath}`, false);
  } catch (err) {
    progressBox.setContent(`\n {red-fg}⚠ ${err.message}{/red-fg}`);
    // Without this the status bar would keep saying "Generating evals with AI…".
    finish(3000, `Eval generation failed: ${err.message}`, true);
  }
}
|
|
298
|
+
|
|
299
|
+
/**
 * Run the tool's evals across every model in `config.modelMatrix`, then open
 * the model-comparison view with the per-model results.
 *
 * A progress popup shows the current model and case counter while the run is
 * in flight. Popup teardown is idempotent: an error raised after the popup has
 * already been dismissed is reported in the status bar rather than written to
 * a destroyed box and followed by a second `closePopup` call.
 *
 * @param {object} tool - Row model of the selected tool; `tool.name` is used.
 * @param {object} config - Forge config; reads `modelMatrix`, writes `_comparisonTarget`.
 * @param {object} screen - blessed screen the popup is attached to.
 * @param {Function} setStatus - `(message, isError)` status-bar reporter.
 * @param {Function} navigate - Switches to another view by name.
 * @param {Function} [openPopup] - Optional hook called when the popup opens.
 * @param {Function} [closePopup] - Optional hook called when the popup closes.
 * @returns {Promise<void>}
 */
async function compareModelsForTool(tool, config, screen, setStatus, navigate, openPopup, closePopup) {
  const matrix = config?.modelMatrix || [];
  if (matrix.length === 0) {
    setStatus('No model matrix configured. Go to Settings → Model Matrix to add models.', true);
    return;
  }

  const progressBox = blessed.box({
    parent: screen,
    border: 'line',
    top: 'center', left: 'center',
    width: 60, height: 8,
    label: ' Comparing Models ',
    tags: true,
    content: `\n {yellow-fg}⟳ Running evals across ${matrix.length} model(s)…{/yellow-fg}\n\n This may take a few minutes.`
  });
  openPopup?.();
  screen.render();

  let popupOpen = true;
  const dismissPopup = () => {
    if (!popupOpen) return;
    popupOpen = false;
    closePopup?.();
    progressBox.destroy();
    screen.render();
  };

  try {
    const { runEvalsMultiPass } = await import('../eval-runner.js');

    let lastStatus = '';
    const result = await runEvalsMultiPass(
      tool.name,
      config,
      process.cwd(),
      {},
      (progress) => {
        const line = ` ${progress.model}: case ${progress.done}/${progress.total}`;
        // Skip redundant repaints when the progress line has not changed.
        if (line !== lastStatus) {
          lastStatus = line;
          progressBox.setContent(`\n{yellow-fg}⟳ Running…{/yellow-fg}\n\n${line}`);
          screen.render();
        }
      }
    );

    dismissPopup();

    // Any model whose result carries an `error` is listed in the warning.
    const errorModels = Object.entries(result.perModel)
      .filter(([, v]) => v.error)
      .map(([k]) => k);
    if (errorModels.length > 0) {
      setStatus(`Warning: ${errorModels.join(', ')} skipped (no API key). Check Settings → Model Matrix.`, true);
    }

    // Hand the results to the model-comparison view via the shared config.
    config._comparisonTarget = { toolName: tool.name, perModel: result.perModel };
    navigate('model-comparison');
  } catch (err) {
    if (!popupOpen) {
      // The run finished and the popup is already gone; the failure came from
      // result handling or navigation.
      setStatus(`Model comparison failed: ${err.message}`, true);
      return;
    }
    progressBox.setContent(`\n {red-fg}⚠ ${err.message}{/red-fg}`);
    screen.render();
    setTimeout(dismissPopup, 4000);
  }
}
|
|
363
|
+
|
|
364
|
+
/**
 * Mark a tool as promoted in the registry table of the forge database.
 *
 * The baseline pass rate is passed / total_cases from the tool's eval summary
 * row, or null when there is no row or no cases. Progress and outcome are
 * reported through `setStatus`; failures are caught and reported, never thrown.
 *
 * @param {object} tool - Row model of the selected tool; `tool.name` is used.
 * @param {object} config - Forge config; reads `dbPath` (defaults to 'forge.db').
 * @param {object} screen - Unused here.
 * @param {Function} setStatus - `(message, isError)` status-bar reporter.
 * @param {Function} [openPopup] - Unused here.
 * @param {Function} [closePopup] - Unused here.
 * @returns {Promise<void>}
 */
async function promoteToolToRegistry(tool, config, screen, setStatus, openPopup, closePopup) {
  setStatus('Promoting tool to registry…', false);

  try {
    const databaseFile = resolve(process.cwd(), config?.dbPath || 'forge.db');
    if (!existsSync(databaseFile)) {
      setStatus('No forge.db found — run evals first.', true);
      return;
    }

    const { getDb, upsertToolRegistry, getEvalSummary } = await import('../db.js');
    const db = getDb(databaseFile);

    const stats = getEvalSummary(db).find((row) => row.tool_name === tool.name);
    let baseline = null;
    if (stats && stats.total_cases > 0) {
      baseline = stats.passed / stats.total_cases;
    }

    // A single upsert either creates the registry row or updates the existing one.
    upsertToolRegistry(db, {
      tool_name: tool.name,
      lifecycle_state: 'promoted',
      promoted_at: new Date().toISOString(),
      baseline_pass_rate: baseline
    });

    const baselineLabel = baseline === null ? 'N/A' : `${Math.round(baseline * 100)}%`;
    setStatus(`${tool.name} promoted. Baseline: ${baselineLabel}`, false);
  } catch (err) {
    setStatus(`Promote failed: ${err.message}`, true);
  }
}
|
|
393
|
+
|
|
394
|
+
/**
 * Show a popup describing the drift alert for a tool.
 *
 * The popup reports the first alert returned for the tool (detection time,
 * baseline and current rates, delta, suspects), or a note when there are no
 * alerts, no database file, or the lookup failed. Escape, `q`, or Enter closes it.
 *
 * @param {object} tool - Row model of the selected tool; `tool.name` is used.
 * @param {object} config - Forge config; reads `dbPath` (defaults to 'forge.db').
 * @param {object} screen - blessed screen the popup is attached to.
 * @param {Function} [openPopup] - Optional hook called when the popup opens.
 * @param {Function} [closePopup] - Optional hook called when the popup closes.
 * @returns {Promise<void>}
 */
async function showDriftReport(tool, config, screen, openPopup, closePopup) {
  // Rates are rendered as whole percentages; missing values show as N/A.
  const asPercent = (rate) => (rate != null ? `${Math.round(rate * 100)}%` : 'N/A');

  const describeDrift = async () => {
    const databaseFile = resolve(process.cwd(), config?.dbPath || 'forge.db');
    if (!existsSync(databaseFile)) {
      return '\n {#888888-fg}No database found.{/#888888-fg}';
    }

    const { getDb, getDriftAlerts } = await import('../db.js');
    const { computeSuspects } = await import('../drift-monitor.js');
    const db = getDb(databaseFile);
    const alerts = getDriftAlerts(db, tool.name);
    if (alerts.length === 0) {
      return '\n {green-fg}No open drift alerts for this tool.{/green-fg}';
    }

    const alert = alerts[0];
    const suspects = computeSuspects(db, tool.name);
    const deltaLabel = alert.delta != null ? `-${Math.round(alert.delta * 100)}pp` : '?';
    const suspectLabel = suspects.length > 0 ? suspects.join(', ') : '(none identified)';

    return [
      '',
      ' {yellow-fg}Drift Detected{/yellow-fg}',
      ` Detected: ${alert.detected_at?.slice(0, 19) || '?'}`,
      ` Baseline: ${asPercent(alert.baseline_rate)}`,
      ` Current: ${asPercent(alert.current_rate)}`,
      ` Delta: ${deltaLabel}`,
      '',
      ` {cyan-fg}Suspects:{/cyan-fg} ${suspectLabel}`
    ].join('\n');
  };

  let content;
  try {
    content = await describeDrift();
  } catch (err) {
    content = `\n {red-fg}Error: ${err.message}{/red-fg}`;
  }

  const popup = blessed.box({
    parent: screen,
    border: 'line',
    top: 'center',
    left: 'center',
    width: 60,
    height: 14,
    label: ` Drift Report: ${tool.name} `,
    tags: true,
    content
  });

  openPopup?.();
  popup.key(['escape', 'q', 'enter'], () => {
    closePopup?.();
    popup.destroy();
    screen.render();
  });
  popup.focus();
  screen.render();
}
|
|
442
|
+
|
|
443
|
+
/**
 * Generate verifier files for a tool with the configured AI model and write
 * each one to the path returned by the generator.
 *
 * A progress popup is shown while the call runs. The outcome stays in the
 * popup briefly (2s on success, 3s on failure), then the popup is removed and
 * the status bar is updated. The returned promise settles before that delayed
 * teardown runs.
 *
 * @param {object} tool - Row model of the selected tool; `tool.name` is used.
 * @param {object} config - Forge config passed to model resolution and the generator.
 * @param {object} screen - blessed screen the popup is attached to.
 * @param {Function} setStatus - `(message, isError)` status-bar reporter.
 * @param {Function} [openPopup] - Optional hook called when the popup opens.
 * @param {Function} [closePopup] - Optional hook called when the popup closes.
 * @returns {Promise<void>}
 */
async function generateVerifiersForTool(tool, config, screen, setStatus, openPopup, closePopup) {
  setStatus('Generating verifiers with AI…', false);

  const progressBox = blessed.box({
    parent: screen,
    border: 'line',
    top: 'center',
    left: 'center',
    width: 50,
    height: 5,
    label: ' Generating Verifiers ',
    tags: true,
    content: '\n {yellow-fg}⟳ Calling AI…{/yellow-fg}'
  });
  openPopup?.();
  screen.render();

  // Render the popup's final message, then tear it down after `delayMs` and
  // replace the in-progress status-bar text with the outcome.
  const finish = (delayMs, statusMessage, isError) => {
    screen.render();
    setTimeout(() => {
      closePopup?.();
      progressBox.destroy();
      screen.render();
      setStatus(statusMessage, isError);
    }, delayMs);
  };

  try {
    const { resolveModelConfig } = await import('../api-client.js');
    const { generateVerifiers } = await import('../forge-verifier-generator.js');

    // Minimal KEY=VALUE parse of ./.env: skips blanks and # comments, strips
    // one leading and one trailing quote character from the value.
    const envPath = resolve(process.cwd(), '.env');
    const env = {};
    if (existsSync(envPath)) {
      for (const line of readFileSync(envPath, 'utf-8').split('\n')) {
        const t = line.trim();
        if (!t || t.startsWith('#')) continue;
        const eq = t.indexOf('=');
        if (eq === -1) continue;
        env[t.slice(0, eq).trim()] = t.slice(eq + 1).trim().replace(/^["']|["']$/g, '');
      }
    }

    const modelConfig = resolveModelConfig(config, env, 'verifier');
    if (!modelConfig.apiKey) {
      throw new Error('No API key found. Add one in Settings → API Keys.');
    }

    // Only the tool name is known here, so the spec is otherwise empty.
    const spec = { name: tool.name, description: '', tags: [] };
    const result = await generateVerifiers({
      spec,
      projectConfig: config,
      projectRoot: process.cwd(),
      modelConfig
    });

    for (const vf of result.verifierFiles) {
      mkdirSync(dirname(vf.path), { recursive: true });
      writeFileSync(vf.path, vf.content, 'utf-8');
    }

    progressBox.setContent(`\n {green-fg}✓ Generated ${result.verifierFiles.length} verifier(s){/green-fg}`);
    finish(2000, `${result.verifierFiles.length} verifier(s) written`, false);
  } catch (err) {
    progressBox.setContent(`\n {red-fg}⚠ ${err.message}{/red-fg}`);
    // Without this the status bar would keep saying "Generating verifiers with AI…".
    finish(3000, `Verifier generation failed: ${err.message}`, true);
  }
}
|