agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +209 -0
  3. package/lib/agent-registry.js +170 -0
  4. package/lib/api-client.js +792 -0
  5. package/lib/api-loader.js +260 -0
  6. package/lib/auth.d.ts +25 -0
  7. package/lib/auth.js +158 -0
  8. package/lib/checks/check-adapter.js +172 -0
  9. package/lib/checks/compose.js +42 -0
  10. package/lib/checks/content-match.js +14 -0
  11. package/lib/checks/cost-budget.js +11 -0
  12. package/lib/checks/index.js +18 -0
  13. package/lib/checks/json-valid.js +15 -0
  14. package/lib/checks/latency.js +11 -0
  15. package/lib/checks/length-bounds.js +17 -0
  16. package/lib/checks/negative-match.js +14 -0
  17. package/lib/checks/no-hallucinated-numbers.js +63 -0
  18. package/lib/checks/non-empty.js +34 -0
  19. package/lib/checks/regex-match.js +12 -0
  20. package/lib/checks/run-checks.js +84 -0
  21. package/lib/checks/schema-match.js +26 -0
  22. package/lib/checks/tool-call-count.js +16 -0
  23. package/lib/checks/tool-selection.js +34 -0
  24. package/lib/checks/types.js +45 -0
  25. package/lib/comparison/compare.js +86 -0
  26. package/lib/comparison/format.js +104 -0
  27. package/lib/comparison/index.js +6 -0
  28. package/lib/comparison/statistics.js +59 -0
  29. package/lib/comparison/types.js +41 -0
  30. package/lib/config-schema.js +200 -0
  31. package/lib/config.d.ts +66 -0
  32. package/lib/conversation-store.d.ts +77 -0
  33. package/lib/conversation-store.js +443 -0
  34. package/lib/db.d.ts +6 -0
  35. package/lib/db.js +1112 -0
  36. package/lib/dep-check.js +99 -0
  37. package/lib/drift-background.js +61 -0
  38. package/lib/drift-monitor.js +187 -0
  39. package/lib/eval-runner.js +566 -0
  40. package/lib/fixtures/fixture-store.js +161 -0
  41. package/lib/fixtures/index.js +11 -0
  42. package/lib/forge-engine.js +982 -0
  43. package/lib/forge-eval-generator.js +417 -0
  44. package/lib/forge-file-writer.js +386 -0
  45. package/lib/forge-service-client.js +190 -0
  46. package/lib/forge-service.d.ts +4 -0
  47. package/lib/forge-service.js +655 -0
  48. package/lib/forge-verifier-generator.js +271 -0
  49. package/lib/handlers/admin.js +151 -0
  50. package/lib/handlers/agents.js +229 -0
  51. package/lib/handlers/chat-resume.js +334 -0
  52. package/lib/handlers/chat-sync.js +320 -0
  53. package/lib/handlers/chat.js +320 -0
  54. package/lib/handlers/conversations.js +92 -0
  55. package/lib/handlers/preferences.js +88 -0
  56. package/lib/handlers/tools-list.js +58 -0
  57. package/lib/hitl-engine.d.ts +60 -0
  58. package/lib/hitl-engine.js +261 -0
  59. package/lib/http-utils.js +92 -0
  60. package/lib/index.d.ts +20 -0
  61. package/lib/index.js +141 -0
  62. package/lib/init.js +636 -0
  63. package/lib/manual-entry.js +59 -0
  64. package/lib/mcp-server.js +252 -0
  65. package/lib/output-groups.js +54 -0
  66. package/lib/postgres-store.d.ts +31 -0
  67. package/lib/postgres-store.js +465 -0
  68. package/lib/preference-store.d.ts +47 -0
  69. package/lib/preference-store.js +79 -0
  70. package/lib/prompt-store.d.ts +42 -0
  71. package/lib/prompt-store.js +60 -0
  72. package/lib/rate-limiter.d.ts +30 -0
  73. package/lib/rate-limiter.js +104 -0
  74. package/lib/react-engine.d.ts +110 -0
  75. package/lib/react-engine.js +337 -0
  76. package/lib/runner/cli.js +156 -0
  77. package/lib/runner/cost-estimator.js +71 -0
  78. package/lib/runner/gate.js +46 -0
  79. package/lib/runner/index.js +165 -0
  80. package/lib/sidecar.d.ts +83 -0
  81. package/lib/sidecar.js +161 -0
  82. package/lib/sse.d.ts +15 -0
  83. package/lib/sse.js +30 -0
  84. package/lib/tools-scanner.js +91 -0
  85. package/lib/tui.js +253 -0
  86. package/lib/verifier-report.js +78 -0
  87. package/lib/verifier-runner.js +338 -0
  88. package/lib/verifier-scanner.js +70 -0
  89. package/lib/verifier-worker-pool.js +196 -0
  90. package/lib/views/chat.js +340 -0
  91. package/lib/views/endpoints.js +203 -0
  92. package/lib/views/eval-run.js +206 -0
  93. package/lib/views/forge-agent.js +538 -0
  94. package/lib/views/forge.js +410 -0
  95. package/lib/views/main-menu.js +275 -0
  96. package/lib/views/mediation.js +381 -0
  97. package/lib/views/model-compare.js +430 -0
  98. package/lib/views/model-comparison.js +333 -0
  99. package/lib/views/onboarding.js +470 -0
  100. package/lib/views/performance.js +237 -0
  101. package/lib/views/run-evals.js +205 -0
  102. package/lib/views/settings.js +829 -0
  103. package/lib/views/tools-evals.js +514 -0
  104. package/lib/views/verifier-coverage.js +617 -0
  105. package/lib/workers/verifier-worker.js +52 -0
  106. package/package.json +123 -0
  107. package/widget/forge-chat.js +789 -0
@@ -0,0 +1,206 @@
1
+ /**
2
+ * Eval Run View — Live progress display for running evals via Anthropic/OpenAI.
3
+ * No forge service required. The tool name to evaluate comes from config._evalTarget.
4
+ */
5
+
6
+ import blessed from 'blessed';
7
+ import { resolve, dirname } from 'path';
8
+ import { fileURLToPath } from 'url';
9
+
10
+ const __dirname = dirname(fileURLToPath(import.meta.url));
11
+ const PROJECT_ROOT = resolve(__dirname, '../..');
12
+
13
/**
 * Build the eval-run view: a title strip, a progress bar, a results table and
 * a summary bar, then kick off the eval run for `config._evalTarget`.
 *
 * When no target tool is set, a static "No tool selected" notice is returned
 * instead and nothing is run.
 *
 * @param {object} ctx
 * @param {object} ctx.screen - blessed screen; `width` and `render()` are used.
 * @param {object} ctx.config - app config; `_evalTarget` names the tool to evaluate.
 * @param {Function} ctx.navigate - called with 'performance' when `p` is pressed.
 * @param {Function} ctx.setFooter - receives the footer hint string for this view.
 * @param {Function} ctx.screenKey - registers a view-scoped key handler.
 * @returns {object} The root blessed box of the view.
 */
export function createView({ screen, config, navigate, setFooter, screenKey }) {
  const toolName = config._evalTarget;
  if (!toolName) {
    // Navigated here directly without selecting a tool — show a hint instead.
    const container = blessed.box({ top: 0, left: 0, width: '100%', height: '100%', tags: true });
    blessed.box({
      parent: container, top: 'center', left: 'center', width: '60%', height: 3,
      tags: true, align: 'center',
      content: '{yellow-fg}No tool selected.\nGo to Tools & Evals and press Enter on a tool to run its evals.{/yellow-fg}'
    });
    // Render on the next tick, after the caller has had a chance to attach the container.
    setImmediate(() => { screen.render(); });
    return container;
  }

  const container = blessed.box({ top: 0, left: 0, width: '100%', height: '100%', tags: true });

  // ── Title strip ───────────────────────────────────────────────────────────
  const titleBar = blessed.box({
    parent: container,
    top: 0, left: 0, width: '100%', height: 1,
    tags: true, style: { bg: 'default' }
  });

  // ── Progress bar ──────────────────────────────────────────────────────────
  const progressBox = blessed.box({
    parent: container,
    top: 1, left: 1, width: '100%-2', height: 3,
    border: { type: 'line' }, tags: true,
    style: { border: { fg: '#333333' } }
  });

  // ── Results table ─────────────────────────────────────────────────────────
  const resultsTable = blessed.listtable({
    parent: container,
    top: 4, left: 0, width: '100%', height: '100%-7',
    tags: true, keys: true, vi: true,
    border: { type: 'line' }, align: 'left',
    style: {
      header: { bold: true, fg: 'cyan' },
      cell: { selected: { bg: '#1a3a5c' } },
      border: { fg: '#333333' }
    },
    pad: 1
  });

  // ── Summary bar ───────────────────────────────────────────────────────────
  const summaryBar = blessed.box({
    parent: container,
    bottom: 0, left: 0, width: '100%', height: 2,
    border: { type: 'line' }, tags: true,
    style: { border: { fg: '#333333' } }
  });

  setFooter(
    ' {cyan-fg}↑↓{/cyan-fg} scroll results {cyan-fg}p{/cyan-fg} performance history {cyan-fg}b{/cyan-fg} back'
  );

  // 'p' is view-specific — registered via screenKey so it's cleaned up on navigation.
  screenKey('p', () => navigate('performance'));

  // ── State ─────────────────────────────────────────────────────────────────
  let total = 0;
  let passedCount = 0;
  let skippedCount = 0;
  let doneCount = 0;
  const rows = [];

  /** Redraw the progress bar and the done/passed/failed/skipped counters. */
  function updateProgress() {
    // Clamp to [0, 1]: the pre-counted total can be lower than what the runner
    // reports, and an over-full bar would make String#repeat throw a RangeError.
    const ratio = total > 0 ? Math.min(1, Math.max(0, doneCount / total)) : 0;
    const pct = Math.round(ratio * 100);
    const barWidth = Math.max(10, (screen.width || 80) - 22);
    const filled = Math.round(ratio * barWidth);
    const failedCount = Math.max(0, doneCount - passedCount - skippedCount);
    const bar =
      '{green-fg}' + '█'.repeat(filled) + '{/green-fg}' +
      '{#333333-fg}' + '░'.repeat(barWidth - filled) + '{/#333333-fg}';
    progressBox.setContent(
      ` ${bar}\n` +
      ` {white-fg}${doneCount}/${total}{/white-fg} ` +
      `{green-fg}${passedCount} passed{/green-fg} ` +
      `{red-fg}${failedCount} failed{/red-fg} ` +
      `{#888888-fg}${skippedCount} skipped{/#888888-fg} ` +
      `${pct}%`
    );
    screen.render();
  }

  /** Map a row status to its coloured table label. */
  function statusLabel(status) {
    if (status === 'passed') return '{green-fg}✓ pass{/green-fg}';
    if (status === 'skipped') return '{#888888-fg}— skip{/#888888-fg}';
    return '{red-fg}✗ fail{/red-fg}';
  }

  /** Rebuild the results table from `rows` (placeholder row while empty). */
  function updateTable() {
    const data = rows.map((r) => [
      // String() guards against non-string case ids (e.g. numeric ids).
      String(r.id || '').slice(0, 22),
      statusLabel(r.status),
      (r.toolsCalled || []).join(', ') || '—',
      String(r.reason || r.description || '').slice(0, 38)
    ]);
    resultsTable.setData([
      ['ID', 'Status', 'Tools Called', 'Notes'],
      ...(data.length ? data : [['Running...', '', '', '']])
    ]);
    screen.render();
  }

  // ── Runner ────────────────────────────────────────────────────────────────
  /** Run pre-flight checks, execute the evals and render the final summary. */
  async function startRun() {
    const { runEvals, findEvalFiles } = await import('../eval-runner.js');
    const { existsSync, readFileSync } = await import('fs');

    // Pre-flight: check API key.
    // NOTE(review): only looks for the key *name* in PROJECT_ROOT/.env; keys set
    // solely in the process environment are not detected — confirm intended.
    const envPath = resolve(PROJECT_ROOT, '.env');
    let hasKey = false;
    if (existsSync(envPath)) {
      hasKey = /ANTHROPIC_API_KEY|OPENAI_API_KEY/.test(readFileSync(envPath, 'utf-8'));
    }
    if (!hasKey) {
      titleBar.setContent(` {red-fg}⚠ No API key{/red-fg} {white-fg}${toolName}{/white-fg}`);
      summaryBar.setContent(
        ' Add {cyan-fg}ANTHROPIC_API_KEY{/cyan-fg} or {cyan-fg}OPENAI_API_KEY{/cyan-fg}' +
        ' in Settings → API Keys / Secrets, then press {cyan-fg}b{/cyan-fg} and retry.'
      );
      screen.render();
      return;
    }

    // Pre-flight: check eval files exist. A lookup failure is treated the same
    // as "no files" so the user still gets an actionable message.
    let evalFiles = [];
    try { evalFiles = findEvalFiles(toolName, config); } catch (_) { /* treated as no files */ }
    if (evalFiles.length === 0) {
      titleBar.setContent(` {yellow-fg}⚠ No eval files{/yellow-fg} {white-fg}${toolName}{/white-fg}`);
      summaryBar.setContent(
        ' Run {cyan-fg}/forge-tool{/cyan-fg} in Claude to generate eval files for this tool.'
      );
      screen.render();
      return;
    }

    // Count cases for the progress bar. Best-effort: unreadable or malformed
    // files contribute 0, and only JSON arrays are counted so a non-array
    // document cannot turn the total into NaN.
    let caseCount = 0;
    for (const file of evalFiles) {
      try {
        const parsed = JSON.parse(readFileSync(file, 'utf-8'));
        if (Array.isArray(parsed)) caseCount += parsed.length;
      } catch (_) { /* best-effort count only */ }
    }
    total = caseCount;
    titleBar.setContent(
      ` Running evals for {cyan-fg}${toolName}{/cyan-fg}` +
      ` — ${total} cases across ${evalFiles.length} file(s)`
    );
    updateProgress();
    updateTable();

    let summary;
    try {
      summary = await runEvals(toolName, config, PROJECT_ROOT, (progress) => {
        doneCount = progress.done;
        // `passed === null` marks a skipped case; anything else falsy is a failure.
        if (progress.passed === null) skippedCount++;
        else if (progress.passed) passedCount++;
        rows.push({
          id: progress.caseId,
          status: progress.passed === null ? 'skipped' : progress.passed ? 'passed' : 'failed',
          toolsCalled: progress.toolsCalled,
          reason: progress.reason
        });
        updateProgress();
        updateTable();
      });
    } catch (err) {
      summaryBar.setContent(` {red-fg}Error: ${err.message}{/red-fg}`);
      screen.render();
      return;
    }

    // Pass rate is computed over cases that actually ran (skips excluded).
    const ran = summary.passed + summary.failed;
    const rate = ran > 0 ? Math.round((summary.passed / ran) * 100) : 0;
    const rateColor = rate >= 80 ? 'green' : rate >= 50 ? 'yellow' : 'red';
    titleBar.setContent(
      ` {bold}${toolName}{/bold} complete ` +
      `via {cyan-fg}${summary.provider}{/cyan-fg} / {cyan-fg}${summary.model}{/cyan-fg}`
    );
    summaryBar.setContent(
      ` {bold}Result:{/bold} ` +
      `{${rateColor}-fg}${summary.passed}/${ran} passed (${rate}%){/${rateColor}-fg}` +
      ` ${summary.failed} failed ${summary.skipped} skipped` +
      ` {#888888-fg}saved to forge.db{/#888888-fg}`
    );
    screen.render();
  }

  // Fire-and-forget: the view is returned immediately and filled in as the run progresses.
  startRun().catch((err) => {
    summaryBar.setContent(` {red-fg}Unexpected error: ${err.message}{/red-fg}`);
    screen.render();
  });
  resultsTable.focus();
  return container;
}
206
+