agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107)
  1. package/LICENSE +21 -0
  2. package/README.md +209 -0
  3. package/lib/agent-registry.js +170 -0
  4. package/lib/api-client.js +792 -0
  5. package/lib/api-loader.js +260 -0
  6. package/lib/auth.d.ts +25 -0
  7. package/lib/auth.js +158 -0
  8. package/lib/checks/check-adapter.js +172 -0
  9. package/lib/checks/compose.js +42 -0
  10. package/lib/checks/content-match.js +14 -0
  11. package/lib/checks/cost-budget.js +11 -0
  12. package/lib/checks/index.js +18 -0
  13. package/lib/checks/json-valid.js +15 -0
  14. package/lib/checks/latency.js +11 -0
  15. package/lib/checks/length-bounds.js +17 -0
  16. package/lib/checks/negative-match.js +14 -0
  17. package/lib/checks/no-hallucinated-numbers.js +63 -0
  18. package/lib/checks/non-empty.js +34 -0
  19. package/lib/checks/regex-match.js +12 -0
  20. package/lib/checks/run-checks.js +84 -0
  21. package/lib/checks/schema-match.js +26 -0
  22. package/lib/checks/tool-call-count.js +16 -0
  23. package/lib/checks/tool-selection.js +34 -0
  24. package/lib/checks/types.js +45 -0
  25. package/lib/comparison/compare.js +86 -0
  26. package/lib/comparison/format.js +104 -0
  27. package/lib/comparison/index.js +6 -0
  28. package/lib/comparison/statistics.js +59 -0
  29. package/lib/comparison/types.js +41 -0
  30. package/lib/config-schema.js +200 -0
  31. package/lib/config.d.ts +66 -0
  32. package/lib/conversation-store.d.ts +77 -0
  33. package/lib/conversation-store.js +443 -0
  34. package/lib/db.d.ts +6 -0
  35. package/lib/db.js +1112 -0
  36. package/lib/dep-check.js +99 -0
  37. package/lib/drift-background.js +61 -0
  38. package/lib/drift-monitor.js +187 -0
  39. package/lib/eval-runner.js +566 -0
  40. package/lib/fixtures/fixture-store.js +161 -0
  41. package/lib/fixtures/index.js +11 -0
  42. package/lib/forge-engine.js +982 -0
  43. package/lib/forge-eval-generator.js +417 -0
  44. package/lib/forge-file-writer.js +386 -0
  45. package/lib/forge-service-client.js +190 -0
  46. package/lib/forge-service.d.ts +4 -0
  47. package/lib/forge-service.js +655 -0
  48. package/lib/forge-verifier-generator.js +271 -0
  49. package/lib/handlers/admin.js +151 -0
  50. package/lib/handlers/agents.js +229 -0
  51. package/lib/handlers/chat-resume.js +334 -0
  52. package/lib/handlers/chat-sync.js +320 -0
  53. package/lib/handlers/chat.js +320 -0
  54. package/lib/handlers/conversations.js +92 -0
  55. package/lib/handlers/preferences.js +88 -0
  56. package/lib/handlers/tools-list.js +58 -0
  57. package/lib/hitl-engine.d.ts +60 -0
  58. package/lib/hitl-engine.js +261 -0
  59. package/lib/http-utils.js +92 -0
  60. package/lib/index.d.ts +20 -0
  61. package/lib/index.js +141 -0
  62. package/lib/init.js +636 -0
  63. package/lib/manual-entry.js +59 -0
  64. package/lib/mcp-server.js +252 -0
  65. package/lib/output-groups.js +54 -0
  66. package/lib/postgres-store.d.ts +31 -0
  67. package/lib/postgres-store.js +465 -0
  68. package/lib/preference-store.d.ts +47 -0
  69. package/lib/preference-store.js +79 -0
  70. package/lib/prompt-store.d.ts +42 -0
  71. package/lib/prompt-store.js +60 -0
  72. package/lib/rate-limiter.d.ts +30 -0
  73. package/lib/rate-limiter.js +104 -0
  74. package/lib/react-engine.d.ts +110 -0
  75. package/lib/react-engine.js +337 -0
  76. package/lib/runner/cli.js +156 -0
  77. package/lib/runner/cost-estimator.js +71 -0
  78. package/lib/runner/gate.js +46 -0
  79. package/lib/runner/index.js +165 -0
  80. package/lib/sidecar.d.ts +83 -0
  81. package/lib/sidecar.js +161 -0
  82. package/lib/sse.d.ts +15 -0
  83. package/lib/sse.js +30 -0
  84. package/lib/tools-scanner.js +91 -0
  85. package/lib/tui.js +253 -0
  86. package/lib/verifier-report.js +78 -0
  87. package/lib/verifier-runner.js +338 -0
  88. package/lib/verifier-scanner.js +70 -0
  89. package/lib/verifier-worker-pool.js +196 -0
  90. package/lib/views/chat.js +340 -0
  91. package/lib/views/endpoints.js +203 -0
  92. package/lib/views/eval-run.js +206 -0
  93. package/lib/views/forge-agent.js +538 -0
  94. package/lib/views/forge.js +410 -0
  95. package/lib/views/main-menu.js +275 -0
  96. package/lib/views/mediation.js +381 -0
  97. package/lib/views/model-compare.js +430 -0
  98. package/lib/views/model-comparison.js +333 -0
  99. package/lib/views/onboarding.js +470 -0
  100. package/lib/views/performance.js +237 -0
  101. package/lib/views/run-evals.js +205 -0
  102. package/lib/views/settings.js +829 -0
  103. package/lib/views/tools-evals.js +514 -0
  104. package/lib/views/verifier-coverage.js +617 -0
  105. package/lib/workers/verifier-worker.js +52 -0
  106. package/package.json +123 -0
  107. package/widget/forge-chat.js +789 -0
@@ -0,0 +1,205 @@
1
+ /**
2
+ * Run Evals View — Browse and run eval files directly from the TUI.
3
+ */
4
+
5
+ import blessed from 'blessed';
6
+ import { existsSync, readdirSync } from 'node:fs';
7
+ import { resolve, basename } from 'node:path';
8
+
9
/**
 * Find eval files in the configured evalsDir.
 *
 * Reads `config.project.evalsDir`, resolved against the current working
 * directory and falling back to `docs/examples`, and returns the entries
 * whose names end in `.golden.json` or `.labeled.json`.
 *
 * Directories are skipped even if their name matches, and the result is
 * sorted by file name so the listing is stable across platforms and refreshes.
 *
 * @param {object} config - Forge config; only `project.evalsDir` is read.
 * @returns {string[]} Absolute paths of matching eval files, sorted by name.
 *   Empty when the directory does not exist or cannot be read.
 */
function findEvalFiles(config) {
  // `||` (not `??`) on purpose: an empty-string evalsDir also falls back.
  const evalsDir = resolve(process.cwd(), config?.project?.evalsDir || 'docs/examples');
  if (!existsSync(evalsDir)) return [];

  const isEvalName = (name) => name.endsWith('.golden.json') || name.endsWith('.labeled.json');

  try {
    return readdirSync(evalsDir, { withFileTypes: true })
      // Exclude directories only, so symlinks to eval files are still listed.
      .filter((entry) => !entry.isDirectory() && isEvalName(entry.name))
      .map((entry) => entry.name)
      .sort()
      .map((name) => resolve(evalsDir, name));
  } catch {
    // Best effort: an unreadable directory is treated the same as an empty one.
    return [];
  }
}
25
+
26
/** HTTP methods for which fetch() rejects a request that carries a body. */
const BODYLESS_METHODS = new Set(['GET', 'HEAD']);

/**
 * Build the function that sends one eval message to the configured agent.
 *
 * @param {object} agentConfig - The `agent` section of the forge config.
 *   Reads `endpoint`, `method` (default `POST`), `headers`, `inputField`
 *   (default `message`) and `outputField` (default `text`).
 * @returns {(message: string) => Promise<{responseText: any, toolsCalled: any, latencyMs: number}>}
 *   Resolves with the agent's reply; rejects when the HTTP status is not ok.
 */
function buildAgentFn(agentConfig) {
  const method = agentConfig.method ?? 'POST';
  const headers = { 'Content-Type': 'application/json', ...(agentConfig.headers ?? {}) };
  const inputField = agentConfig.inputField ?? 'message';
  const outputField = agentConfig.outputField ?? 'text';
  const sendsBody = !BODYLESS_METHODS.has(String(method).toUpperCase());

  return async (message) => {
    const t0 = Date.now();
    const init = { method, headers };
    let target = agentConfig.endpoint;

    if (sendsBody) {
      init.body = JSON.stringify({ [inputField]: message });
    } else {
      // fetch() throws a TypeError for GET/HEAD requests with a body, so the
      // input travels as a query parameter instead.
      target = new URL(agentConfig.endpoint);
      target.searchParams.set(inputField, message);
    }

    const res = await fetch(target, init);
    if (!res.ok) throw new Error(`Agent returned ${res.status}`);
    const data = await res.json();
    return {
      responseText: data[outputField] ?? '',
      toolsCalled: data.toolsCalled ?? [],
      latencyMs: Date.now() - t0
    };
  };
}

/**
 * Turn an eval-suite summary into the tagged lines shown in the results pane.
 *
 * @param {object} summary - Result of `runEvalSuite`. Reads `total`, `passed`,
 *   `failed`, `skipped`, `passRate`, `p95LatencyMs`, `gates.results` and `cases`.
 * @returns {string[]} Lines of blessed-tagged text, one per display row.
 */
function formatSummaryLines(summary) {
  // Dynamic text is escaped so stray `{...}` sequences are not parsed as tags.
  const esc = (value) => blessed.escape(String(value));

  const { total, passed, failed, skipped, passRate } = summary;
  const pct = (passRate * 100).toFixed(1);
  const passIcon = failed === 0 ? '{green-fg}✓{/green-fg}' : '{red-fg}✗{/red-fg}';

  const lines = [
    `${passIcon} {bold}${passed}/${total} passed (${pct}%){/bold}`,
    skipped > 0 ? `{#888888-fg}${skipped} skipped{/#888888-fg}` : null,
    summary.p95LatencyMs > 0 ? `p95 latency: ${summary.p95LatencyMs}ms` : null,
    '',
  ].filter((line) => line !== null);

  if (summary.gates?.results?.length > 0) {
    lines.push('{bold}Gates:{/bold}');
    for (const r of summary.gates.results) {
      const gateIcon = r.pass ? '{green-fg}✓{/green-fg}' : '{red-fg}✗{/red-fg}';
      lines.push(`${gateIcon} ${esc(r.gate)}: ${r.actual} (≥ ${r.threshold})`);
    }
    lines.push('');
  }

  // Tolerate a summary without a `cases` array instead of throwing.
  const failingCases = (summary.cases ?? []).filter((c) => c.status === 'failed');
  if (failingCases.length > 0) {
    lines.push('{bold}Failures:{/bold}');
    for (const f of failingCases) {
      lines.push(`{red-fg}✗{/red-fg} ${esc(f.id ?? '(unnamed)')}: ${esc(f.reason)}`);
    }
  } else if (failed === 0) {
    lines.push('{green-fg}All cases passed!{/green-fg}');
  }

  return lines;
}

/**
 * Create the "Run Evals" view: a list of eval files on the left and a results
 * pane on the right.
 *
 * Keys bound on the list: Enter runs the selected file, `r` runs it in record
 * mode, `p` runs it in replay mode, and `b` navigates to `main-menu`.
 *
 * @param {object} deps
 * @param {object} deps.screen - Screen object; `render()` is called after updates.
 * @param {object} deps.content - Not used by this view.
 * @param {object} deps.config - Forge config (reads `agent`, `fixtures`, `gates`;
 *   also passed to `findEvalFiles`).
 * @param {(view: string) => void} deps.navigate - Switches to another view.
 * @param {(text: string) => void} deps.setFooter - Sets the footer hint text.
 * @returns {object} The container box, with a `refresh()` method that reloads
 *   the eval file list.
 */
export function createView({ screen, content, config, navigate, setFooter }) {
  const container = blessed.box({
    top: 0, left: 0, width: '100%', height: '100%', tags: true
  });

  // Title
  blessed.box({
    parent: container,
    top: 0, left: 0, width: '100%', height: 3,
    tags: true,
    border: { type: 'line' },
    style: { border: { fg: 'blue' } },
    align: 'center',
    valign: 'middle',
    content: ' {bold}{cyan-fg}▲ Run Evals{/cyan-fg}{/bold} '
  });

  // Eval file list
  const list = blessed.list({
    parent: container,
    top: 3, left: 2,
    width: '50%-2', height: '100%-6',
    tags: true, keys: true, vi: true, mouse: true,
    border: { type: 'line' },
    style: {
      border: { fg: 'blue' },
      selected: { bg: '#1a3a5c', bold: true },
      item: { fg: 'white' }
    },
    label: ' Eval Files '
  });

  // Results pane
  const resultsBox = blessed.box({
    parent: container,
    top: 3, left: '50%',
    width: '50%', height: '100%-6',
    tags: true, scrollable: true, alwaysScroll: true,
    border: { type: 'line' },
    style: { border: { fg: 'blue' } },
    label: ' Results ',
    padding: { left: 1, right: 1 }
  });

  const statusBar = blessed.box({
    parent: container,
    bottom: 2, left: 0, width: '100%', height: 1,
    tags: true,
    content: ''
  });

  setFooter(
    ' {cyan-fg}↑↓{/cyan-fg} select {cyan-fg}Enter{/cyan-fg} run {cyan-fg}r{/cyan-fg} record {cyan-fg}p{/cyan-fg} replay {cyan-fg}b{/cyan-fg} back'
  );

  let evalFiles = [];
  // Guards against starting a second run while one is still in flight.
  let running = false;

  /** Re-scan the evals directory and repopulate the list widget. */
  function loadFiles() {
    evalFiles = findEvalFiles(config);
    if (evalFiles.length === 0) {
      list.setItems([' {#888888-fg}No eval files found{/#888888-fg}']);
    } else {
      // The list parses tags, so file names are escaped before display.
      list.setItems(evalFiles.map((f) => ` ${blessed.escape(basename(f))}`));
    }
    screen.render();
  }

  /**
   * Run the currently selected eval file and show the outcome.
   * @param {'normal'|'record'|'replay'} mode - `record`/`replay` enable the
   *   matching `runEvalSuite` option; any other value enables neither.
   */
  async function runSelected(mode) {
    if (running || evalFiles.length === 0) return;
    const idx = list.selected;
    if (idx < 0 || idx >= evalFiles.length) return;

    const evalPath = evalFiles[idx];
    const fileName = blessed.escape(basename(evalPath));

    running = true;
    statusBar.setContent(` {yellow-fg}⟳ Running ${fileName}...{/yellow-fg}`);
    resultsBox.setContent('Running...');
    screen.render();

    try {
      const agentConfig = config?.agent ?? {};

      // Validate configuration before loading the runner module.
      if (!agentConfig.endpoint) {
        resultsBox.setContent(
          '{red-fg}No agent.endpoint configured.{/red-fg}\n\n' +
          'Add to forge.config.json:\n' +
          '{\n "agent": {\n "endpoint": "http://localhost:8001/agent-api/chat-sync"\n }\n}'
        );
        statusBar.setContent(' {red-fg}✗ Configuration error{/red-fg}');
        return;
      }

      const { runEvalSuite } = await import('../runner/index.js');

      const summary = await runEvalSuite(evalPath, buildAgentFn(agentConfig), {
        record: mode === 'record',
        replay: mode === 'replay',
        fixturesDir: resolve(process.cwd(), config?.fixtures?.dir ?? '.forge-fixtures'),
        ttlDays: config?.fixtures?.ttlDays ?? 30,
        gates: config?.gates ?? {},
      });

      resultsBox.setContent(formatSummaryLines(summary).join('\n'));
      statusBar.setContent(` ${summary.failed === 0 ? '{green-fg}✓ Passed{/green-fg}' : '{red-fg}✗ Failed{/red-fg}'} ${fileName}`);
    } catch (err) {
      resultsBox.setContent(`{red-fg}Error: ${blessed.escape(String(err?.message ?? err))}{/red-fg}`);
      statusBar.setContent(' {red-fg}✗ Error{/red-fg}');
    } finally {
      // Runs on every exit path, including the early configuration-error return.
      running = false;
      screen.render();
    }
  }

  list.key('enter', () => runSelected('normal'));
  list.key('r', () => runSelected('record'));
  list.key('p', () => runSelected('replay'));
  list.key('b', () => navigate('main-menu'));

  container.refresh = () => {
    loadFiles();
  };

  loadFiles();
  list.focus();
  return container;
}