agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +209 -0
  3. package/lib/agent-registry.js +170 -0
  4. package/lib/api-client.js +792 -0
  5. package/lib/api-loader.js +260 -0
  6. package/lib/auth.d.ts +25 -0
  7. package/lib/auth.js +158 -0
  8. package/lib/checks/check-adapter.js +172 -0
  9. package/lib/checks/compose.js +42 -0
  10. package/lib/checks/content-match.js +14 -0
  11. package/lib/checks/cost-budget.js +11 -0
  12. package/lib/checks/index.js +18 -0
  13. package/lib/checks/json-valid.js +15 -0
  14. package/lib/checks/latency.js +11 -0
  15. package/lib/checks/length-bounds.js +17 -0
  16. package/lib/checks/negative-match.js +14 -0
  17. package/lib/checks/no-hallucinated-numbers.js +63 -0
  18. package/lib/checks/non-empty.js +34 -0
  19. package/lib/checks/regex-match.js +12 -0
  20. package/lib/checks/run-checks.js +84 -0
  21. package/lib/checks/schema-match.js +26 -0
  22. package/lib/checks/tool-call-count.js +16 -0
  23. package/lib/checks/tool-selection.js +34 -0
  24. package/lib/checks/types.js +45 -0
  25. package/lib/comparison/compare.js +86 -0
  26. package/lib/comparison/format.js +104 -0
  27. package/lib/comparison/index.js +6 -0
  28. package/lib/comparison/statistics.js +59 -0
  29. package/lib/comparison/types.js +41 -0
  30. package/lib/config-schema.js +200 -0
  31. package/lib/config.d.ts +66 -0
  32. package/lib/conversation-store.d.ts +77 -0
  33. package/lib/conversation-store.js +443 -0
  34. package/lib/db.d.ts +6 -0
  35. package/lib/db.js +1112 -0
  36. package/lib/dep-check.js +99 -0
  37. package/lib/drift-background.js +61 -0
  38. package/lib/drift-monitor.js +187 -0
  39. package/lib/eval-runner.js +566 -0
  40. package/lib/fixtures/fixture-store.js +161 -0
  41. package/lib/fixtures/index.js +11 -0
  42. package/lib/forge-engine.js +982 -0
  43. package/lib/forge-eval-generator.js +417 -0
  44. package/lib/forge-file-writer.js +386 -0
  45. package/lib/forge-service-client.js +190 -0
  46. package/lib/forge-service.d.ts +4 -0
  47. package/lib/forge-service.js +655 -0
  48. package/lib/forge-verifier-generator.js +271 -0
  49. package/lib/handlers/admin.js +151 -0
  50. package/lib/handlers/agents.js +229 -0
  51. package/lib/handlers/chat-resume.js +334 -0
  52. package/lib/handlers/chat-sync.js +320 -0
  53. package/lib/handlers/chat.js +320 -0
  54. package/lib/handlers/conversations.js +92 -0
  55. package/lib/handlers/preferences.js +88 -0
  56. package/lib/handlers/tools-list.js +58 -0
  57. package/lib/hitl-engine.d.ts +60 -0
  58. package/lib/hitl-engine.js +261 -0
  59. package/lib/http-utils.js +92 -0
  60. package/lib/index.d.ts +20 -0
  61. package/lib/index.js +141 -0
  62. package/lib/init.js +636 -0
  63. package/lib/manual-entry.js +59 -0
  64. package/lib/mcp-server.js +252 -0
  65. package/lib/output-groups.js +54 -0
  66. package/lib/postgres-store.d.ts +31 -0
  67. package/lib/postgres-store.js +465 -0
  68. package/lib/preference-store.d.ts +47 -0
  69. package/lib/preference-store.js +79 -0
  70. package/lib/prompt-store.d.ts +42 -0
  71. package/lib/prompt-store.js +60 -0
  72. package/lib/rate-limiter.d.ts +30 -0
  73. package/lib/rate-limiter.js +104 -0
  74. package/lib/react-engine.d.ts +110 -0
  75. package/lib/react-engine.js +337 -0
  76. package/lib/runner/cli.js +156 -0
  77. package/lib/runner/cost-estimator.js +71 -0
  78. package/lib/runner/gate.js +46 -0
  79. package/lib/runner/index.js +165 -0
  80. package/lib/sidecar.d.ts +83 -0
  81. package/lib/sidecar.js +161 -0
  82. package/lib/sse.d.ts +15 -0
  83. package/lib/sse.js +30 -0
  84. package/lib/tools-scanner.js +91 -0
  85. package/lib/tui.js +253 -0
  86. package/lib/verifier-report.js +78 -0
  87. package/lib/verifier-runner.js +338 -0
  88. package/lib/verifier-scanner.js +70 -0
  89. package/lib/verifier-worker-pool.js +196 -0
  90. package/lib/views/chat.js +340 -0
  91. package/lib/views/endpoints.js +203 -0
  92. package/lib/views/eval-run.js +206 -0
  93. package/lib/views/forge-agent.js +538 -0
  94. package/lib/views/forge.js +410 -0
  95. package/lib/views/main-menu.js +275 -0
  96. package/lib/views/mediation.js +381 -0
  97. package/lib/views/model-compare.js +430 -0
  98. package/lib/views/model-comparison.js +333 -0
  99. package/lib/views/onboarding.js +470 -0
  100. package/lib/views/performance.js +237 -0
  101. package/lib/views/run-evals.js +205 -0
  102. package/lib/views/settings.js +829 -0
  103. package/lib/views/tools-evals.js +514 -0
  104. package/lib/views/verifier-coverage.js +617 -0
  105. package/lib/workers/verifier-worker.js +52 -0
  106. package/package.json +123 -0
  107. package/widget/forge-chat.js +789 -0
@@ -0,0 +1,430 @@
1
+ /**
2
+ * Model Compare View — side-by-side model spec comparison.
3
+ *
4
+ * Runs the current forge state against two models in parallel (config.models.generation
5
+ * and config.models.secondary) and shows the resulting specs side by side so the user
6
+ * can choose which result to carry forward into the forge dialogue.
7
+ *
8
+ * Key bindings:
9
+ * a / 1 — use Model A result
10
+ * b / 2 — use Model B result
11
+ * m — merge (Model A base, overlay non-null fields from Model B)
12
+ * Escape — cancel, navigate back to forge
13
+ */
14
+
15
+ import blessed from 'blessed';
16
+ import { existsSync, readFileSync } from 'fs';
17
+ import { resolve, dirname } from 'path';
18
+ import { fileURLToPath } from 'url';
19
+ import { forgeStep, createInitialState } from '../forge-engine.js';
20
+ import { resolveModelConfig } from '../api-client.js';
21
+
22
+ const __dirname = dirname(fileURLToPath(import.meta.url));
23
+ const ENV_FILE = resolve(__dirname, '../../.env');
24
+
25
+ // ── Helpers ────────────────────────────────────────────────────────────────
26
+
27
/**
 * Read key=value pairs from the .env file at ENV_FILE into a plain object.
 *
 * Blank lines, lines starting with `#`, and lines without an `=` are skipped.
 * Keys and values are trimmed. A value is unquoted only when it is wrapped in
 * a matching pair of single or double quotes, so a value that merely starts
 * or ends with a quote character (for example a secret ending in `'`) is kept
 * intact instead of being silently truncated.
 *
 * @returns {Record<string, string>} Parsed variables; an empty object when the
 *   file does not exist.
 */
function loadEnv() {
  if (!existsSync(ENV_FILE)) return {};
  const out = {};
  // Accept both LF and CRLF line endings.
  const lines = readFileSync(ENV_FILE, 'utf-8').split(/\r?\n/);
  for (const line of lines) {
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;
    const eqIdx = trimmed.indexOf('=');
    if (eqIdx === -1) continue;
    const key = trimmed.slice(0, eqIdx).trim();
    const raw = trimmed.slice(eqIdx + 1).trim();
    // Only strip quotes that form a matching pair around the whole value.
    const quoted = raw.match(/^(["'])(.*)\1$/);
    out[key] = quoted ? quoted[2] : raw;
  }
  return out;
}
48
+
49
/**
 * Shallow-merge two spec objects: start from a copy of specA and overlay every
 * top-level field of specB whose value is neither null nor undefined.
 *
 * The merge is NOT recursive — a nested object in specB replaces the
 * corresponding nested object in specA wholesale. Falsy-but-present values
 * (0, '', false) in specB do override specA. Neither input is mutated.
 * A null or undefined spec is treated as an empty object.
 *
 * @param {object | null | undefined} specA - Base spec.
 * @param {object | null | undefined} specB - Spec whose non-null fields win.
 * @returns {object} A new merged object.
 */
function mergeSpecs(specA, specB) {
  // Spreading null/undefined is a no-op, so specA needs no extra guard.
  const merged = { ...specA };
  // Object.entries throws on null/undefined, hence the fallback.
  for (const [key, val] of Object.entries(specB ?? {})) {
    if (val !== null && val !== undefined) {
      merged[key] = val;
    }
  }
  return merged;
}
66
+
67
/**
 * Render a forgeStep result as plain text for a comparison panel.
 *
 * When the result carries non-blank assistant text, that text (trimmed) comes
 * first, followed by a blank line, a 40-character rule and another blank line.
 * The output always ends with a "Spec:" heading and the spec pretty-printed as
 * JSON; if the spec cannot be read or serialised, a placeholder is shown.
 *
 * @param {{ assistantText: string, nextState: { spec: object } }} result
 * @returns {string}
 */
function formatResult(result) {
  const intro = result.assistantText ? result.assistantText.trim() : '';
  const lines = intro ? [intro, '', '─'.repeat(40), ''] : [];
  lines.push('Spec:');

  let specText;
  try {
    specText = JSON.stringify(result.nextState.spec, null, 2);
  } catch (_) {
    // Covers both a missing nextState and an unserialisable (e.g. cyclic) spec.
    specText = '(could not serialise spec)';
  }
  lines.push(specText);

  return lines.join('\n');
}
90
+
91
+ // ── createView ─────────────────────────────────────────────────────────────
92
+
93
/**
 * Build the Model Compare view.
 *
 * Lays out a header, a status line, two model labels and two scrollable
 * panels, registers the key bindings, then (on the next tick) runs forgeStep
 * against the 'generation' and 'secondary' models in parallel and fills the
 * panels with the results. The chosen (or merged) spec is handed back to the
 * forge view through `config._chosenSpec` before navigating to 'forge'.
 *
 * Reads from config: `_forgeState`, `_forgeInput`, `dbPath`, plus whatever
 * resolveModelConfig reads. Writes to config: `_chosenSpec`.
 *
 * @param {object} deps
 * @param {object} deps.screen - blessed screen; used for render() and focus tracking.
 * @param {object} deps.content - Not used by this view.
 * @param {object} deps.config - Project config (see above for fields touched).
 * @param {(view: string) => void} deps.navigate - Switches the active view.
 * @param {(text: string) => void} deps.setFooter - Sets the shared footer text.
 * @param {(keys: string[], handler: Function) => void} deps.screenKey - Registers key handlers.
 * @param {Function} deps.openPopup - Not used by this view.
 * @param {Function} deps.closePopup - Not used by this view.
 * @param {Function} deps.startService - Not used by this view.
 * @returns {object} The root blessed box of the view.
 */
export function createView({
  screen,
  content,
  config,
  navigate,
  setFooter,
  screenKey,
  openPopup,
  closePopup,
  startService
}) {
  // ── Root container (fills the content area) ─────────────────────────────

  const root = blessed.box({
    top: 0, left: 0, width: '100%', height: '100%',
    tags: true
  });

  // ── Header row ──────────────────────────────────────────────────────────

  const header = blessed.box({
    parent: root,
    top: 0, left: 0, width: '100%', height: 1,
    tags: true,
    content: '{bold}{cyan-fg} Model Comparison{/cyan-fg}{/bold}',
    style: { bg: 'black' }
  });

  // ── Status bar (below header, above panels) ──────────────────────────────

  const statusBar = blessed.box({
    parent: root,
    top: 1, left: 0, width: '100%', height: 1,
    tags: true,
    content: '{yellow-fg} Initialising…{/yellow-fg}',
    style: { bg: 'black' }
  });

  // ── Panel labels row ─────────────────────────────────────────────────────

  const labelRow = blessed.box({
    parent: root,
    top: 2, left: 0, width: '100%', height: 2,
    tags: true,
    style: { bg: 'black' }
  });

  const labelA = blessed.box({
    parent: labelRow,
    top: 0, left: 0, width: '50%', height: 2,
    tags: true,
    content: '{bold} Model A:{/bold} {cyan-fg}loading…{/cyan-fg}\n {#555555-fg}───────────────────────────────────{/#555555-fg}',
    style: { bg: 'black' }
  });

  const labelB = blessed.box({
    parent: labelRow,
    top: 0, left: '50%', width: '50%', height: 2,
    tags: true,
    content: '{bold} Model B:{/bold} {cyan-fg}loading…{/cyan-fg}\n {#555555-fg}───────────────────────────────────{/#555555-fg}',
    style: { bg: 'black' }
  });

  // ── Panel boxes (scrollable) ─────────────────────────────────────────────

  const panelTop = 4; // header(1) + status(1) + labelRow(2)
  const panelHeight = `100%-${panelTop + 1}`; // leave 1 row for footer

  // tags are disabled on the panels so model output / error text containing
  // braces is shown verbatim rather than parsed as blessed markup.
  const panelA = blessed.scrollablebox({
    parent: root,
    top: panelTop, left: 0, width: '50%', height: panelHeight,
    border: { type: 'line', fg: '#333333' },
    scrollable: true,
    alwaysScroll: true,
    keys: true,
    vi: true,
    tags: false,
    content: '',
    scrollbar: { ch: '│', style: { fg: '#555555' } },
    style: { bg: 'black', fg: 'white' }
  });

  const panelB = blessed.scrollablebox({
    parent: root,
    top: panelTop, left: '50%', width: '50%', height: panelHeight,
    border: { type: 'line', fg: '#333333' },
    scrollable: true,
    alwaysScroll: true,
    keys: true,
    vi: true,
    tags: false,
    content: '',
    scrollbar: { ch: '│', style: { fg: '#555555' } },
    style: { bg: 'black', fg: 'white' }
  });

  // ── State ────────────────────────────────────────────────────────────────

  let modelAResult = null;
  let modelBResult = null;
  let modelAConfig = null;
  let modelBConfig = null;
  let forgeState = null;
  let ready = false;   // true once at least one result has arrived
  let leaving = false; // true once this view has started navigating away

  // ── Helpers ──────────────────────────────────────────────────────────────

  function setStatus(text) {
    statusBar.setContent(text);
    screen.render();
  }

  function updateLabelA(modelName) {
    labelA.setContent(
      `{bold} Model A:{/bold} {cyan-fg}${modelName}{/cyan-fg}\n {#555555-fg}───────────────────────────────────{/#555555-fg}`
    );
  }

  function updateLabelB(modelName) {
    labelB.setContent(
      `{bold} Model B:{/bold} {cyan-fg}${modelName}{/cyan-fg}\n {#555555-fg}───────────────────────────────────{/#555555-fg}`
    );
  }

  function showFooterReady() {
    setFooter(
      ' {bold}a{/bold}/{bold}1{/bold} use A ' +
      '{bold}b{/bold}/{bold}2{/bold} use B ' +
      '{bold}m{/bold} merge ' +
      '{bold}Escape{/bold} back'
    );
  }

  function showFooterWaiting() {
    setFooter(' {yellow-fg}Running comparison…{/yellow-fg} {bold}Escape{/bold} back');
  }

  // ── DB recording ─────────────────────────────────────────────────────────

  async function recordComparison(chosenModel) {
    try {
      const dbPath = resolve(process.cwd(), config?.dbPath || 'forge.db');
      const { getDb, insertModelComparison } = await import('../db.js');
      const db = getDb(dbPath);
      insertModelComparison(db, {
        tool_name: forgeState?.spec?.name || 'unknown',
        model_a: modelAConfig?.model || 'unknown',
        model_b: modelBConfig?.model || 'unknown',
        spec_a_json: modelAResult ? JSON.stringify(modelAResult.nextState.spec) : null,
        spec_b_json: modelBResult ? JSON.stringify(modelBResult.nextState.spec) : null,
        chosen_model: chosenModel,
        phase: forgeState?.phase || null
      });
    } catch (_) {
      // Non-fatal — DB write failures should not block the workflow.
    }
  }

  // ── Choice actions ────────────────────────────────────────────────────────

  // Store the chosen spec, record the decision, then return to the forge view.
  // `leaving` is set first so a repeated key press while the DB write is in
  // flight cannot record or navigate a second time.
  async function commitChoice(spec, chosenModel) {
    leaving = true;
    config._chosenSpec = spec;
    await recordComparison(chosenModel);
    navigate('forge');
  }

  async function chooseA() {
    if (!ready || leaving || !modelAResult) return;
    await commitChoice(modelAResult.nextState.spec, modelAConfig?.model || 'model_a');
  }

  async function chooseB() {
    if (!ready || leaving || !modelBResult) return;
    await commitChoice(modelBResult.nextState.spec, modelBConfig?.model || 'model_b');
  }

  async function chooseMerge() {
    if (!ready || leaving || !modelAResult || !modelBResult) return;
    await commitChoice(
      mergeSpecs(modelAResult.nextState.spec, modelBResult.nextState.spec),
      'merge'
    );
  }

  // ── Key bindings ─────────────────────────────────────────────────────────

  screenKey(['a', '1'], () => { chooseA(); });
  // 'b' is bound together with '2': both are advertised in the footer.
  screenKey(['b', '2'], () => { chooseB(); });
  screenKey(['m'], () => { chooseMerge(); });
  screenKey(['escape'], () => {
    // Mark the view as left so a still-running comparison does not touch the
    // shared footer or steal focus once it completes.
    leaving = true;
    navigate('forge');
  });

  // Tab toggles focus between the two panels so either can be scrolled.
  screenKey(['tab'], () => {
    if (screen.focused === panelA) {
      panelB.focus();
    } else {
      panelA.focus();
    }
    screen.render();
  });

  // ── Main async init (deferred to avoid blocking render) ──────────────────

  async function runComparison() {
    // 1. Load environment
    const env = loadEnv();

    // 2. Resolve model configs
    modelAConfig = resolveModelConfig(config, env, 'generation');
    modelBConfig = resolveModelConfig(config, env, 'secondary');

    // 3. Guard: no secondary model
    if (!modelBConfig.model) {
      updateLabelA(modelAConfig.model || 'unknown');
      updateLabelB('not configured');
      panelA.setContent('');
      panelB.setContent(
        'No secondary model configured.\n\n' +
        'Set models.secondary in Settings (option 1 → secondary role).'
      );
      setStatus('{red-fg} No secondary model configured.{/red-fg}');
      setFooter(' {bold}Escape{/bold} back');
      screen.render();
      return;
    }

    // 4. Update labels with resolved model names
    updateLabelA(modelAConfig.model || 'unknown');
    updateLabelB(modelBConfig.model || 'unknown');
    screen.render();

    // 5. Determine forge state
    forgeState = config._forgeState || createInitialState();
    if (!config._forgeState) {
      // No live state — inject a synthetic user input so the explore phase
      // has something to work with during a standalone comparison test.
      forgeState = {
        ...forgeState,
        messages: [{ role: 'user', content: config._forgeInput || 'test comparison' }]
      };
    }

    const userInput = config._forgeInput || null;

    // 6. Show running status and run both models in parallel
    setStatus('{yellow-fg} Running comparison against both models…{/yellow-fg}');
    showFooterWaiting();

    let errorA = null;
    let errorB = null;

    // Each call converts its own rejection into a null result so one model
    // failing does not discard the other model's output.
    const [resultA, resultB] = await Promise.all([
      forgeStep({
        state: forgeState,
        userInput,
        modelConfig: modelAConfig,
        existingTools: [],
        projectConfig: config,
        projectRoot: process.cwd()
      }).catch((err) => {
        errorA = err;
        return null;
      }),
      forgeStep({
        state: forgeState,
        userInput,
        modelConfig: modelBConfig,
        existingTools: [],
        projectConfig: config,
        projectRoot: process.cwd()
      }).catch((err) => {
        errorB = err;
        return null;
      })
    ]);

    // The user may have pressed Escape while the models were running; in that
    // case another view now owns the footer and focus, so do nothing.
    if (leaving) return;

    modelAResult = resultA;
    modelBResult = resultB;

    // 7. Populate panels with results (or error messages)
    if (errorA || !modelAResult) {
      panelA.setContent(
        `Error calling ${modelAConfig.model}:\n\n${errorA?.message || 'unknown error'}`
      );
    } else {
      panelA.setContent(formatResult(modelAResult));
    }

    if (errorB || !modelBResult) {
      panelB.setContent(
        `Error calling ${modelBConfig.model}:\n\n${errorB?.message || 'unknown error'}`
      );
    } else {
      panelB.setContent(formatResult(modelBResult));
    }

    // 8. Mark ready and update status/footer
    const readyA = !!modelAResult;
    const readyB = !!modelBResult;
    ready = readyA || readyB;

    // A side counts as failed when it produced no result, whether or not an
    // error object was captured for it.
    const failures = [!readyA && 'A', !readyB && 'B'].filter(Boolean).join(', ');

    if (readyA && readyB) {
      setStatus('{green-fg} Done.{/green-fg} Pick a result or merge.');
      showFooterReady();
    } else if (readyA) {
      setStatus(`{yellow-fg} Model(s) failed: ${failures}. Showing partial results.{/yellow-fg}`);
      setFooter(
        ' {bold}a{/bold}/{bold}1{/bold} use A ' +
        '{#555555-fg}b/2 use B (unavailable){/#555555-fg} ' +
        '{#555555-fg}m merge (unavailable){/#555555-fg} ' +
        '{bold}Escape{/bold} back'
      );
    } else if (readyB) {
      setStatus(`{yellow-fg} Model(s) failed: ${failures}. Showing partial results.{/yellow-fg}`);
      setFooter(
        '{#555555-fg} a/1 use A (unavailable){/#555555-fg} ' +
        '{bold}b{/bold}/{bold}2{/bold} use B ' +
        '{#555555-fg}m merge (unavailable){/#555555-fg} ' +
        '{bold}Escape{/bold} back'
      );
    } else {
      setStatus(`{red-fg} Error in model(s): ${failures}. Check API keys in Settings.{/red-fg}`);
      setFooter(' {bold}Escape{/bold} back');
    }

    panelA.focus();
    screen.render();
  }

  setImmediate(() => {
    runComparison().catch((err) => {
      // Anything thrown outside the per-model handlers (e.g. while resolving
      // model configs) would otherwise surface as an unhandled rejection.
      if (leaving) return;
      // The message goes into a tags-disabled panel so it is shown verbatim.
      panelA.setContent(`Comparison failed:\n\n${err?.message || String(err)}`);
      setStatus('{red-fg} Comparison failed unexpectedly.{/red-fg}');
      setFooter(' {bold}Escape{/bold} back');
      screen.render();
    });
  });

  // Return the root node so tui.js can track it as the active view box.
  return root;
}