npm - agent-tool-forge - Versions diffs - 0.3.0 - Mend

agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

package/LICENSE +21 -0
package/README.md +209 -0
package/lib/agent-registry.js +170 -0
package/lib/api-client.js +792 -0
package/lib/api-loader.js +260 -0
package/lib/auth.d.ts +25 -0
package/lib/auth.js +158 -0
package/lib/checks/check-adapter.js +172 -0
package/lib/checks/compose.js +42 -0
package/lib/checks/content-match.js +14 -0
package/lib/checks/cost-budget.js +11 -0
package/lib/checks/index.js +18 -0
package/lib/checks/json-valid.js +15 -0
package/lib/checks/latency.js +11 -0
package/lib/checks/length-bounds.js +17 -0
package/lib/checks/negative-match.js +14 -0
package/lib/checks/no-hallucinated-numbers.js +63 -0
package/lib/checks/non-empty.js +34 -0
package/lib/checks/regex-match.js +12 -0
package/lib/checks/run-checks.js +84 -0
package/lib/checks/schema-match.js +26 -0
package/lib/checks/tool-call-count.js +16 -0
package/lib/checks/tool-selection.js +34 -0
package/lib/checks/types.js +45 -0
package/lib/comparison/compare.js +86 -0
package/lib/comparison/format.js +104 -0
package/lib/comparison/index.js +6 -0
package/lib/comparison/statistics.js +59 -0
package/lib/comparison/types.js +41 -0
package/lib/config-schema.js +200 -0
package/lib/config.d.ts +66 -0
package/lib/conversation-store.d.ts +77 -0
package/lib/conversation-store.js +443 -0
package/lib/db.d.ts +6 -0
package/lib/db.js +1112 -0
package/lib/dep-check.js +99 -0
package/lib/drift-background.js +61 -0
package/lib/drift-monitor.js +187 -0
package/lib/eval-runner.js +566 -0
package/lib/fixtures/fixture-store.js +161 -0
package/lib/fixtures/index.js +11 -0
package/lib/forge-engine.js +982 -0
package/lib/forge-eval-generator.js +417 -0
package/lib/forge-file-writer.js +386 -0
package/lib/forge-service-client.js +190 -0
package/lib/forge-service.d.ts +4 -0
package/lib/forge-service.js +655 -0
package/lib/forge-verifier-generator.js +271 -0
package/lib/handlers/admin.js +151 -0
package/lib/handlers/agents.js +229 -0
package/lib/handlers/chat-resume.js +334 -0
package/lib/handlers/chat-sync.js +320 -0
package/lib/handlers/chat.js +320 -0
package/lib/handlers/conversations.js +92 -0
package/lib/handlers/preferences.js +88 -0
package/lib/handlers/tools-list.js +58 -0
package/lib/hitl-engine.d.ts +60 -0
package/lib/hitl-engine.js +261 -0
package/lib/http-utils.js +92 -0
package/lib/index.d.ts +20 -0
package/lib/index.js +141 -0
package/lib/init.js +636 -0
package/lib/manual-entry.js +59 -0
package/lib/mcp-server.js +252 -0
package/lib/output-groups.js +54 -0
package/lib/postgres-store.d.ts +31 -0
package/lib/postgres-store.js +465 -0
package/lib/preference-store.d.ts +47 -0
package/lib/preference-store.js +79 -0
package/lib/prompt-store.d.ts +42 -0
package/lib/prompt-store.js +60 -0
package/lib/rate-limiter.d.ts +30 -0
package/lib/rate-limiter.js +104 -0
package/lib/react-engine.d.ts +110 -0
package/lib/react-engine.js +337 -0
package/lib/runner/cli.js +156 -0
package/lib/runner/cost-estimator.js +71 -0
package/lib/runner/gate.js +46 -0
package/lib/runner/index.js +165 -0
package/lib/sidecar.d.ts +83 -0
package/lib/sidecar.js +161 -0
package/lib/sse.d.ts +15 -0
package/lib/sse.js +30 -0
package/lib/tools-scanner.js +91 -0
package/lib/tui.js +253 -0
package/lib/verifier-report.js +78 -0
package/lib/verifier-runner.js +338 -0
package/lib/verifier-scanner.js +70 -0
package/lib/verifier-worker-pool.js +196 -0
package/lib/views/chat.js +340 -0
package/lib/views/endpoints.js +203 -0
package/lib/views/eval-run.js +206 -0
package/lib/views/forge-agent.js +538 -0
package/lib/views/forge.js +410 -0
package/lib/views/main-menu.js +275 -0
package/lib/views/mediation.js +381 -0
package/lib/views/model-compare.js +430 -0
package/lib/views/model-comparison.js +333 -0
package/lib/views/onboarding.js +470 -0
package/lib/views/performance.js +237 -0
package/lib/views/run-evals.js +205 -0
package/lib/views/settings.js +829 -0
package/lib/views/tools-evals.js +514 -0
package/lib/views/verifier-coverage.js +617 -0
package/lib/workers/verifier-worker.js +52 -0
package/package.json +123 -0
package/widget/forge-chat.js +789 -0

package/lib/views/run-evals.js ADDED Viewed

@@ -0,0 +1,205 @@
+/**
+ * Run Evals View — Browse and run eval files directly from the TUI.
+ */
+import blessed from 'blessed';
+import { existsSync, readdirSync } from 'node:fs';
+import { resolve, basename } from 'node:path';
+/**
+ * Collect the eval files that sit directly inside the project's evals directory.
+ *
+ * The directory is `config.project.evalsDir` resolved against the current
+ * working directory; when that setting is missing or falsy, `docs/examples`
+ * is used instead. Only entries whose names end in `.golden.json` or
+ * `.labeled.json` are kept. Subdirectories are not descended into.
+ *
+ * @param {object} config - Forge configuration; only `project.evalsDir` is read.
+ * @returns {string[]} Absolute paths of the matching entries, or an empty
+ *   array when the directory does not exist or cannot be listed.
+ */
+function findEvalFiles(config) {
+  const evalSuffixes = ['.golden.json', '.labeled.json'];
+  const configuredDir = config?.project?.evalsDir || 'docs/examples';
+  const evalsDir = resolve(process.cwd(), configuredDir);
+
+  if (!existsSync(evalsDir)) {
+    return [];
+  }
+
+  const matches = [];
+  try {
+    for (const entry of readdirSync(evalsDir)) {
+      if (evalSuffixes.some((suffix) => entry.endsWith(suffix))) {
+        matches.push(resolve(evalsDir, entry));
+      }
+    }
+  } catch {
+    // Best effort: a directory that cannot be listed is treated as empty.
+    return [];
+  }
+  return matches;
+}
+/**
+ * Escape curly braces so blessed prints `text` literally instead of
+ * interpreting brace sequences inside it as style tags.
+ *
+ * @param {unknown} text - Value to display; coerced with `String()`.
+ * @returns {string} The text with `{` replaced by `{open}` and `}` by `{close}`.
+ */
+function escapeTags(text) {
+  return String(text).replace(/[{}]/g, (brace) => (brace === '{' ? '{open}' : '{close}'));
+}
+
+/**
+ * Build the function handed to the eval runner for sending one message to the agent.
+ *
+ * Reads `endpoint`, `method` (default `POST`), `headers`, `inputField`
+ * (default `message`) and `outputField` (default `text`) from `agentConfig`.
+ *
+ * @param {object} agentConfig - The `agent` section of the forge config.
+ * @returns {Function} Async `(message) => ({ responseText, toolsCalled, latencyMs })`;
+ *   it rejects with `Agent returned <status>` when the response is not OK.
+ */
+function createAgentFn(agentConfig) {
+  const method = agentConfig.method ?? 'POST';
+  const headers = { 'Content-Type': 'application/json', ...(agentConfig.headers ?? {}) };
+  const inputField = agentConfig.inputField ?? 'message';
+  const outputField = agentConfig.outputField ?? 'text';
+
+  return async (message) => {
+    const startedAt = Date.now();
+    const res = await fetch(agentConfig.endpoint, {
+      method, headers, body: JSON.stringify({ [inputField]: message })
+    });
+    if (!res.ok) throw new Error(`Agent returned ${res.status}`);
+    const data = await res.json();
+    return {
+      responseText: data[outputField] ?? '',
+      toolsCalled: data.toolsCalled ?? [],
+      latencyMs: Date.now() - startedAt,
+    };
+  };
+}
+
+/**
+ * Turn an eval-suite summary into the tagged lines shown in the results pane.
+ *
+ * Text taken from the summary (gate fields, case ids, failure reasons) is
+ * escaped so braces in it cannot be mistaken for blessed tags.
+ *
+ * @param {object} summary - Value resolved by `runEvalSuite`; this function reads
+ *   `total`, `passed`, `failed`, `skipped`, `passRate`, `p95LatencyMs`,
+ *   `gates.results` and `cases`.
+ * @returns {string[]} Lines ready to be joined with `\n`.
+ */
+function formatSummary(summary) {
+  const { total, passed, failed, skipped, passRate } = summary;
+  const pct = (passRate * 100).toFixed(1);
+  const passIcon = failed === 0 ? '{green-fg}✓{/green-fg}' : '{red-fg}✗{/red-fg}';
+
+  const lines = [`${passIcon} {bold}${passed}/${total} passed (${pct}%){/bold}`];
+  if (skipped > 0) lines.push(`{#888888-fg}${skipped} skipped{/#888888-fg}`);
+  if (summary.p95LatencyMs > 0) lines.push(`p95 latency: ${summary.p95LatencyMs}ms`);
+  lines.push('');
+
+  const gateResults = summary.gates?.results ?? [];
+  if (gateResults.length > 0) {
+    lines.push('{bold}Gates:{/bold}');
+    for (const r of gateResults) {
+      const gateIcon = r.pass ? '{green-fg}✓{/green-fg}' : '{red-fg}✗{/red-fg}';
+      lines.push(`${gateIcon} ${escapeTags(r.gate)}: ${escapeTags(r.actual)} (≥ ${escapeTags(r.threshold)})`);
+    }
+    lines.push('');
+  }
+
+  // A summary without `cases` is treated as having no failing cases rather
+  // than letting `.filter` throw and hide the totals computed above.
+  const failingCases = (summary.cases ?? []).filter((c) => c.status === 'failed');
+  if (failingCases.length > 0) {
+    lines.push('{bold}Failures:{/bold}');
+    for (const f of failingCases) {
+      lines.push(`{red-fg}✗{/red-fg} ${escapeTags(f.id ?? '(unnamed)')}: ${escapeTags(f.reason)}`);
+    }
+  } else if (failed === 0) {
+    lines.push('{green-fg}All cases passed!{/green-fg}');
+  }
+  return lines;
+}
+
+/**
+ * Build the "Run Evals" view: a list of eval files on the left, a results
+ * pane on the right and a one-line status bar.
+ *
+ * Key bindings on the list: Enter runs the selected file, `r` runs it in
+ * record mode, `p` runs it in replay mode and `b` navigates to `main-menu`.
+ *
+ * @param {object} deps
+ * @param {object} deps.screen - blessed screen; `render()` is called after each update.
+ * @param {object} [deps.content] - Not used by this view.
+ * @param {object} deps.config - Forge configuration (`project`, `agent`, `fixtures`, `gates`).
+ * @param {Function} deps.navigate - Called with `'main-menu'` when `b` is pressed.
+ * @param {Function} deps.setFooter - Receives the key-hint string for this view.
+ * @returns {object} The blessed container box, with a `refresh()` method that
+ *   reloads the file list.
+ */
+export function createView({ screen, content, config, navigate, setFooter }) {
+  const container = blessed.box({
+    top: 0, left: 0, width: '100%', height: '100%', tags: true
+  });
+
+  // Title (created for its side effect of attaching to the container)
+  blessed.box({
+    parent: container,
+    top: 0, left: 0, width: '100%', height: 3,
+    tags: true,
+    border: { type: 'line' },
+    style: { border: { fg: 'blue' } },
+    align: 'center',
+    valign: 'middle',
+    content: ' {bold}{cyan-fg}▲  Run Evals{/cyan-fg}{/bold} '
+  });
+
+  // Eval file list
+  const list = blessed.list({
+    parent: container,
+    top: 3, left: 2,
+    width: '50%-2', height: '100%-6',
+    tags: true, keys: true, vi: true, mouse: true,
+    border: { type: 'line' },
+    style: {
+      border: { fg: 'blue' },
+      selected: { bg: '#1a3a5c', bold: true },
+      item: { fg: 'white' }
+    },
+    label: ' Eval Files '
+  });
+
+  // Results pane
+  const resultsBox = blessed.box({
+    parent: container,
+    top: 3, left: '50%',
+    width: '50%', height: '100%-6',
+    tags: true, scrollable: true, alwaysScroll: true,
+    border: { type: 'line' },
+    style: { border: { fg: 'blue' } },
+    label: ' Results ',
+    padding: { left: 1, right: 1 }
+  });
+
+  const statusBar = blessed.box({
+    parent: container,
+    bottom: 2, left: 0, width: '100%', height: 1,
+    tags: true,
+    content: ''
+  });
+
+  setFooter(
+    ' {cyan-fg}↑↓{/cyan-fg} select  {cyan-fg}Enter{/cyan-fg} run  {cyan-fg}r{/cyan-fg} record  {cyan-fg}p{/cyan-fg} replay  {cyan-fg}b{/cyan-fg} back'
+  );
+
+  let evalFiles = [];
+  // Guards against starting a second run while one is still in flight.
+  let running = false;
+
+  /** Re-scan the evals directory and repopulate the list. */
+  function loadFiles() {
+    evalFiles = findEvalFiles(config);
+    if (evalFiles.length === 0) {
+      list.setItems([' {#888888-fg}No eval files found{/#888888-fg}']);
+    } else {
+      // File names are escaped because list items are tag-parsed.
+      list.setItems(evalFiles.map((f) => ` ${escapeTags(basename(f))}`));
+    }
+    screen.render();
+  }
+
+  /**
+   * Run the currently selected eval file and show the outcome.
+   * Every failure after the run starts is reported in the results pane
+   * instead of being thrown.
+   *
+   * @param {string} mode - `'record'` or `'replay'` enable the matching
+   *   runner option; any other value (the Enter key passes `'normal'`) enables neither.
+   */
+  async function runSelected(mode) {
+    if (running || evalFiles.length === 0) return;
+    const idx = list.selected;
+    if (idx < 0 || idx >= evalFiles.length) return;
+
+    const evalPath = evalFiles[idx];
+    const fileName = escapeTags(basename(evalPath));
+    running = true;
+    statusBar.setContent(` {yellow-fg}⟳ Running ${fileName}...{/yellow-fg}`);
+    resultsBox.setContent('Running...');
+    screen.render();
+
+    try {
+      const { runEvalSuite } = await import('../runner/index.js');
+      const agentConfig = config?.agent ?? {};
+      if (!agentConfig.endpoint) {
+        resultsBox.setContent(
+          '{red-fg}No agent.endpoint configured.{/red-fg}\n\n' +
+          'Add to forge.config.json:\n' +
+          '{\n  "agent": {\n    "endpoint": "http://localhost:8001/agent-api/chat-sync"\n  }\n}'
+        );
+        statusBar.setContent(' {red-fg}✗ Configuration error{/red-fg}');
+        return; // the finally block resets `running` and re-renders
+      }
+
+      const summary = await runEvalSuite(evalPath, createAgentFn(agentConfig), {
+        record: mode === 'record',
+        replay: mode === 'replay',
+        fixturesDir: resolve(process.cwd(), config?.fixtures?.dir ?? '.forge-fixtures'),
+        ttlDays: config?.fixtures?.ttlDays ?? 30,
+        gates: config?.gates ?? {},
+      });
+
+      resultsBox.setContent(formatSummary(summary).join('\n'));
+      const verdict = summary.failed === 0 ? '{green-fg}✓ Passed{/green-fg}' : '{red-fg}✗ Failed{/red-fg}';
+      statusBar.setContent(` ${verdict}  ${fileName}`);
+    } catch (err) {
+      // Thrown values are not guaranteed to be Error instances.
+      const message = err?.message ?? String(err);
+      resultsBox.setContent(`{red-fg}Error: ${escapeTags(message)}{/red-fg}`);
+      statusBar.setContent(' {red-fg}✗ Error{/red-fg}');
+    } finally {
+      running = false;
+      screen.render();
+    }
+  }
+
+  list.key('enter', () => runSelected('normal'));
+  list.key('r', () => runSelected('record'));
+  list.key('p', () => runSelected('replay'));
+  list.key('b', () => navigate('main-menu'));
+
+  container.refresh = () => {
+    loadFiles();
+  };
+
+  loadFiles();
+  list.focus();
+  return container;
+}