agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +209 -0
  3. package/lib/agent-registry.js +170 -0
  4. package/lib/api-client.js +792 -0
  5. package/lib/api-loader.js +260 -0
  6. package/lib/auth.d.ts +25 -0
  7. package/lib/auth.js +158 -0
  8. package/lib/checks/check-adapter.js +172 -0
  9. package/lib/checks/compose.js +42 -0
  10. package/lib/checks/content-match.js +14 -0
  11. package/lib/checks/cost-budget.js +11 -0
  12. package/lib/checks/index.js +18 -0
  13. package/lib/checks/json-valid.js +15 -0
  14. package/lib/checks/latency.js +11 -0
  15. package/lib/checks/length-bounds.js +17 -0
  16. package/lib/checks/negative-match.js +14 -0
  17. package/lib/checks/no-hallucinated-numbers.js +63 -0
  18. package/lib/checks/non-empty.js +34 -0
  19. package/lib/checks/regex-match.js +12 -0
  20. package/lib/checks/run-checks.js +84 -0
  21. package/lib/checks/schema-match.js +26 -0
  22. package/lib/checks/tool-call-count.js +16 -0
  23. package/lib/checks/tool-selection.js +34 -0
  24. package/lib/checks/types.js +45 -0
  25. package/lib/comparison/compare.js +86 -0
  26. package/lib/comparison/format.js +104 -0
  27. package/lib/comparison/index.js +6 -0
  28. package/lib/comparison/statistics.js +59 -0
  29. package/lib/comparison/types.js +41 -0
  30. package/lib/config-schema.js +200 -0
  31. package/lib/config.d.ts +66 -0
  32. package/lib/conversation-store.d.ts +77 -0
  33. package/lib/conversation-store.js +443 -0
  34. package/lib/db.d.ts +6 -0
  35. package/lib/db.js +1112 -0
  36. package/lib/dep-check.js +99 -0
  37. package/lib/drift-background.js +61 -0
  38. package/lib/drift-monitor.js +187 -0
  39. package/lib/eval-runner.js +566 -0
  40. package/lib/fixtures/fixture-store.js +161 -0
  41. package/lib/fixtures/index.js +11 -0
  42. package/lib/forge-engine.js +982 -0
  43. package/lib/forge-eval-generator.js +417 -0
  44. package/lib/forge-file-writer.js +386 -0
  45. package/lib/forge-service-client.js +190 -0
  46. package/lib/forge-service.d.ts +4 -0
  47. package/lib/forge-service.js +655 -0
  48. package/lib/forge-verifier-generator.js +271 -0
  49. package/lib/handlers/admin.js +151 -0
  50. package/lib/handlers/agents.js +229 -0
  51. package/lib/handlers/chat-resume.js +334 -0
  52. package/lib/handlers/chat-sync.js +320 -0
  53. package/lib/handlers/chat.js +320 -0
  54. package/lib/handlers/conversations.js +92 -0
  55. package/lib/handlers/preferences.js +88 -0
  56. package/lib/handlers/tools-list.js +58 -0
  57. package/lib/hitl-engine.d.ts +60 -0
  58. package/lib/hitl-engine.js +261 -0
  59. package/lib/http-utils.js +92 -0
  60. package/lib/index.d.ts +20 -0
  61. package/lib/index.js +141 -0
  62. package/lib/init.js +636 -0
  63. package/lib/manual-entry.js +59 -0
  64. package/lib/mcp-server.js +252 -0
  65. package/lib/output-groups.js +54 -0
  66. package/lib/postgres-store.d.ts +31 -0
  67. package/lib/postgres-store.js +465 -0
  68. package/lib/preference-store.d.ts +47 -0
  69. package/lib/preference-store.js +79 -0
  70. package/lib/prompt-store.d.ts +42 -0
  71. package/lib/prompt-store.js +60 -0
  72. package/lib/rate-limiter.d.ts +30 -0
  73. package/lib/rate-limiter.js +104 -0
  74. package/lib/react-engine.d.ts +110 -0
  75. package/lib/react-engine.js +337 -0
  76. package/lib/runner/cli.js +156 -0
  77. package/lib/runner/cost-estimator.js +71 -0
  78. package/lib/runner/gate.js +46 -0
  79. package/lib/runner/index.js +165 -0
  80. package/lib/sidecar.d.ts +83 -0
  81. package/lib/sidecar.js +161 -0
  82. package/lib/sse.d.ts +15 -0
  83. package/lib/sse.js +30 -0
  84. package/lib/tools-scanner.js +91 -0
  85. package/lib/tui.js +253 -0
  86. package/lib/verifier-report.js +78 -0
  87. package/lib/verifier-runner.js +338 -0
  88. package/lib/verifier-scanner.js +70 -0
  89. package/lib/verifier-worker-pool.js +196 -0
  90. package/lib/views/chat.js +340 -0
  91. package/lib/views/endpoints.js +203 -0
  92. package/lib/views/eval-run.js +206 -0
  93. package/lib/views/forge-agent.js +538 -0
  94. package/lib/views/forge.js +410 -0
  95. package/lib/views/main-menu.js +275 -0
  96. package/lib/views/mediation.js +381 -0
  97. package/lib/views/model-compare.js +430 -0
  98. package/lib/views/model-comparison.js +333 -0
  99. package/lib/views/onboarding.js +470 -0
  100. package/lib/views/performance.js +237 -0
  101. package/lib/views/run-evals.js +205 -0
  102. package/lib/views/settings.js +829 -0
  103. package/lib/views/tools-evals.js +514 -0
  104. package/lib/views/verifier-coverage.js +617 -0
  105. package/lib/workers/verifier-worker.js +52 -0
  106. package/package.json +123 -0
  107. package/widget/forge-chat.js +789 -0
@@ -0,0 +1,156 @@
1
+ /**
2
+ * CLI handler for `node lib/index.js run`.
3
+ *
4
+ * Usage:
5
+ * node lib/index.js run --eval <path> [--record] [--replay] [--suite <name>]
6
+ */
7
+
8
+ import { readFileSync, existsSync } from 'node:fs';
9
+ import { resolve } from 'node:path';
10
+ import { runEvalSuite } from './index.js';
11
+
12
/**
 * Parse the argument list that follows the `run` sub-command.
 *
 * Recognised flags:
 *   --eval <path>    path to the eval file (stored as `evalPath`)
 *   --suite <name>   suite name (stored as `suite`)
 *   --record         boolean switch
 *   --replay         boolean switch
 *
 * Unknown tokens are ignored. A value-taking flag that has no value is
 * ignored as well, so the caller can detect the missing value through the
 * `null` default.
 *
 * @param {string[]} args
 * @returns {{record: boolean, replay: boolean, evalPath: string|null, suite: string|null}}
 */
function parseArgs(args) {
  const knownFlags = new Set(['--eval', '--record', '--replay', '--suite']);
  const opts = { record: false, replay: false, evalPath: null, suite: null };

  // A value must be present and must not itself be one of our flags; otherwise
  // `--suite --record` would swallow `--record` as the suite name and silently
  // drop record mode.
  const hasValue = (index) => Boolean(args[index + 1]) && !knownFlags.has(args[index + 1]);

  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--eval' && hasValue(i)) { opts.evalPath = args[++i]; continue; }
    if (arg === '--record') { opts.record = true; continue; }
    if (arg === '--replay') { opts.replay = true; continue; }
    if (arg === '--suite' && hasValue(i)) { opts.suite = args[++i]; continue; }
  }
  return opts;
}
22
+
23
/**
 * Load `forge.config.json` from the current working directory.
 *
 * Never throws: a missing file yields `{}`, and an unreadable or malformed
 * file yields `{}` after printing a warning to stderr.
 *
 * @returns {object} the parsed configuration object, or `{}`
 */
function loadConfig() {
  const configPath = resolve(process.cwd(), 'forge.config.json');
  if (!existsSync(configPath)) return {};

  let parsed;
  try {
    parsed = JSON.parse(readFileSync(configPath, 'utf8'));
  } catch (err) {
    console.error(`Warning: forge.config.json is invalid JSON: ${err.message}`);
    return {};
  }

  // `null`, arrays and primitives are valid JSON but not a usable config.
  // Returning `null` would make the caller crash on `config.gates`.
  if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
    console.error('Warning: forge.config.json must contain a JSON object; ignoring it');
    return {};
  }
  return parsed;
}
33
+
34
/**
 * Build the function that sends one eval message to the configured agent
 * endpoint over HTTP and normalises the reply.
 *
 * Reads `config.agent`:
 *   - `endpoint`    (required) URL passed to `fetch`
 *   - `method`      HTTP method, default 'POST'
 *   - `headers`     extra headers merged over `Content-Type: application/json`
 *   - `inputField`  request body key that carries the message, default 'message'
 *   - `outputField` response body key that carries the reply text, default 'text'
 *
 * @param {object} config - parsed forge.config.json
 * @returns {Promise<(message: string) => Promise<{responseText: string, toolsCalled: string[], latencyMs: number}>>}
 * @throws {Error} when `agent.endpoint` is not configured
 */
async function buildAgentFn(config) {
  const agentConfig = config.agent ?? {};
  const endpoint = agentConfig.endpoint;
  if (!endpoint) {
    throw new Error('No agent.endpoint configured in forge.config.json.\nAdd: { "agent": { "endpoint": "http://localhost:8001/agent-api/chat-sync" } }');
  }

  const method = agentConfig.method ?? 'POST';
  const headers = { 'Content-Type': 'application/json', ...(agentConfig.headers ?? {}) };
  const inputField = agentConfig.inputField ?? 'message';
  const outputField = agentConfig.outputField ?? 'text';

  return async (message) => {
    const t0 = Date.now();
    const body = JSON.stringify({ [inputField]: message });

    let res;
    try {
      res = await fetch(endpoint, { method, headers, body });
    } catch (err) {
      // Keep the underlying network error reachable through `cause`.
      throw new Error(`Agent request failed: ${err.message}`, { cause: err });
    }
    if (!res.ok) throw new Error(`Agent returned ${res.status}`);

    let data;
    try {
      data = await res.json();
    } catch (err) {
      throw new Error(`Agent at ${endpoint} returned non-JSON response (status ${res.status})`, { cause: err });
    }
    // Latency covers the request and the body read/parse.
    const latencyMs = Date.now() - t0;

    // A JSON `null` (or primitive) body is treated like an object with no
    // fields instead of throwing a TypeError on property access.
    const payload = data !== null && typeof data === 'object' ? data : {};
    return {
      responseText: payload[outputField] ?? '',
      toolsCalled: payload.toolsCalled ?? [],
      latencyMs,
    };
  };
}
70
+
71
/**
 * Entry point for the `run` sub-command: runs one eval file against the
 * agent configured in forge.config.json, prints a report and terminates the
 * process.
 *
 * Exit code is 0 only when no case failed and every configured gate passed;
 * usage errors, a missing eval file, configuration errors and run errors
 * all exit with 1.
 *
 * @param {string[]} args - arguments following `run`
 * @returns {Promise<void>} does not return normally; always ends in process.exit
 */
export async function runCli(args) {
  // Print an error line and terminate with a failing exit code.
  const abort = (text) => {
    console.error(text);
    process.exit(1);
  };

  const opts = parseArgs(args);
  if (!opts.evalPath) {
    abort('Usage: node lib/index.js run --eval <path> [--record] [--replay] [--suite <name>]');
  }

  const evalPath = resolve(process.cwd(), opts.evalPath);
  if (!existsSync(evalPath)) {
    abort(`Eval file not found: ${evalPath}`);
  }

  const config = loadConfig();
  const runOptions = {
    gates: config.gates ?? {},
    fixturesDir: resolve(process.cwd(), config.fixtures?.dir ?? '.forge-fixtures'),
    ttlDays: config.fixtures?.ttlDays ?? 30,
    record: opts.record,
    replay: opts.replay,
    suiteName: opts.suite,
  };

  console.log(`\nRunning evals: ${opts.evalPath}`);
  if (opts.record) console.log(' [record mode] Saving fixtures');
  if (opts.replay) console.log(' [replay mode] Using cached fixtures where available');

  let agentFn;
  try {
    agentFn = await buildAgentFn(config);
  } catch (err) {
    abort(`\nConfiguration error: ${err.message}`);
  }

  let summary;
  try {
    summary = await runEvalSuite(evalPath, agentFn, runOptions);
  } catch (err) {
    abort(`\nEval run failed: ${err.message}`);
  }

  // Headline: pass count plus whichever optional metrics are non-zero.
  const { passed, failed, skipped, passRate, p95LatencyMs, totalCost, suiteName } = summary;
  const ran = passed + failed;
  const passRatePct = (passRate * 100).toFixed(1);
  const icon = failed === 0 ? '✓' : '✗';
  const prefix = suiteName ? `[${suiteName}] ` : '';

  const segments = [`\n${prefix}${icon} ${passed}/${ran} passed (${passRatePct}%)`];
  if (skipped > 0) segments.push(`${skipped} skipped`);
  if (p95LatencyMs > 0) segments.push(`p95 latency: ${p95LatencyMs}ms`);
  if (totalCost > 0) segments.push(`est. cost: $${totalCost.toFixed(6)}`);
  console.log(segments.join(', '));

  // Failing cases are listed whatever the gate outcome is.
  const failures = summary.cases.filter((entry) => entry.status === 'failed');
  if (failures.length > 0) {
    console.log('\nFailing cases:');
    failures.forEach((entry) => {
      console.log(` ✗ ${entry.id ?? '(unnamed)'}: ${entry.reason}`);
    });
  }

  if (summary.gates) {
    console.log('\nGate results:');
    for (const gate of summary.gates.results) {
      const gateIcon = gate.pass ? ' ✓' : ' ✗';
      console.log(`${gateIcon} ${gate.gate}: ${gate.actual} (threshold: ${gate.threshold})`);
    }
    if (summary.gates.pass) {
      console.log('\n✓ All gates passed');
    } else {
      console.log('\n✗ Gates failed — build should be blocked');
      process.exit(1);
    }
  }

  if (failures.length > 0) {
    process.exit(1);
  }

  process.exit(0);
}
@@ -0,0 +1,71 @@
1
+ // Adapted from agent-eval-kit by FlanaganSe (https://github.com/FlanaganSe/agent-eval-kit)
2
+ // MIT License — see LICENSE
3
+
4
/**
 * Per-million-token costs for common models.
 * Format: { input: $/M tokens, output: $/M tokens }
 *
 * The table has a null prototype so that a lookup with an arbitrary model
 * name (e.g. 'toString' or 'constructor') yields `undefined` instead of an
 * inherited Object.prototype member, which would otherwise defeat the
 * `?? default` fallback at the lookup sites and produce NaN costs.
 * The table itself is frozen (shallowly) because it is shared module state.
 */
const MODEL_COSTS = Object.freeze(Object.assign(Object.create(null), {
  'claude-opus-4-6': { input: 15, output: 75 },
  'claude-sonnet-4-6': { input: 3, output: 15 },
  'claude-haiku-4-5-20251001': { input: 0.8, output: 4 },
  'claude-3-5-sonnet-20241022': { input: 3, output: 15 },
  'claude-3-5-haiku-20241022': { input: 0.8, output: 4 },
  'claude-3-opus-20240229': { input: 15, output: 75 },
  'gpt-4o': { input: 2.5, output: 10 },
  'gpt-4o-mini': { input: 0.15, output: 0.6 },
  'gemini-2.0-flash': { input: 0.1, output: 0.4 },
  'gemini-1.5-pro': { input: 1.25, output: 5 },
  'deepseek-chat': { input: 0.27, output: 1.1 },
}));
21
+
22
/**
 * Price a single LLM call from its observed token usage.
 *
 * Models missing from MODEL_COSTS are priced at $3 / $15 per million
 * input / output tokens.
 *
 * @param {number} inputTokens - tokens sent to the model
 * @param {number} outputTokens - tokens produced by the model
 * @param {string} modelName - key into MODEL_COSTS
 * @returns {number} cost in USD
 */
export function computeActualCost(inputTokens, outputTokens, modelName) {
  const { input: inputRate, output: outputRate } = MODEL_COSTS[modelName] ?? { input: 3, output: 15 };
  const inputCost = (inputTokens / 1_000_000) * inputRate;
  const outputCost = (outputTokens / 1_000_000) * outputRate;
  return inputCost + outputCost;
}
34
+
35
/**
 * Project the cost of an eval run before executing it.
 *
 * Every call is assumed to use `avgInputTokens` input and `avgOutputTokens`
 * output tokens (500 / 200 unless overridden). Models missing from
 * MODEL_COSTS are priced at $3 / $15 per million tokens, i.e. the
 * claude-sonnet-4-6 rates.
 *
 * @param {number} caseCount - number of eval cases
 * @param {number} trialCount - number of trials per case
 * @param {string} modelName - model name (used for cost lookup)
 * @param {{avgInputTokens?: number, avgOutputTokens?: number}} [options]
 * @returns {{totalCalls: number, estimatedCostUsd: number, perCallCostUsd: number, modelName: string, summary: string}}
 */
export function estimateCost(caseCount, trialCount, modelName, options = {}) {
  const avgInputTokens = options.avgInputTokens === undefined ? 500 : options.avgInputTokens;
  const avgOutputTokens = options.avgOutputTokens === undefined ? 200 : options.avgOutputTokens;
  const { input: inputRate, output: outputRate } = MODEL_COSTS[modelName] ?? { input: 3, output: 15 };

  const totalCalls = caseCount * trialCount;
  const inputShare = ((totalCalls * avgInputTokens) / 1_000_000) * inputRate;
  const outputShare = ((totalCalls * avgOutputTokens) / 1_000_000) * outputRate;
  const estimatedCostUsd = inputShare + outputShare;

  // Guard the division so an empty run reports 0 rather than NaN.
  let perCallCostUsd = 0;
  if (totalCalls > 0) {
    perCallCostUsd = estimatedCostUsd / totalCalls;
  }

  return {
    totalCalls,
    estimatedCostUsd,
    perCallCostUsd,
    modelName,
    summary: `${totalCalls} calls × ${modelName} ≈ $${estimatedCostUsd.toFixed(4)} USD (est. ${avgInputTokens}in/${avgOutputTokens}out tokens/call)`,
  };
}
@@ -0,0 +1,46 @@
1
+ // Adapted from agent-eval-kit by FlanaganSe (https://github.com/FlanaganSe/agent-eval-kit)
2
+ // MIT License — see LICENSE
3
+
4
/**
 * Compare a run summary against the configured gate thresholds.
 *
 * Only gates whose threshold is not `undefined` are evaluated, always in the
 * order passRate, maxCost, p95LatencyMs:
 *   - passRate      passes when summary.passRate     >= threshold
 *   - maxCost       passes when summary.totalCost    <= threshold
 *   - p95LatencyMs  passes when summary.p95LatencyMs <= threshold
 *
 * The overall result passes when every evaluated gate passes, which
 * includes the case where no gate is configured.
 *
 * @param {{passRate: number, totalCost: number, p95LatencyMs: number, totalCases: number}} summary
 * @param {{passRate?: number, maxCost?: number, p95LatencyMs?: number}} gates
 * @returns {{pass: boolean, results: Array<{gate: string, threshold: number, actual: number, pass: boolean}>}}
 */
export function evaluateGates(summary, gates) {
  const atLeast = (actual, threshold) => actual >= threshold;
  const atMost = (actual, threshold) => actual <= threshold;

  // gate name -> summary field it is measured against -> comparison
  const definitions = [
    { gate: 'passRate', field: 'passRate', satisfied: atLeast },
    { gate: 'maxCost', field: 'totalCost', satisfied: atMost },
    { gate: 'p95LatencyMs', field: 'p95LatencyMs', satisfied: atMost },
  ];

  const results = definitions
    .filter(({ gate }) => gates[gate] !== undefined)
    .map(({ gate, field, satisfied }) => {
      const threshold = gates[gate];
      const actual = summary[field];
      return { gate, threshold, actual, pass: satisfied(actual, threshold) };
    });

  return { pass: results.every((entry) => entry.pass), results };
}
@@ -0,0 +1,165 @@
1
+ /**
2
+ * Programmatic eval runner API.
3
+ *
4
+ * Usage:
5
+ * import { runEvalSuite } from './lib/runner/index.js';
6
+ * const summary = await runEvalSuite('./evals/my-tool.golden.json', async (message) => {
7
+ * const res = await fetch('http://localhost:8001/agent-api/chat', { ... });
8
+ * return { responseText: ..., toolsCalled: [], latencyMs: ... };
9
+ * });
10
+ */
11
+
12
+ import { readFile } from 'node:fs/promises';
13
+ import { runChecks } from '../checks/run-checks.js';
14
+ import { checkAdapter, checkResponseContainsAnyGroups, checkToolsAcceptable } from '../checks/check-adapter.js';
15
+ import { evaluateGates } from './gate.js';
16
+ import { writeFixture, readFixture, sortKeysDeep } from '../fixtures/fixture-store.js';
17
+
18
/**
 * Run an eval suite programmatically.
 *
 * Each case in the eval file is sent to `agentFn` (or served from a recorded
 * fixture in replay mode) and checked against its expectations. Cases
 * without an `input.message` are counted as skipped, and so are entries that
 * are not objects. An agent call that throws, or that resolves to
 * `null`/`undefined`, fails that case only and the run continues.
 *
 * `passRate` is computed over the cases that actually ran (passed + failed).
 * `p95LatencyMs` and `totalCost` only include live agent calls: fixture hits
 * contribute neither.
 *
 * @param {string} evalFilePath - path to eval JSON file (a JSON array of cases)
 * @param {(message: string) => Promise<{responseText: string, toolsCalled: string[], latencyMs?: number, cost?: number}>} agentFn
 * @param {{
 *   record?: boolean,
 *   replay?: boolean,
 *   fixturesDir?: string,
 *   ttlDays?: number,
 *   gates?: {passRate?: number, maxCost?: number, p95LatencyMs?: number},
 *   suiteName?: string,
 * }} [opts]
 * @returns {Promise<{total: number, passed: number, failed: number, skipped: number, passRate: number, p95LatencyMs: number, totalCost: number, cases: object[], suiteName?: string, gates?: object}>}
 * @throws {Error} when the eval file cannot be read or parsed, or is not a JSON array
 */
export async function runEvalSuite(evalFilePath, agentFn, opts = {}) {
  const { record = false, replay = false, fixturesDir = '.forge-fixtures', ttlDays = 30, gates = {}, suiteName } = opts;

  // Load eval cases
  let cases;
  try {
    const raw = await readFile(evalFilePath, 'utf8');
    cases = JSON.parse(raw);
  } catch (err) {
    throw new Error(`Failed to load eval file ${evalFilePath}: ${err.message}`, { cause: err });
  }

  if (!Array.isArray(cases)) {
    throw new Error(`Eval file must contain a JSON array of cases`);
  }

  let passed = 0;
  let failed = 0;
  let skipped = 0;
  let totalCost = 0;
  const caseResults = [];
  const allLatencies = [];

  for (const evalCase of cases) {
    // Optional chaining on the case itself: a `null` entry in the array is
    // skipped like any other case without a message instead of aborting the run.
    const message = evalCase?.input?.message ?? '';
    if (!message) {
      skipped++;
      caseResults.push({ id: evalCase?.id, status: 'skipped', reason: 'no input message' });
      continue;
    }

    // Fixture identity, shared by replay and record. Computed lazily so that
    // plain live runs never touch the fixture helpers.
    const fixtureKey = () => ({
      caseId: evalCase.id ?? message.slice(0, 40),
      configHash: JSON.stringify(sortKeysDeep(evalCase.expect ?? {})),
    });

    // Fixture replay
    if (replay) {
      const { caseId, configHash } = fixtureKey();
      const hit = await readFixture(fixturesDir, caseId, configHash, { ttlDays });
      if (hit.status === 'hit') {
        const { responseText, toolsCalled } = hit.output;
        const failures = checkCase(evalCase, { responseText, toolsCalled });
        const casePassed = failures.length === 0;
        if (casePassed) passed++; else failed++;
        // Note: fixture hits do not contribute latency or cost — p95LatencyMs and totalCost
        // reflect live-only cases. Latency/cost gates trivially pass on fully-cached runs.
        caseResults.push({ id: evalCase.id, status: casePassed ? 'passed' : 'failed', reason: failures.join('; ') || null, fromFixture: true });
        continue;
      }
    }

    // Call agent
    let result;
    try {
      result = await agentFn(message);
    } catch (err) {
      failed++;
      caseResults.push({ id: evalCase.id, status: 'failed', reason: `Agent error: ${err.message}` });
      continue;
    }

    // An agentFn that forgets to return would otherwise crash the
    // destructuring below and take the whole suite down with it.
    if (result == null) {
      failed++;
      caseResults.push({ id: evalCase.id, status: 'failed', reason: 'Agent error: agentFn returned no result' });
      continue;
    }

    const { responseText = '', toolsCalled = [], latencyMs, cost } = result;

    // Record fixture (best effort: a write failure is logged, not fatal)
    if (record) {
      const { caseId, configHash } = fixtureKey();
      await writeFixture(fixturesDir, caseId, configHash, { responseText, toolsCalled }).catch((err) => {
        console.warn(`[forge] Failed to write fixture for case "${caseId}": ${err.message}`);
      });
    }

    if (latencyMs !== undefined) allLatencies.push(latencyMs);
    if (cost !== undefined) totalCost += cost;

    const failures = checkCase(evalCase, { responseText, toolsCalled, latencyMs, cost });
    const casePassed = failures.length === 0;
    if (casePassed) passed++; else failed++;
    caseResults.push({ id: evalCase.id, status: casePassed ? 'passed' : 'failed', reason: failures.join('; ') || null });
  }

  const total = cases.length;
  const ran = passed + failed;
  const passRate = ran > 0 ? passed / ran : 0;

  // Compute p95 latency (0 when no live call reported a latency)
  const sortedLatencies = [...allLatencies].sort((a, b) => a - b);
  const p95Index = Math.floor((sortedLatencies.length - 1) * 0.95);
  const p95LatencyMs = sortedLatencies[p95Index] ?? 0;

  const summary = { passRate, totalCost, p95LatencyMs, totalCases: total };

  // Gate evaluation: only thresholds that are neither null nor undefined count
  let gateResult;
  const activeGates = Object.fromEntries(Object.entries(gates).filter(([, value]) => value != null));
  if (Object.keys(activeGates).length > 0) {
    gateResult = evaluateGates(summary, activeGates);
  }

  return {
    total,
    passed,
    failed,
    skipped,
    passRate,
    p95LatencyMs,
    totalCost,
    cases: caseResults,
    ...(suiteName ? { suiteName } : {}),
    ...(gateResult ? { gates: gateResult } : {}),
  };
}
140
+
141
/**
 * Evaluate one case's expectations against an agent response and collect
 * a human-readable reason for every check that did not pass.
 *
 * Three sources contribute failures, in this order: the checks returned by
 * runChecks() for the adapted case, then `expect.responseContainsAny`
 * (when non-empty), then `expect.toolsAcceptable` (when defined).
 *
 * @param {object} evalCase
 * @param {{responseText: string, toolsCalled: string[], latencyMs?: number, cost?: number}} meta
 * @returns {string[]} failure reasons; empty when the case passes
 */
function checkCase(evalCase, { responseText, toolsCalled, latencyMs, cost }) {
  const adapted = checkAdapter(evalCase, { toolsCalled, responseText, latencyMs, cost });
  const { checks } = runChecks(adapted);

  // A check without its own reason falls back to "<name> failed".
  const failures = Object.entries(checks)
    .filter(([, outcome]) => !outcome.pass)
    .map(([name, outcome]) => outcome.reason ?? `${name} failed`);

  const { responseContainsAny, toolsAcceptable } = evalCase.expect ?? {};

  if (responseContainsAny?.length) {
    const groupOutcome = checkResponseContainsAnyGroups(responseText, responseContainsAny);
    if (!groupOutcome.pass) failures.push(groupOutcome.reason);
  }

  if (toolsAcceptable !== undefined) {
    const toolOutcome = checkToolsAcceptable(toolsCalled, toolsAcceptable);
    if (!toolOutcome.pass) failures.push(toolOutcome.reason);
  }

  return failures;
}
@@ -0,0 +1,83 @@
1
+ import type { Server } from 'http';
2
+ import type { SidecarConfig } from './config-schema.js';
3
+ import type { AuthResult, AuthConfig, Authenticator } from './auth.js';
4
+ import type { ConversationMessage, SessionSummary, ConversationStore } from './conversation-store.js';
5
+ import type { ReactEvent, ReactLoopParams } from './react-engine.js';
6
+
7
/** Runtime options accepted by `createSidecar` (second argument). */
export interface SidecarOptions {
  /** Port to listen on. Defaults to 8001. */
  port?: number;
  /** Bind address. Defaults to '0.0.0.0'. */
  host?: string;
  /** SQLite database path. Defaults to ':memory:'. */
  dbPath?: string;
  /**
   * Environment variables handed to the sidecar context. Defaults to
   * `process.env`, whose values may be `undefined`, so the type accepts
   * that shape and `process.env` can be passed explicitly under strict
   * type-checking.
   */
  env?: Record<string, string | undefined>;
  /** Start listening before `createSidecar` resolves. Defaults to true. */
  autoListen?: boolean;
  /** Start the background drift monitor. Defaults to false. */
  enableDrift?: boolean;
  /** Custom widget directory forwarded to the router. */
  widgetDir?: string;
}

/** Shared services and settings built for a sidecar and handed to the router. */
export interface SidecarContext {
  auth: Authenticator;
  promptStore: object;
  preferenceStore: object;
  conversationStore: ConversationStore;
  agentRegistry: object;
  verifierRunner: object | null;
  hitlEngine: object | null;
  rateLimiter: object | null;
  db: object;
  config: SidecarConfig;
  env: Record<string, string>;
  configPath?: string;
  [key: string]: unknown;
}

/** Value resolved by `createSidecar`. */
export interface SidecarInstance {
  /** The HTTP server created for the sidecar router. */
  server: Server;
  /** The context the router was built from. */
  ctx: SidecarContext;
  /** Stop the server and release the sidecar's connections. */
  close(): Promise<void>;
}

/**
 * Create a sidecar: merge defaults into `config`, validate it, open the
 * database, build the context and HTTP server, and (unless
 * `options.autoListen` is false) start listening.
 */
export function createSidecar(config?: Partial<SidecarConfig>, options?: SidecarOptions): Promise<SidecarInstance>;

// Advanced consumers
export function buildSidecarContext(config: SidecarConfig, db: object, env?: Record<string, string | undefined>, opts?: object): Promise<SidecarContext>;
export function createSidecarRouter(ctx: SidecarContext, opts?: object): (req: object, res: object) => void;

export { createAuth } from './auth.js';
export type { AuthResult, AuthConfig, Authenticator } from './auth.js';

export { reactLoop } from './react-engine.js';
export type { ReactEvent, ReactLoopParams, TextEvent, TextDeltaEvent, ToolCallEvent, ToolResultEvent, ToolWarningEvent, HitlEvent, ErrorEvent, DoneEvent } from './react-engine.js';

export { mergeDefaults, validateConfig, CONFIG_DEFAULTS } from './config-schema.js';
export type { SidecarConfig, AgentConfig, RateLimitConfig, VerificationConfig } from './config-schema.js';

export { makeConversationStore } from './conversation-store.js';
export type { ConversationMessage, SessionSummary, ConversationStore } from './conversation-store.js';

export function getDb(path: string): object;
export function initSSE(res: object): { write(event: string, data: unknown): void; close(): void };
export function makePromptStore(config: object, db: object): object;
export function makePreferenceStore(config: object, db: object): object;
export function makeHitlEngine(config: object, db: object, redis?: object, pgPool?: object): object;
export function makeAgentRegistry(config: object, db: object): object;

/** Agent registry class re-exported from `./agent-registry.js`. */
export class AgentRegistry {
  constructor(config: object, db: object);
  resolveAgent(agentId: string | null): Promise<object | null>;
  getAgent(agentId: string): Promise<object | null>;
  getAllAgents(): Promise<object[]>;
  upsertAgent(agent: object): Promise<void>;
  setDefault(agentId: string): Promise<void>;
  deleteAgent(agentId: string): Promise<void>;
  seedFromConfig(): Promise<void>;
  filterTools(tools: object[]): object[];
  buildAgentConfig(config: object, agent: object | null): object;
  resolveSystemPrompt(agent: object | null, promptStore: object, config: object): Promise<string>;
}

/** Verifier runner class re-exported from `./verifier-runner.js`. */
export class VerifierRunner {
  constructor(db: object, config?: object, workerPool?: object);
  loadFromDb(db: object): Promise<void>;
  run(toolName: string, args: object, result: unknown): Promise<Array<{ outcome: 'pass' | 'warn' | 'block'; message: string | null; verifier: string }>>;
  destroy(): void;
}
package/lib/sidecar.js ADDED
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Sidecar — Library entry point for agent-tool-forge.
3
+ *
4
+ * Usage:
5
+ * import { createSidecar } from 'agent-tool-forge';
6
+ * const sidecar = await createSidecar({ ... }, { port: 8001 });
7
+ * // sidecar.server, sidecar.ctx, sidecar.close()
8
+ *
9
+ * Advanced:
10
+ * import { buildSidecarContext, createSidecarRouter, mergeDefaults, getDb } from 'agent-tool-forge';
11
+ */
12
+
13
+ import { createServer as createHttpServer } from 'http';
14
+ import { resolve } from 'path';
15
+ import { getDb } from './db.js';
16
+ import { mergeDefaults, validateConfig } from './config-schema.js';
17
+ import { buildSidecarContext, createSidecarRouter } from './forge-service.js';
18
+ import { createDriftMonitor } from './drift-background.js';
19
+
20
/**
 * Create a fully configured sidecar instance.
 *
 * Steps: merge defaults into `config` and validate it, open the SQLite
 * database, build the sidecar context, seed agents from config, create the
 * HTTP server, optionally start the drift monitor and optionally listen.
 * If any step after opening the database fails, everything opened so far is
 * released before the error is rethrown, because the caller never receives
 * a `close()` handle in that case.
 *
 * @param {object} config — sidecar configuration (auth, agents, model, etc.)
 * @param {object} [options]
 * @param {number} [options.port=8001] — port to listen on
 * @param {string} [options.host='0.0.0.0'] — bind address
 * @param {string} [options.dbPath=':memory:'] — SQLite database path
 * @param {Record<string, string>} [options.env] — environment variables (defaults to process.env)
 * @param {boolean} [options.autoListen=true] — start listening immediately
 * @param {boolean} [options.enableDrift=false] — start background drift monitor
 * @param {string} [options.widgetDir] — custom widget directory
 * @returns {Promise<{ server: import('http').Server, ctx: object, close: () => Promise<void> }>}
 * @throws {Error} when the merged config is invalid, or when context
 *   creation, agent seeding or listening fails
 */
export async function createSidecar(config = {}, options = {}) {
  const {
    port = 8001,
    host = '0.0.0.0',
    dbPath = ':memory:',
    env = process.env,
    autoListen = true,
    enableDrift = false,
    widgetDir,
  } = options;

  // Merge defaults first so validateConfig sees a fully-populated object (M1).
  // Validating the raw user config risks false positives on missing-but-defaulted
  // fields (e.g. auth.mode absent in raw → would fail "must be one of" if
  // validateConfig checked before defaults were applied).
  const merged = mergeDefaults(config);
  const { valid, errors } = validateConfig(merged);
  if (!valid) {
    throw new Error(`Invalid sidecar config: ${errors.join('; ')}`);
  }

  // Initialize database with WAL mode
  const db = getDb(dbPath);
  try {
    db.pragma('journal_mode = WAL');
  } catch {
    // WAL not supported on all platforms — continue without it
  }

  // Build sidecar context (async — may create Redis/Postgres clients)
  let ctx;
  try {
    ctx = await buildSidecarContext(merged, db, env);
  } catch (err) {
    // Nothing but the database exists yet; don't leave it open.
    try { db.close(); } catch { /* already closed */ }
    throw err;
  }

  // Best-effort release of everything the context holds. Each step is
  // isolated so one failing client cannot prevent the others from closing.
  const teardownConnections = async () => {
    try { if (ctx.verifierRunner?.destroy) ctx.verifierRunner.destroy(); } catch { /* ignore */ }
    try { if (ctx._redisClient) await ctx._redisClient.quit(); } catch { /* ignore */ }
    try { if (ctx._pgPool) await ctx._pgPool.end(); } catch { /* ignore */ }
    try { db.close(); } catch { /* already closed */ }
  };

  // Seed agents from config
  try {
    await ctx.agentRegistry.seedFromConfig();
  } catch (err) {
    await teardownConnections();
    throw err;
  }

  // Build request handler
  const routerOpts = {};
  if (widgetDir) routerOpts.widgetDir = widgetDir;
  const router = createSidecarRouter(ctx, routerOpts);

  // Create HTTP server
  const server = createHttpServer(router);

  // Optional drift monitor
  let driftMonitor = null;
  if (enableDrift) {
    driftMonitor = createDriftMonitor(merged, db);
    driftMonitor.start();
  }

  // Memoised so repeated close() calls share one teardown and every caller
  // waits for it to finish, instead of later callers resolving early (M2).
  let closePromise = null;

  // close() tears down everything cleanly
  function close() {
    if (closePromise) return closePromise;

    if (driftMonitor) {
      driftMonitor.stop();
      driftMonitor = null;
    }

    closePromise = new Promise((res) => {
      let settled = false;
      let timer;
      const finish = async () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        await teardownConnections();
        res();
      };
      // The callback also fires (with an error) when the server never
      // started listening, so close() works for autoListen=false too.
      server.close(() => finish());
      // Force-resolve after 2s if connections linger — do NOT call process.exit()
      // in a library module as it would kill the host application (M2).
      timer = setTimeout(() => {
        if (settled) return;
        console.error('[forge-sidecar] close() timed out after 2s — forcing resolve');
        finish();
      }, 2000);
      // Ensure the timeout doesn't keep the event loop alive if finish() runs first
      if (timer.unref) timer.unref();
    });
    return closePromise;
  }

  // Optionally start listening
  if (autoListen) {
    try {
      await new Promise((res, rej) => {
        // Use once() so the error listener is removed after firing and doesn't
        // become a ghost listener that fires on unrelated future errors.
        server.once('error', rej);
        server.listen(port, host, () => {
          // Remove the one-time error listener if listen succeeded, so it doesn't
          // linger as a ghost listener on the now-live server.
          server.removeListener('error', rej);
          res();
        });
      });
    } catch (err) {
      // e.g. EADDRINUSE: release the drift monitor, clients and database
      // before surfacing the error.
      await close();
      throw err;
    }
  }

  return { server, ctx, close };
}
148
+
149
+ // Re-exports for advanced consumers
150
+ export { buildSidecarContext, createSidecarRouter } from './forge-service.js';
151
+ export { createAuth } from './auth.js';
152
+ export { reactLoop } from './react-engine.js';
153
+ export { mergeDefaults, validateConfig, CONFIG_DEFAULTS } from './config-schema.js';
154
+ export { getDb } from './db.js';
155
+ export { initSSE } from './sse.js';
156
+ export { VerifierRunner } from './verifier-runner.js';
157
+ export { makePromptStore } from './prompt-store.js';
158
+ export { makePreferenceStore } from './preference-store.js';
159
+ export { makeConversationStore } from './conversation-store.js';
160
+ export { makeHitlEngine } from './hitl-engine.js';
161
+ export { makeAgentRegistry, AgentRegistry } from './agent-registry.js';