agent-tool-forge 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/lib/agent-registry.js +170 -0
- package/lib/api-client.js +792 -0
- package/lib/api-loader.js +260 -0
- package/lib/auth.d.ts +25 -0
- package/lib/auth.js +158 -0
- package/lib/checks/check-adapter.js +172 -0
- package/lib/checks/compose.js +42 -0
- package/lib/checks/content-match.js +14 -0
- package/lib/checks/cost-budget.js +11 -0
- package/lib/checks/index.js +18 -0
- package/lib/checks/json-valid.js +15 -0
- package/lib/checks/latency.js +11 -0
- package/lib/checks/length-bounds.js +17 -0
- package/lib/checks/negative-match.js +14 -0
- package/lib/checks/no-hallucinated-numbers.js +63 -0
- package/lib/checks/non-empty.js +34 -0
- package/lib/checks/regex-match.js +12 -0
- package/lib/checks/run-checks.js +84 -0
- package/lib/checks/schema-match.js +26 -0
- package/lib/checks/tool-call-count.js +16 -0
- package/lib/checks/tool-selection.js +34 -0
- package/lib/checks/types.js +45 -0
- package/lib/comparison/compare.js +86 -0
- package/lib/comparison/format.js +104 -0
- package/lib/comparison/index.js +6 -0
- package/lib/comparison/statistics.js +59 -0
- package/lib/comparison/types.js +41 -0
- package/lib/config-schema.js +200 -0
- package/lib/config.d.ts +66 -0
- package/lib/conversation-store.d.ts +77 -0
- package/lib/conversation-store.js +443 -0
- package/lib/db.d.ts +6 -0
- package/lib/db.js +1112 -0
- package/lib/dep-check.js +99 -0
- package/lib/drift-background.js +61 -0
- package/lib/drift-monitor.js +187 -0
- package/lib/eval-runner.js +566 -0
- package/lib/fixtures/fixture-store.js +161 -0
- package/lib/fixtures/index.js +11 -0
- package/lib/forge-engine.js +982 -0
- package/lib/forge-eval-generator.js +417 -0
- package/lib/forge-file-writer.js +386 -0
- package/lib/forge-service-client.js +190 -0
- package/lib/forge-service.d.ts +4 -0
- package/lib/forge-service.js +655 -0
- package/lib/forge-verifier-generator.js +271 -0
- package/lib/handlers/admin.js +151 -0
- package/lib/handlers/agents.js +229 -0
- package/lib/handlers/chat-resume.js +334 -0
- package/lib/handlers/chat-sync.js +320 -0
- package/lib/handlers/chat.js +320 -0
- package/lib/handlers/conversations.js +92 -0
- package/lib/handlers/preferences.js +88 -0
- package/lib/handlers/tools-list.js +58 -0
- package/lib/hitl-engine.d.ts +60 -0
- package/lib/hitl-engine.js +261 -0
- package/lib/http-utils.js +92 -0
- package/lib/index.d.ts +20 -0
- package/lib/index.js +141 -0
- package/lib/init.js +636 -0
- package/lib/manual-entry.js +59 -0
- package/lib/mcp-server.js +252 -0
- package/lib/output-groups.js +54 -0
- package/lib/postgres-store.d.ts +31 -0
- package/lib/postgres-store.js +465 -0
- package/lib/preference-store.d.ts +47 -0
- package/lib/preference-store.js +79 -0
- package/lib/prompt-store.d.ts +42 -0
- package/lib/prompt-store.js +60 -0
- package/lib/rate-limiter.d.ts +30 -0
- package/lib/rate-limiter.js +104 -0
- package/lib/react-engine.d.ts +110 -0
- package/lib/react-engine.js +337 -0
- package/lib/runner/cli.js +156 -0
- package/lib/runner/cost-estimator.js +71 -0
- package/lib/runner/gate.js +46 -0
- package/lib/runner/index.js +165 -0
- package/lib/sidecar.d.ts +83 -0
- package/lib/sidecar.js +161 -0
- package/lib/sse.d.ts +15 -0
- package/lib/sse.js +30 -0
- package/lib/tools-scanner.js +91 -0
- package/lib/tui.js +253 -0
- package/lib/verifier-report.js +78 -0
- package/lib/verifier-runner.js +338 -0
- package/lib/verifier-scanner.js +70 -0
- package/lib/verifier-worker-pool.js +196 -0
- package/lib/views/chat.js +340 -0
- package/lib/views/endpoints.js +203 -0
- package/lib/views/eval-run.js +206 -0
- package/lib/views/forge-agent.js +538 -0
- package/lib/views/forge.js +410 -0
- package/lib/views/main-menu.js +275 -0
- package/lib/views/mediation.js +381 -0
- package/lib/views/model-compare.js +430 -0
- package/lib/views/model-comparison.js +333 -0
- package/lib/views/onboarding.js +470 -0
- package/lib/views/performance.js +237 -0
- package/lib/views/run-evals.js +205 -0
- package/lib/views/settings.js +829 -0
- package/lib/views/tools-evals.js +514 -0
- package/lib/views/verifier-coverage.js +617 -0
- package/lib/workers/verifier-worker.js +52 -0
- package/package.json +123 -0
- package/widget/forge-chat.js +789 -0
package/lib/dep-check.js
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Dependency Checker — shared utility for checking and optionally
|
|
3
|
+
* installing npm dependencies at runtime.
|
|
4
|
+
*
|
|
5
|
+
* Used by:
|
|
6
|
+
* - forge-service.js (non-interactive: requireDependency)
|
|
7
|
+
* - init.js (interactive: ensureDependencyInteractive)
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import { execFileSync } from 'child_process';
|
|
11
|
+
|
|
12
|
+
/**
 * Check whether an npm package can be loaded via dynamic import.
 *
 * Load failures are reported in the result instead of being thrown.
 * `likelyCause` separates a specifier that could not be resolved
 * (`'not_installed'`) from a module that was found but failed while loading
 * (`'broken_package'`).
 *
 * @param {string} packageName
 * @returns {Promise<{ available: boolean, error?: string, likelyCause?: 'not_installed' | 'broken_package' }>}
 */
export async function checkDependency(packageName) {
  try {
    await import(packageName);
    return { available: true };
  } catch (err) {
    // Tolerate non-Error rejections so the classifier itself cannot throw.
    const message = typeof err?.message === 'string' ? err.message : String(err);
    const code = err?.code;

    // Node's ESM loader reports an unresolved specifier with
    // ERR_MODULE_NOT_FOUND; MODULE_NOT_FOUND is the CommonJS equivalent and can
    // still surface when the imported module is CommonJS. The message checks
    // are kept as a fallback for loaders that do not set a code
    // (NOTE(review): the last two look like bundler/dev-server wording — confirm).
    const notInstalled =
      code === 'ERR_MODULE_NOT_FOUND' ||
      code === 'MODULE_NOT_FOUND' ||
      message.includes('Cannot find package') ||
      message.includes('Does the file exist?') ||
      message.includes('Failed to load url');

    return {
      available: false,
      error: message,
      likelyCause: notInstalled ? 'not_installed' : 'broken_package'
    };
  }
}
|
|
34
|
+
|
|
35
|
+
/**
 * Build the shell command a user should run to install a missing package.
 *
 * @param {string} packageName
 * @returns {string} The text `npm install <packageName>`
 */
function installHint(packageName) {
  const command = 'npm install ';
  return command.concat(packageName);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Non-interactive dependency gate: resolves when the package loads and throws
 * an Error with remediation advice otherwise. Intended for startup paths that
 * have no readline interface to prompt with.
 *
 * @param {string} packageName
 * @returns {Promise<void>}
 * @throws {Error} When the package is missing (message includes the install
 *   command) or present but failing to load (message suggests `npm rebuild`).
 */
export async function requireDependency(packageName) {
  const { available, likelyCause, error } = await checkDependency(packageName);
  if (available) {
    return;
  }

  // The two failure modes need different advice: rebuild vs. install.
  const message = likelyCause === 'broken_package'
    ? `Package "${packageName}" is installed but failed to load: ${error}\nThis may be a native addon compilation issue. Try: npm rebuild ${packageName}`
    : `Required package "${packageName}" is not installed. Run: ${installHint(packageName)}`;

  throw new Error(message);
}
|
|
66
|
+
|
|
67
|
+
/**
 * Interactive dependency check. If the package does not load, ask the user
 * (via the supplied readline interface) whether to run `npm install` for it.
 *
 * @param {string} packageName
 * @param {import('readline').Interface} rl
 * @returns {Promise<boolean>} true when the package loads, either straight away
 *   or after a successful install; false when the user declines, the install
 *   command fails, or the package still does not load afterwards.
 */
export async function ensureDependencyInteractive(packageName, rl) {
  const initial = await checkDependency(packageName);
  if (initial.available) {
    return true;
  }

  // Adapt readline's callback API to a promise; normalise the reply.
  const reply = await new Promise((resolve) => {
    const prompt = `Package "${packageName}" is not installed. Install it now? (y/n): `;
    rl.question(prompt, (raw) => {
      resolve(raw.trim().toLowerCase());
    });
  });

  const consented = reply === 'y' || reply === 'yes';
  if (!consented) {
    return false;
  }

  try {
    // Output is captured (stdio: 'pipe') so stderr can be shown on failure.
    execFileSync('npm', ['install', packageName], { stdio: 'pipe', timeout: 30000 });
    // A zero exit code is not trusted on its own: confirm the package loads.
    const { available: loadsNow } = await checkDependency(packageName);
    return loadsNow;
  } catch (err) {
    const detail = err.stderr?.toString().trim() || err.message;
    console.error(` ✗ Failed to install ${packageName}: ${detail}`);
    return false;
  }
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Background Drift Monitor — periodically checks all promoted tools for drift.
|
|
3
|
+
*
|
|
4
|
+
* Reuses checkDrift() and computeSuspects() from cli/drift-monitor.js.
|
|
5
|
+
* Started in forge-service.js when --mode=sidecar.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getAllToolRegistry, insertDriftAlert } from './db.js';
|
|
9
|
+
import { checkDrift, computeSuspects } from './drift-monitor.js';
|
|
10
|
+
|
|
11
|
+
const DEFAULT_INTERVAL_MS = 5 * 60 * 1000; // 5 minutes

/**
 * Create a background drift monitor that periodically runs a drift check on
 * every tool whose registry row has lifecycle_state 'promoted'.
 *
 * Persistence is left to checkDrift(): it inserts the drift alert and flags the
 * tool itself. This function previously also tried to insert an alert, but it
 * read fields (`drifted`, `baseline`, `current`) that checkDrift() does not
 * return, so that branch never ran; inserting here as well would duplicate the
 * alert checkDrift() already wrote.
 *
 * @param {object} config — forge config (drift.threshold, drift.windowSize)
 * @param {import('better-sqlite3').Database} db
 * @param {number} [intervalMs] — check interval (default 5 min)
 * @returns {{ start(): void, stop(): void, runOnce(): void }}
 */
export function createDriftMonitor(config, db, intervalMs = DEFAULT_INTERVAL_MS) {
  let timer = null;
  const threshold = config.drift?.threshold ?? 0.1;
  const windowSize = config.drift?.windowSize ?? 5;

  /** Run one pass over all promoted tools. Errors are logged, never thrown. */
  function runOnce() {
    try {
      const promoted = getAllToolRegistry(db).filter((r) => r.lifecycle_state === 'promoted');
      for (const tool of promoted) {
        // checkDrift records the alert and flags the tool when drift is found.
        checkDrift(db, tool.tool_name, threshold, windowSize);
      }
    } catch (err) {
      process.stderr.write(`[drift-monitor] Error during check: ${err.message}\n`);
    }
  }

  return {
    /** Begin periodic checks. Calling start() while running is a no-op. */
    start() {
      if (timer) return;
      timer = setInterval(runOnce, intervalMs);
      timer.unref(); // Don't block process exit
    },
    /** Stop periodic checks. Safe to call when not running. */
    stop() {
      if (timer) {
        clearInterval(timer);
        timer = null;
      }
    },
    runOnce
  };
}
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Drift Monitor — synchronous drift detection and suspect computation.
|
|
3
|
+
*
|
|
4
|
+
* Pure synchronous module. All SQL via direct db.prepare() calls.
|
|
5
|
+
* No imports from db.js — avoids circular dep risk, consistent with codebase.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// ── Rolling average ────────────────────────────────────────────────────────
|
|
9
|
+
|
|
10
|
+
/**
 * Average the pass_rate of a tool's most recent eval runs.
 *
 * Only runs with a non-null pass_rate and total_cases > 0 are considered,
 * newest first, capped at `windowSize` rows.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} toolName
 * @param {number} [windowSize=5]
 * @returns {number|null} Mean pass_rate, or null when there are no matching
 *   runs or the query fails
 */
export function computeRollingAverage(db, toolName, windowSize = 5) {
  const recentRunsSql = `
    SELECT pass_rate FROM eval_runs
    WHERE tool_name = ? AND pass_rate IS NOT NULL AND total_cases > 0
    ORDER BY run_at DESC
    LIMIT ?
  `;

  let recentRuns;
  try {
    recentRuns = db.prepare(recentRunsSql).all(toolName, windowSize);
  } catch (_) {
    // Query failure is reported the same way as "no data".
    return null;
  }

  if (!recentRuns || recentRuns.length === 0) {
    return null;
  }

  let total = 0;
  for (const run of recentRuns) {
    total += run.pass_rate || 0;
  }
  return total / recentRuns.length;
}
|
|
35
|
+
|
|
36
|
+
// ── Suspect computation ────────────────────────────────────────────────────
|
|
37
|
+
|
|
38
|
+
/**
 * List the other tools promoted in the window between this tool's last clean
 * run and the moment it was flagged. Those tools are the suspected cause of
 * the drift.
 *
 * A "clean" run is one whose pass_rate is at least the tool's
 * baseline_pass_rate (0.8 when the registry has no baseline). When no clean
 * run exists the window opens at '1970-01-01'.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} toolName
 * @returns {string[]} Suspected tool names; empty when the tool is unknown,
 *   has no flagged_at, or any query fails
 */
export function computeSuspects(db, toolName) {
  let registryEntry;
  try {
    registryEntry = db
      .prepare(`SELECT flagged_at, baseline_pass_rate FROM tool_registry WHERE tool_name = ?`)
      .get(toolName);
  } catch (_) {
    return [];
  }

  const windowEnd = registryEntry?.flagged_at;
  if (!windowEnd) {
    return [];
  }

  // Most recent run that still met the baseline marks the window start.
  let lastCleanRun;
  try {
    const cleanRunSql = `
      SELECT run_at FROM eval_runs
      WHERE tool_name = ?
        AND pass_rate IS NOT NULL
        AND pass_rate >= ?
      ORDER BY run_at DESC
      LIMIT 1
    `;
    lastCleanRun = db.prepare(cleanRunSql).get(toolName, registryEntry.baseline_pass_rate ?? 0.8);
  } catch (_) {
    return [];
  }

  const windowStart = lastCleanRun?.run_at || '1970-01-01';

  // Every other tool promoted inside (windowStart, windowEnd].
  try {
    const promotedSql = `
      SELECT tool_name FROM tool_registry
      WHERE tool_name != ?
        AND promoted_at IS NOT NULL
        AND promoted_at > ?
        AND promoted_at <= ?
    `;
    const promotedRows = db.prepare(promotedSql).all(toolName, windowStart, windowEnd);
    const names = [];
    for (const row of promotedRows) {
      names.push(row.tool_name);
    }
    return names;
  } catch (_) {
    return [];
  }
}
|
|
88
|
+
|
|
89
|
+
// ── Drift check ────────────────────────────────────────────────────────────
|
|
90
|
+
|
|
91
|
+
/**
 * Check whether a tool's recent pass rate has drifted below its baseline.
 *
 * The baseline is tool_registry.baseline_pass_rate; the current rate is the
 * rolling average over the last `windowSize` eval runs. When the drop reaches
 * `threshold` and no open alert exists for the tool, the tool is marked
 * 'flagged' and an 'open' drift_alert is inserted, both in one transaction.
 * Database errors are treated as non-fatal and never thrown to the caller.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} toolName
 * @param {number} [threshold=0.1] - Minimum pass_rate drop to flag as drift
 * @param {number} [windowSize=5]
 * @returns {{ drifting: boolean, delta: number, suspects: string[], currentRate: number|null }}
 *   `delta` is baseline minus current rate (0 when either is unavailable).
 */
export function checkDrift(db, toolName, threshold = 0.1, windowSize = 5) {
  // Absorbs float rounding so a drop of exactly `threshold` still counts
  // (e.g. 0.9 - 0.8 evaluates to 0.09999999999999998).
  const DELTA_TOLERANCE = 1e-9;

  // Get baseline from tool_registry
  let baseline = null;
  try {
    const reg = db
      .prepare(`SELECT baseline_pass_rate, lifecycle_state FROM tool_registry WHERE tool_name = ?`)
      .get(toolName);
    if (reg) baseline = reg.baseline_pass_rate;
  } catch (_) { /* non-fatal: handled as "no baseline" below */ }

  const currentRate = computeRollingAverage(db, toolName, windowSize);

  if (baseline == null || currentRate == null) {
    return { drifting: false, delta: 0, suspects: [], currentRate };
  }

  const delta = baseline - currentRate;
  const drifting = delta >= threshold - DELTA_TOLERANCE;

  if (!drifting) {
    return { drifting: false, delta, suspects: [], currentRate };
  }

  // Check if open alert already exists
  let openAlert = null;
  try {
    openAlert = db.prepare(`SELECT id FROM drift_alerts WHERE tool_name = ? AND status = 'open'`).get(toolName);
  } catch (_) { /* non-fatal */ }

  if (openAlert) {
    // Already recorded: report the suspects for the existing flag, write nothing.
    return { drifting: true, delta, suspects: computeSuspects(db, toolName), currentRate };
  }

  // Flag the tool + insert the alert atomically — partial writes corrupt drift triangulation
  const now = new Date().toISOString();
  let suspects = [];
  try {
    db.transaction(() => {
      // Flag first: computeSuspects() reads flagged_at from tool_registry, so
      // computing suspects before this write would see no (or a stale) flag
      // time and record an empty or outdated suspect list in the alert.
      db.prepare(`
        UPDATE tool_registry SET lifecycle_state = 'flagged', flagged_at = ?
        WHERE tool_name = ? AND lifecycle_state != 'flagged'
      `).run(now, toolName);

      suspects = computeSuspects(db, toolName);

      db.prepare(`
        INSERT INTO drift_alerts (tool_name, detected_at, trigger_tools, baseline_rate, current_rate, delta, status)
        VALUES (?, ?, ?, ?, ?, ?, 'open')
      `).run(toolName, now, JSON.stringify(suspects), baseline, currentRate, delta);
    })();
  } catch (_) {
    // Non-fatal. The writes did not commit, so report suspects for the state
    // that is actually stored.
    suspects = computeSuspects(db, toolName);
  }

  return { drifting: true, delta, suspects, currentRate };
}
|
|
150
|
+
|
|
151
|
+
// ── Drift resolution ───────────────────────────────────────────────────────
|
|
152
|
+
|
|
153
|
+
/**
 * Resolve a drift alert in one transaction: mark the alert resolved, retire
 * the tool the alert was raised for (recording its replacement), and promote
 * the replacement tool. All three rows receive the same timestamp.
 *
 * Does nothing when no alert with `alertId` exists.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {number} alertId - The drift_alerts.id to resolve
 * @param {string} replacementToolName - The new tool name to promote
 */
export function resolveDrift(db, alertId, replacementToolName) {
  const resolvedAt = new Date().toISOString();

  const applyResolution = () => {
    // The alert row tells us which tool is being replaced.
    const alertRow = db.prepare(`SELECT tool_name FROM drift_alerts WHERE id = ?`).get(alertId);
    if (!alertRow) {
      return;
    }
    const retiringTool = alertRow.tool_name;

    // 1. Close the alert.
    const closeAlert = db.prepare(`UPDATE drift_alerts SET status = 'resolved', resolved_at = ? WHERE id = ?`);
    closeAlert.run(resolvedAt, alertId);

    // 2. Retire the drifting tool and point it at its successor.
    const retireTool = db.prepare(`
      UPDATE tool_registry SET lifecycle_state = 'retired', retired_at = ?, replaced_by = ?
      WHERE tool_name = ?
    `);
    retireTool.run(resolvedAt, replacementToolName, retiringTool);

    // 3. Promote the successor.
    const promoteTool = db.prepare(`
      UPDATE tool_registry SET lifecycle_state = 'promoted', promoted_at = ?
      WHERE tool_name = ?
    `);
    promoteTool.run(resolvedAt, replacementToolName);
  };

  db.transaction(applyResolution)();
}
|