agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +209 -0
  3. package/lib/agent-registry.js +170 -0
  4. package/lib/api-client.js +792 -0
  5. package/lib/api-loader.js +260 -0
  6. package/lib/auth.d.ts +25 -0
  7. package/lib/auth.js +158 -0
  8. package/lib/checks/check-adapter.js +172 -0
  9. package/lib/checks/compose.js +42 -0
  10. package/lib/checks/content-match.js +14 -0
  11. package/lib/checks/cost-budget.js +11 -0
  12. package/lib/checks/index.js +18 -0
  13. package/lib/checks/json-valid.js +15 -0
  14. package/lib/checks/latency.js +11 -0
  15. package/lib/checks/length-bounds.js +17 -0
  16. package/lib/checks/negative-match.js +14 -0
  17. package/lib/checks/no-hallucinated-numbers.js +63 -0
  18. package/lib/checks/non-empty.js +34 -0
  19. package/lib/checks/regex-match.js +12 -0
  20. package/lib/checks/run-checks.js +84 -0
  21. package/lib/checks/schema-match.js +26 -0
  22. package/lib/checks/tool-call-count.js +16 -0
  23. package/lib/checks/tool-selection.js +34 -0
  24. package/lib/checks/types.js +45 -0
  25. package/lib/comparison/compare.js +86 -0
  26. package/lib/comparison/format.js +104 -0
  27. package/lib/comparison/index.js +6 -0
  28. package/lib/comparison/statistics.js +59 -0
  29. package/lib/comparison/types.js +41 -0
  30. package/lib/config-schema.js +200 -0
  31. package/lib/config.d.ts +66 -0
  32. package/lib/conversation-store.d.ts +77 -0
  33. package/lib/conversation-store.js +443 -0
  34. package/lib/db.d.ts +6 -0
  35. package/lib/db.js +1112 -0
  36. package/lib/dep-check.js +99 -0
  37. package/lib/drift-background.js +61 -0
  38. package/lib/drift-monitor.js +187 -0
  39. package/lib/eval-runner.js +566 -0
  40. package/lib/fixtures/fixture-store.js +161 -0
  41. package/lib/fixtures/index.js +11 -0
  42. package/lib/forge-engine.js +982 -0
  43. package/lib/forge-eval-generator.js +417 -0
  44. package/lib/forge-file-writer.js +386 -0
  45. package/lib/forge-service-client.js +190 -0
  46. package/lib/forge-service.d.ts +4 -0
  47. package/lib/forge-service.js +655 -0
  48. package/lib/forge-verifier-generator.js +271 -0
  49. package/lib/handlers/admin.js +151 -0
  50. package/lib/handlers/agents.js +229 -0
  51. package/lib/handlers/chat-resume.js +334 -0
  52. package/lib/handlers/chat-sync.js +320 -0
  53. package/lib/handlers/chat.js +320 -0
  54. package/lib/handlers/conversations.js +92 -0
  55. package/lib/handlers/preferences.js +88 -0
  56. package/lib/handlers/tools-list.js +58 -0
  57. package/lib/hitl-engine.d.ts +60 -0
  58. package/lib/hitl-engine.js +261 -0
  59. package/lib/http-utils.js +92 -0
  60. package/lib/index.d.ts +20 -0
  61. package/lib/index.js +141 -0
  62. package/lib/init.js +636 -0
  63. package/lib/manual-entry.js +59 -0
  64. package/lib/mcp-server.js +252 -0
  65. package/lib/output-groups.js +54 -0
  66. package/lib/postgres-store.d.ts +31 -0
  67. package/lib/postgres-store.js +465 -0
  68. package/lib/preference-store.d.ts +47 -0
  69. package/lib/preference-store.js +79 -0
  70. package/lib/prompt-store.d.ts +42 -0
  71. package/lib/prompt-store.js +60 -0
  72. package/lib/rate-limiter.d.ts +30 -0
  73. package/lib/rate-limiter.js +104 -0
  74. package/lib/react-engine.d.ts +110 -0
  75. package/lib/react-engine.js +337 -0
  76. package/lib/runner/cli.js +156 -0
  77. package/lib/runner/cost-estimator.js +71 -0
  78. package/lib/runner/gate.js +46 -0
  79. package/lib/runner/index.js +165 -0
  80. package/lib/sidecar.d.ts +83 -0
  81. package/lib/sidecar.js +161 -0
  82. package/lib/sse.d.ts +15 -0
  83. package/lib/sse.js +30 -0
  84. package/lib/tools-scanner.js +91 -0
  85. package/lib/tui.js +253 -0
  86. package/lib/verifier-report.js +78 -0
  87. package/lib/verifier-runner.js +338 -0
  88. package/lib/verifier-scanner.js +70 -0
  89. package/lib/verifier-worker-pool.js +196 -0
  90. package/lib/views/chat.js +340 -0
  91. package/lib/views/endpoints.js +203 -0
  92. package/lib/views/eval-run.js +206 -0
  93. package/lib/views/forge-agent.js +538 -0
  94. package/lib/views/forge.js +410 -0
  95. package/lib/views/main-menu.js +275 -0
  96. package/lib/views/mediation.js +381 -0
  97. package/lib/views/model-compare.js +430 -0
  98. package/lib/views/model-comparison.js +333 -0
  99. package/lib/views/onboarding.js +470 -0
  100. package/lib/views/performance.js +237 -0
  101. package/lib/views/run-evals.js +205 -0
  102. package/lib/views/settings.js +829 -0
  103. package/lib/views/tools-evals.js +514 -0
  104. package/lib/views/verifier-coverage.js +617 -0
  105. package/lib/workers/verifier-worker.js +52 -0
  106. package/package.json +123 -0
  107. package/widget/forge-chat.js +789 -0
@@ -0,0 +1,41 @@
1
+ // Adapted from agent-eval-kit by FlanaganSe (https://github.com/FlanaganSe/agent-eval-kit)
2
+ // MIT License — see LICENSE
3
+
4
+ /**
5
+ * @typedef {Object} CaseComparison
6
+ * @property {string} caseId
7
+ * @property {'regression'|'improvement'|'unchanged'|'added'|'removed'} status
8
+ * @property {number} [basePassRate]
9
+ * @property {number} [comparePassRate]
10
+ * @property {number} [baseMeanLatencyMs]
11
+ * @property {number} [compareMeanLatencyMs]
12
+ */
13
+
14
+ /**
15
+ * @typedef {Object} RunSummary
16
+ * @property {string} runId
17
+ * @property {string} modelName
18
+ * @property {number} passRate
19
+ * @property {number} totalCases
20
+ * @property {number} totalCost
21
+ * @property {number} meanLatencyMs
22
+ * @property {number} p95LatencyMs
23
+ * @property {Object.<string, {pass: boolean, latencyMs?: number}[]>} cases
24
+ */
25
+
26
+ /**
27
+ * @typedef {Object} RunComparison
28
+ * @property {RunSummary} base
29
+ * @property {RunSummary} compare
30
+ * @property {CaseComparison[]} cases
31
+ * @property {number} regressions
32
+ * @property {number} improvements
33
+ * @property {number} unchanged
34
+ * @property {number} added
35
+ * @property {number} removed
36
+ */
37
+
38
+ /**
39
+ * @typedef {Object} ComparisonOptions
40
+ * @property {number} [significanceThreshold] - Minimum absolute difference required to count a case as a regression or improvement (default 0.1)
41
+ */
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Config schema — defaults and validation for forge.config.json.
3
+ *
4
+ * Used by the sidecar runtime to fill missing config values
5
+ * and reject invalid configurations before startup.
6
+ */
7
+
8
+ export const CONFIG_DEFAULTS = { // baseline values; mergeDefaults() layers the user's config on top of these
9
+ auth: { mode: 'trust', signingKey: null, claimsPath: 'sub' }, // validateConfig requires signingKey when mode is 'verify'
10
+ defaultModel: 'claude-sonnet-4-6',
11
+ defaultHitlLevel: 'cautious', // must be one of VALID_HITL_LEVELS
12
+ allowUserModelSelect: false,
13
+ allowUserHitlConfig: false,
14
+ adminKey: null,
15
+ database: { type: 'sqlite', url: null }, // type must be one of VALID_DB_TYPES
16
+ conversation: { store: 'sqlite', window: 25, redis: {} }, // store: one of VALID_STORE_TYPES; window: positive integer
17
+ sidecar: { enabled: false, port: 8001 }, // port must be an integer between 1 and 65535
18
+ agents: [], // each entry needs a slug id and a displayName to pass validateConfig
19
+ rateLimit: { // validateConfig only checks these fields when enabled is true
20
+ enabled: false,
21
+ windowMs: 60_000, // 1 minute
22
+ maxRequests: 60 // per user per window
23
+ // no 'store' key — auto-uses Redis if config.conversation.redis is configured
24
+ },
25
+ verification: {
26
+ sandbox: true, // false to disable sandboxing (dev mode)
27
+ workerPoolSize: null, // null = min(4, cpus().length), or explicit positive integer
28
+ customTimeout: 2000, // ms per custom verifier call; must be a positive integer
29
+ maxQueueDepth: 200 // pending calls before queue-full rejection; must be a positive integer
30
+ },
31
+ agent: { // NOTE(review): validateConfig performs no checks on this section
32
+ endpoint: null, // URL of the agent chat endpoint
33
+ method: 'POST',
34
+ headers: {}, // additional headers (e.g. Authorization)
35
+ inputField: 'message', // request body field for the user message
36
+ outputField: 'text', // response body field for the agent reply
37
+ sessionField: 'sessionId' // optional session field
38
+ },
39
+ gates: { // NOTE(review): validateConfig performs no checks on this section
40
+ passRate: null, // minimum pass rate (0-1), null = no gate
41
+ maxCost: null, // maximum total cost in USD, null = no gate
42
+ p95LatencyMs: null // maximum p95 latency in ms, null = no gate
43
+ },
44
+ fixtures: { // NOTE(review): validateConfig performs no checks on this section
45
+ dir: '.forge-fixtures', // directory to store fixture files
46
+ ttlDays: 30 // fixture TTL in days
47
+ }
48
+ };
49
+
50
+ const VALID_AUTH_MODES = ['verify', 'trust']; // accepted values for auth.mode
51
+ const VALID_HITL_LEVELS = ['autonomous', 'cautious', 'standard', 'paranoid']; // accepted values for defaultHitlLevel (top-level and per agent)
52
+ const VALID_STORE_TYPES = ['sqlite', 'redis', 'postgres']; // accepted values for conversation.store
53
+ const VALID_DB_TYPES = ['sqlite', 'postgres']; // accepted values for database.type
54
+
/**
 * Fill in every config value the caller left out, using CONFIG_DEFAULTS.
 *
 * Plain objects are merged key by key; arrays and primitives supplied in
 * `raw` replace the corresponding default wholesale.
 *
 * @param {object} raw — user-provided config (from forge.config.json); null or undefined is treated as {}
 * @returns {object} merged config with all defaults filled in
 */
export function mergeDefaults(raw = {}) {
  // The default parameter only covers `undefined`; `??` also covers an explicit null.
  return deepMerge(CONFIG_DEFAULTS, raw ?? {});
}
66
+
// Keys that could reach Object.prototype if copied from untrusted JSON.
const UNSAFE_KEYS = new Set(['__proto__', 'constructor', 'prototype']);

/**
 * True for any non-null, non-array object — the values deepMerge recurses into.
 */
function isMergeable(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Recursively copy arrays and plain objects so the result shares no mutable
 * structure with `value`. Primitives and objects with a custom prototype
 * (class instances) are returned as-is.
 */
function cloneDefault(value) {
  if (Array.isArray(value)) return value.map(cloneDefault);
  if (isMergeable(value)) {
    const proto = Object.getPrototypeOf(value);
    if (proto === Object.prototype || proto === null) {
      return Object.fromEntries(Object.entries(value).map(([key, inner]) => [key, cloneDefault(inner)]));
    }
  }
  return value;
}

/**
 * Merge `overrides` onto `defaults` without mutating either argument.
 *
 * Where both sides hold a mergeable object the merge recurses; otherwise the
 * override value (array, primitive or null) replaces the default entirely.
 * Default values that are not overridden are deep-copied, so callers can
 * mutate the returned config without altering the shared defaults object.
 *
 * @param {object} defaults — base values; must be an object
 * @param {object} overrides — values to layer on top; anything that is not a
 *   non-array object is ignored
 * @returns {object} a new object containing the merged values
 */
function deepMerge(defaults, overrides) {
  const result = Object.fromEntries(
    Object.entries(defaults).map(([key, value]) => [key, cloneDefault(value)])
  );
  if (!isMergeable(overrides)) return result;

  for (const key of Object.keys(overrides)) {
    if (UNSAFE_KEYS.has(key)) continue; // never copy prototype-pollution vectors
    const val = overrides[key];
    result[key] = isMergeable(val) && isMergeable(defaults[key])
      ? deepMerge(defaults[key], val)
      : val;
  }
  return result;
}
83
+
/**
 * Report whether `value` is an integer greater than or equal to 1.
 * Number.isInteger is already false for non-numbers, NaN and Infinity.
 */
function isPositiveInteger(value) {
  return Number.isInteger(value) && value >= 1;
}

/**
 * Append one message to `errors` for every invalid field found in `agents`.
 *
 * @param {*} agents — the raw `agents` value from the config
 * @param {string[]} errors — accumulator that messages are pushed onto
 */
function collectAgentErrors(agents, errors) {
  if (!Array.isArray(agents)) {
    errors.push('agents must be an array');
    return;
  }
  const AGENT_ID_RE = /^[a-z0-9_-]+$/;
  for (let i = 0; i < agents.length; i++) {
    const a = agents[i];
    // A null entry or an array hole would otherwise throw on the property reads below.
    if (a == null) {
      errors.push(`agents[${i}] must be an object (got ${a})`);
      continue;
    }
    if (!a.id || typeof a.id !== 'string' || !AGENT_ID_RE.test(a.id)) {
      errors.push(`agents[${i}].id must be a slug matching /^[a-z0-9_-]+$/ (got "${a.id}")`);
    }
    if (!a.displayName || typeof a.displayName !== 'string') {
      errors.push(`agents[${i}].displayName is required and must be a string (got ${JSON.stringify(a.displayName)})`);
    }
    if (a.defaultHitlLevel !== undefined && !VALID_HITL_LEVELS.includes(a.defaultHitlLevel)) {
      errors.push(`agents[${i}].defaultHitlLevel must be one of: ${VALID_HITL_LEVELS.join(', ')} (got "${a.defaultHitlLevel}")`);
    }
    if (a.toolAllowlist !== undefined && !Array.isArray(a.toolAllowlist) && a.toolAllowlist !== '*') {
      errors.push(`agents[${i}].toolAllowlist must be '*' or an array of tool names`);
    }
    if (a.maxTurns !== undefined && !isPositiveInteger(a.maxTurns)) {
      errors.push(`agents[${i}].maxTurns must be a positive integer (got ${a.maxTurns})`);
    }
    if (a.maxTokens !== undefined && !isPositiveInteger(a.maxTokens)) {
      errors.push(`agents[${i}].maxTokens must be a positive integer (got ${a.maxTokens})`);
    }
  }
}

/**
 * Validate a raw config object. Returns { valid, errors } and never throws
 * for null config, null sections or null agent entries.
 *
 * Only fields that are present are checked, so the function works on a config
 * before or after defaults have been merged in. rateLimit fields are checked
 * only when rateLimit.enabled is true.
 *
 * @param {object} raw — config to validate; null or undefined is treated as {}
 * @returns {{ valid: boolean, errors: string[] }}
 */
export function validateConfig(raw = {}) {
  // Object() maps null/undefined to {} and returns any other object unchanged,
  // matching how mergeDefaults treats a missing config.
  const cfg = Object(raw);
  const errors = [];

  // auth.mode
  const authMode = cfg.auth?.mode;
  if (authMode !== undefined && !VALID_AUTH_MODES.includes(authMode)) {
    errors.push(`auth.mode must be one of: ${VALID_AUTH_MODES.join(', ')} (got "${authMode}")`);
  }

  // auth.mode = 'verify' requires signingKey.
  // NOTE(review): with the sidecar enabled this reports the missing key twice
  // (generic message plus the startup hint). Kept as-is because callers may
  // match on either message.
  if (authMode === 'verify' && !cfg.auth?.signingKey) {
    errors.push('auth.signingKey is required when auth.mode is "verify"');
    if (cfg.sidecar?.enabled) {
      errors.push('auth.signingKey is required when auth.mode is "verify" and sidecar is enabled. Set FORGE_JWT_KEY in .env');
    }
  }

  // defaultHitlLevel
  if (cfg.defaultHitlLevel !== undefined && !VALID_HITL_LEVELS.includes(cfg.defaultHitlLevel)) {
    errors.push(`defaultHitlLevel must be one of: ${VALID_HITL_LEVELS.join(', ')} (got "${cfg.defaultHitlLevel}")`);
  }

  // conversation.store
  const store = cfg.conversation?.store;
  if (store !== undefined && !VALID_STORE_TYPES.includes(store)) {
    errors.push(`conversation.store must be one of: ${VALID_STORE_TYPES.join(', ')} (got "${store}")`);
  }

  // sidecar.port
  const port = cfg.sidecar?.port;
  if (port !== undefined && (!isPositiveInteger(port) || port > 65535)) {
    errors.push(`sidecar.port must be an integer between 1 and 65535 (got ${port})`);
  }

  // database.type
  const dbType = cfg.database?.type;
  if (dbType !== undefined && !VALID_DB_TYPES.includes(dbType)) {
    errors.push(`database.type must be one of: ${VALID_DB_TYPES.join(', ')} (got "${dbType}")`);
  }

  // conversation.window
  const w = cfg.conversation?.window;
  if (w !== undefined && !isPositiveInteger(w)) {
    errors.push(`conversation.window must be a positive integer (got ${w})`);
  }

  // agents[]
  if (cfg.agents !== undefined) {
    collectAgentErrors(cfg.agents, errors);
  }

  // rateLimit (only validated when enabled)
  if (cfg.rateLimit?.enabled === true) {
    const { windowMs, maxRequests } = cfg.rateLimit;
    if (windowMs !== undefined && !isPositiveInteger(windowMs)) {
      errors.push(`rateLimit.windowMs must be a positive integer (got ${windowMs})`);
    }
    if (maxRequests !== undefined && !isPositiveInteger(maxRequests)) {
      errors.push(`rateLimit.maxRequests must be a positive integer (got ${maxRequests})`);
    }
  }

  // verification (`!= null` so an explicit null section is skipped instead of throwing)
  const verification = cfg.verification;
  if (verification != null) {
    const { workerPoolSize, customTimeout, maxQueueDepth } = verification;
    // null is a legal workerPoolSize: it selects the automatic pool size.
    if (workerPoolSize !== null && workerPoolSize !== undefined && !isPositiveInteger(workerPoolSize)) {
      errors.push(`verification.workerPoolSize must be a positive integer or null (got ${workerPoolSize})`);
    }
    if (customTimeout !== undefined && !isPositiveInteger(customTimeout)) {
      errors.push(`verification.customTimeout must be a positive integer (got ${customTimeout})`);
    }
    if (maxQueueDepth !== undefined && !isPositiveInteger(maxQueueDepth)) {
      errors.push(`verification.maxQueueDepth must be a positive integer (got ${maxQueueDepth})`);
    }
  }

  return { valid: errors.length === 0, errors };
}
@@ -0,0 +1,66 @@
1
+ export interface RateLimitConfig { // shape of the `rateLimit` section of SidecarConfig
2
+ enabled: boolean;
3
+ windowMs: number; // window length in milliseconds
4
+ maxRequests: number; // requests allowed per user per window
5
+ }
6
+
7
+ export interface VerificationConfig { // shape of the `verification` section of SidecarConfig
8
+ sandbox: boolean; // false disables sandboxing (dev mode)
9
+ workerPoolSize: number | null; // null selects the automatic pool size
10
+ customTimeout: number; // milliseconds per custom verifier call
11
+ maxQueueDepth: number; // pending calls allowed before queue-full rejection
12
+ }
13
+
14
+ export interface ConversationConfig { // shape of the `conversation` section of SidecarConfig
15
+ store?: string; // runtime validation accepts 'sqlite', 'redis' or 'postgres'
16
+ window: number; // runtime validation requires a positive integer
17
+ redis?: object; // NOTE(review): presumably the options handed to RedisConversationStore — confirm
18
+ }
19
+
/**
 * Shape of the `database` section of the config.
 */
export interface DatabaseConfig {
  type?: 'sqlite' | 'postgres';
  /** Connection URL; the shipped default is `null`. */
  url?: string | null;
}
24
+
/**
 * Shape of the `auth` section of the config.
 */
export interface AuthConfig {
  mode?: 'trust' | 'verify';
  /** Required by runtime validation when `mode` is `'verify'`; the shipped default is `null`. */
  signingKey?: string | null;
  claimsPath?: string;
}
30
+
31
+ export interface AgentConfig { // one entry of SidecarConfig.agents
32
+ id?: string; // optional in the type, but runtime validation requires a slug matching /^[a-z0-9_-]+$/
33
+ displayName?: string; // optional in the type, but runtime validation requires a non-empty string
34
+ systemPrompt?: string;
35
+ defaultModel?: string;
36
+ defaultHitlLevel?: string; // runtime validation accepts 'autonomous', 'cautious', 'standard' or 'paranoid'
37
+ toolAllowlist?: string | string[]; // '*' or an array of tool names
38
+ maxTurns?: number; // positive integer
39
+ maxTokens?: number; // positive integer
40
+ /** DB-only — set by agent_registry, not forge.config.json */
41
+ isDefault?: number;
42
+ /** DB-only — set by agent_registry, not forge.config.json */
43
+ enabled?: number;
44
+ }
45
+
/**
 * Top-level shape of forge.config.json.
 *
 * Every section is optional in user-supplied config. `agent`, `gates` and
 * `fixtures` mirror the sections of the same name in the runtime defaults;
 * fields whose default is `null` are typed as nullable.
 */
export interface SidecarConfig {
  auth?: AuthConfig;
  defaultModel?: string;
  defaultHitlLevel?: 'autonomous' | 'cautious' | 'standard' | 'paranoid';
  allowUserModelSelect?: boolean;
  allowUserHitlConfig?: boolean;
  systemPrompt?: string;
  /** The shipped default is `null`. */
  adminKey?: string | null;
  conversation?: ConversationConfig;
  rateLimit?: RateLimitConfig;
  verification?: VerificationConfig;
  database?: DatabaseConfig;
  sidecar?: { enabled?: boolean; port?: number };
  agents?: AgentConfig[];
  costs?: Record<string, { input: number; output: number }>;
  /** Connection details for the agent chat endpoint. */
  agent?: {
    /** URL of the agent chat endpoint; `null` by default. */
    endpoint?: string | null;
    method?: string;
    /** Additional request headers (e.g. Authorization). */
    headers?: Record<string, string>;
    /** Request body field for the user message. */
    inputField?: string;
    /** Response body field for the agent reply. */
    outputField?: string;
    /** Optional session field. */
    sessionField?: string;
  };
  /** Run gates; a `null` value means the gate is not applied. */
  gates?: {
    /** Minimum pass rate (0-1). */
    passRate?: number | null;
    /** Maximum total cost in USD. */
    maxCost?: number | null;
    /** Maximum p95 latency in milliseconds. */
    p95LatencyMs?: number | null;
  };
  /** Fixture storage settings. */
  fixtures?: {
    /** Directory in which fixture files are stored. */
    dir?: string;
    /** Fixture TTL in days. */
    ttlDays?: number;
  };
}
62
+
/** Baseline configuration values that `mergeDefaults` layers user config onto. */
export const CONFIG_DEFAULTS: SidecarConfig;

/**
 * Deep-merge `config` onto `CONFIG_DEFAULTS`.
 * Omitting the argument or passing `null` yields the defaults.
 */
export function mergeDefaults(config?: Partial<SidecarConfig> | null): SidecarConfig;

/**
 * Validate a config object; `errors` holds one message per problem found.
 * Omitting the argument validates an empty config.
 */
export function validateConfig(config?: SidecarConfig): { valid: boolean; errors: string[] };
@@ -0,0 +1,77 @@
1
+ export interface ConversationMessage { // element type of the array resolved by getHistory; uses snake_case field names
2
+ session_id?: string;
3
+ stage?: string;
4
+ role: 'user' | 'assistant' | 'tool' | 'system';
5
+ content: string;
6
+ agent_id?: string | null;
7
+ user_id?: string | null;
8
+ created_at?: string; // NOTE(review): timestamp format is not specified in this declaration — confirm against the store implementations
9
+ }
10
+
11
+ export interface SessionSummary { // element type of the array resolved by listSessions; uses camelCase field names
12
+ sessionId: string;
13
+ agentId: string | null;
14
+ userId: string | null;
15
+ startedAt: string; // NOTE(review): timestamp format is not specified in this declaration — confirm
16
+ lastUpdated: string; // NOTE(review): timestamp format is not specified in this declaration — confirm
17
+ }
18
+
19
+ export interface ConversationStore { // contract implemented by the Sqlite, Redis and Postgres store classes declared in this file
20
+ createSession(): string; // the only synchronous method; NOTE(review): presumably returns the new session id — confirm
21
+ persistMessage(
22
+ sessionId: string,
23
+ stage: string,
24
+ role: string, // typed as a plain string here, while ConversationMessage.role is a four-value union
25
+ content: string,
26
+ agentId?: string | null,
27
+ userId?: string | null
28
+ ): Promise<void>;
29
+ getHistory(sessionId: string): Promise<ConversationMessage[]>;
30
+ getIncompleteSessions(): Promise<object[]>; // element shape is left untyped by this declaration
31
+ getSessionUserId(sessionId: string): Promise<string | null | undefined>; // NOTE(review): the difference between null and undefined is not documented here — confirm
32
+ listSessions(userId: string | null): Promise<SessionSummary[]>;
33
+ deleteSession(sessionId: string, userId: string | null): Promise<boolean>; // NOTE(review): presumably resolves true when a session was removed — confirm
34
+ close(): Promise<void>;
35
+ }
36
+
37
+ export class SqliteConversationStore implements ConversationStore { // method signatures repeat the ConversationStore interface
38
+ constructor(db: object); // NOTE(review): `db` is presumably an open SQLite handle; typed only as `object` — confirm
39
+ createSession(): string;
40
+ persistMessage(sessionId: string, stage: string, role: string, content: string, agentId?: string | null, userId?: string | null): Promise<void>;
41
+ getHistory(sessionId: string): Promise<ConversationMessage[]>;
42
+ getIncompleteSessions(): Promise<object[]>;
43
+ getSessionUserId(sessionId: string): Promise<string | null | undefined>;
44
+ listSessions(userId: string | null): Promise<SessionSummary[]>;
45
+ deleteSession(sessionId: string, userId: string | null): Promise<boolean>;
46
+ close(): Promise<void>;
47
+ }
48
+
49
+ export class RedisConversationStore implements ConversationStore { // method signatures repeat the ConversationStore interface
50
+ constructor(redisConfig?: { url?: string; ttlSeconds?: number }); // the whole options object and both of its fields are optional
51
+ createSession(): string;
52
+ persistMessage(sessionId: string, stage: string, role: string, content: string, agentId?: string | null, userId?: string | null): Promise<void>;
53
+ getHistory(sessionId: string): Promise<ConversationMessage[]>;
54
+ getIncompleteSessions(): Promise<object[]>;
55
+ getSessionUserId(sessionId: string): Promise<string | null | undefined>;
56
+ listSessions(userId: string | null): Promise<SessionSummary[]>;
57
+ deleteSession(sessionId: string, userId: string | null): Promise<boolean>;
58
+ close(): Promise<void>;
59
+ }
60
+
61
+ export class PostgresConversationStore implements ConversationStore { // method signatures repeat the ConversationStore interface
62
+ constructor(pgPool: object); // NOTE(review): `pgPool` is presumably a Postgres connection pool; typed only as `object` — confirm
63
+ createSession(): string;
64
+ persistMessage(sessionId: string, stage: string, role: string, content: string, agentId?: string | null, userId?: string | null): Promise<void>;
65
+ getHistory(sessionId: string): Promise<ConversationMessage[]>;
66
+ getIncompleteSessions(): Promise<object[]>;
67
+ getSessionUserId(sessionId: string): Promise<string | null | undefined>;
68
+ listSessions(userId: string | null): Promise<SessionSummary[]>;
69
+ deleteSession(sessionId: string, userId: string | null): Promise<boolean>;
70
+ close(): Promise<void>;
71
+ }
72
+
73
+ export function makeConversationStore( // returns one of the three store classes; the selection logic is not visible in this declaration
74
+ config: object, // NOTE(review): presumably the merged sidecar config — confirm against the implementation
75
+ db?: object | null, // same parameter type as the SqliteConversationStore constructor
76
+ pgPool?: object | null // same parameter type as the PostgresConversationStore constructor
77
+ ): SqliteConversationStore | RedisConversationStore | PostgresConversationStore;