agent-tool-forge 0.4.6 → 0.4.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -16
- package/config/api-endpoints.template.json +17 -0
- package/config/forge.config.template.json +106 -0
- package/lib/config-schema.js +4 -4
- package/lib/config.d.ts +33 -4
- package/lib/forge-service.js +29 -6
- package/lib/hitl-engine.d.ts +8 -2
- package/lib/index.js +2 -3
- package/lib/init.js +1 -1
- package/lib/sidecar.d.ts +16 -5
- package/package.json +3 -1
- package/skills/forge-eval/SKILL.md +69 -0
- package/skills/forge-eval/references/assertion-patterns.md +265 -0
- package/skills/forge-eval/references/eval-types.md +262 -0
- package/skills/forge-eval/references/overlap-map.md +89 -0
- package/skills/forge-mcp/SKILL.md +62 -0
- package/skills/forge-mcp/references/mcp-templates.md +302 -0
- package/skills/forge-mcp/references/tool-to-mcp-mapping.md +108 -0
- package/skills/forge-tool/SKILL.md +112 -0
- package/skills/forge-tool/references/description-contract.md +102 -0
- package/skills/forge-tool/references/extension-points.md +120 -0
- package/skills/forge-tool/references/pending-spec.md +53 -0
- package/skills/forge-tool/references/tool-shape.md +106 -0
- package/skills/forge-verifier/SKILL.md +78 -0
- package/skills/forge-verifier/references/output-groups.md +39 -0
- package/skills/forge-verifier/references/verifier-pattern.md +83 -0
- package/skills/forge-verifier/references/verifier-stubs.md +147 -0
package/README.md
CHANGED
|
@@ -40,10 +40,10 @@ See [docs/tui-workflow.md](docs/tui-workflow.md) for a start-to-finish walkthrou
|
|
|
40
40
|
|
|
41
41
|
```bash
|
|
42
42
|
# Global install (available in all projects)
|
|
43
|
-
cp -r tool-forge/skills/forge-tool ~/.claude/skills/
|
|
44
|
-
cp -r tool-forge/skills/forge-eval ~/.claude/skills/
|
|
45
|
-
cp -r tool-forge/skills/forge-mcp ~/.claude/skills/
|
|
46
|
-
cp -r tool-forge/skills/forge-verifier ~/.claude/skills/
|
|
43
|
+
cp -r node_modules/agent-tool-forge/skills/forge-tool ~/.claude/skills/
|
|
44
|
+
cp -r node_modules/agent-tool-forge/skills/forge-eval ~/.claude/skills/
|
|
45
|
+
cp -r node_modules/agent-tool-forge/skills/forge-mcp ~/.claude/skills/
|
|
46
|
+
cp -r node_modules/agent-tool-forge/skills/forge-verifier ~/.claude/skills/
|
|
47
47
|
```
|
|
48
48
|
|
|
49
49
|
Then in any Claude Code session:
|
|
@@ -123,23 +123,23 @@ All subpaths ship with TypeScript declarations.
|
|
|
123
123
|
|
|
124
124
|
```js
|
|
125
125
|
import { createSidecar } from 'agent-tool-forge' // main entry
|
|
126
|
-
import { reactLoop } from 'tool-forge/react-engine'
|
|
127
|
-
import { createAuth } from 'tool-forge/auth'
|
|
128
|
-
import { makeConversationStore } from 'tool-forge/conversation-store'
|
|
129
|
-
import { mergeDefaults } from 'tool-forge/config'
|
|
130
|
-
import { makeHitlEngine } from 'tool-forge/hitl-engine'
|
|
131
|
-
import { makePromptStore } from 'tool-forge/prompt-store'
|
|
132
|
-
import { makePreferenceStore } from 'tool-forge/preference-store'
|
|
133
|
-
import { makeRateLimiter } from 'tool-forge/rate-limiter'
|
|
134
|
-
import { getDb } from 'tool-forge/db'
|
|
135
|
-
import { initSSE } from 'tool-forge/sse'
|
|
126
|
+
import { reactLoop } from 'agent-tool-forge/react-engine'
|
|
127
|
+
import { createAuth } from 'agent-tool-forge/auth'
|
|
128
|
+
import { makeConversationStore } from 'agent-tool-forge/conversation-store'
|
|
129
|
+
import { mergeDefaults } from 'agent-tool-forge/config'
|
|
130
|
+
import { makeHitlEngine } from 'agent-tool-forge/hitl-engine'
|
|
131
|
+
import { makePromptStore } from 'agent-tool-forge/prompt-store'
|
|
132
|
+
import { makePreferenceStore } from 'agent-tool-forge/preference-store'
|
|
133
|
+
import { makeRateLimiter } from 'agent-tool-forge/rate-limiter'
|
|
134
|
+
import { getDb } from 'agent-tool-forge/db'
|
|
135
|
+
import { initSSE } from 'agent-tool-forge/sse'
|
|
136
136
|
import {
|
|
137
137
|
PostgresStore,
|
|
138
138
|
PostgresEvalStore,
|
|
139
139
|
PostgresChatAuditStore,
|
|
140
140
|
PostgresVerifierStore
|
|
141
|
-
} from 'tool-forge/postgres-store'
|
|
142
|
-
import { buildSidecarContext, createSidecarRouter } from 'tool-forge/forge-service'
|
|
141
|
+
} from 'agent-tool-forge/postgres-store'
|
|
142
|
+
import { buildSidecarContext, createSidecarRouter } from 'agent-tool-forge/forge-service'
|
|
143
143
|
```
|
|
144
144
|
|
|
145
145
|
---
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"_comment": "Manual endpoint manifest. Add endpoints here when OpenAPI discovery is unavailable. Forge uses this to propose tools.",
|
|
4
|
+
"baseUrl": "${API_BASE_URL}",
|
|
5
|
+
"endpoints": [
|
|
6
|
+
{
|
|
7
|
+
"path": "/api/v1/example",
|
|
8
|
+
"method": "GET",
|
|
9
|
+
"name": "get_example",
|
|
10
|
+
"description": "Retrieves example data from the API. Use when the user asks for examples.",
|
|
11
|
+
"params": {
|
|
12
|
+
"id": { "type": "string", "description": "Optional filter by ID" }
|
|
13
|
+
},
|
|
14
|
+
"requiresConfirmation": false
|
|
15
|
+
}
|
|
16
|
+
]
|
|
17
|
+
}
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"_comment": "Optional configuration that front-loads answers to common skill questions. Delete fields you don't need — all are optional. The skills work via dialogue alone without this file.",
|
|
4
|
+
|
|
5
|
+
"project": {
|
|
6
|
+
"name": "my-project",
|
|
7
|
+
"toolsDir": "src/tools",
|
|
8
|
+
"testsDir": "src/tools/__tests__",
|
|
9
|
+
"evalsDir": "evals/dataset",
|
|
10
|
+
"barrelsFile": "src/tools/tools.exports.ts"
|
|
11
|
+
},
|
|
12
|
+
|
|
13
|
+
"api": {
|
|
14
|
+
"baseUrl": "http://localhost:3000",
|
|
15
|
+
"_baseUrlComment": "Base URL for MCP tool routing. Tool mcpRouting.endpoint paths are appended to this.",
|
|
16
|
+
"discovery": {
|
|
17
|
+
"type": "openapi",
|
|
18
|
+
"url": "http://localhost:3333/api-json",
|
|
19
|
+
"_comment": "Or file: { \"type\": \"openapi\", \"file\": \"openapi.json\" }"
|
|
20
|
+
},
|
|
21
|
+
"manifestPath": "api-endpoints.json"
|
|
22
|
+
},
|
|
23
|
+
|
|
24
|
+
"language": "typescript",
|
|
25
|
+
|
|
26
|
+
"validation": {
|
|
27
|
+
"library": "zod",
|
|
28
|
+
"_alternatives": ["pydantic", "joi", "json-schema", "struct-tags"]
|
|
29
|
+
},
|
|
30
|
+
|
|
31
|
+
"testing": {
|
|
32
|
+
"framework": "jest",
|
|
33
|
+
"_alternatives": ["vitest", "pytest", "go-test", "mocha"],
|
|
34
|
+
"command": "npx jest --passWithNoTests"
|
|
35
|
+
},
|
|
36
|
+
|
|
37
|
+
"typeCheck": {
|
|
38
|
+
"command": "npx tsc --noEmit",
|
|
39
|
+
"_comment": "Set to null if your stack doesn't have a type checker"
|
|
40
|
+
},
|
|
41
|
+
|
|
42
|
+
"auth": {
|
|
43
|
+
"contextField": "context.auth",
|
|
44
|
+
"type": "jwt",
|
|
45
|
+
"_alternatives": ["api-key", "oauth", "service-account"]
|
|
46
|
+
},
|
|
47
|
+
|
|
48
|
+
"client": {
|
|
49
|
+
"contextField": "context.client",
|
|
50
|
+
"type": "http",
|
|
51
|
+
"_comment": "The API client your tools use. Could be HTTP, gRPC, SDK wrapper, etc."
|
|
52
|
+
},
|
|
53
|
+
|
|
54
|
+
"hitl": {
|
|
55
|
+
"enabled": false,
|
|
56
|
+
"framework": null,
|
|
57
|
+
"_comment": "Set to true and specify framework (e.g., 'langgraph') if you use human-in-the-loop confirmation for write tools"
|
|
58
|
+
},
|
|
59
|
+
|
|
60
|
+
"mcp": {
|
|
61
|
+
"defaultTransport": "stdio",
|
|
62
|
+
"_alternatives": ["streamable-http"],
|
|
63
|
+
"serverPrefix": "my-project",
|
|
64
|
+
"_comment": "Used by /forge-mcp to name the generated MCP server"
|
|
65
|
+
},
|
|
66
|
+
|
|
67
|
+
"evals": {
|
|
68
|
+
"goldenDir": "evals/dataset/golden",
|
|
69
|
+
"labeledDir": "evals/dataset/labeled",
|
|
70
|
+
"overlapMapFile": "evals/tool-overlap-map.json",
|
|
71
|
+
"seedManifestFile": "evals/seed-manifest.json",
|
|
72
|
+
"_comment": "Paths are relative to project root",
|
|
73
|
+
"defaultMix": {
|
|
74
|
+
"golden": { "total": 10 },
|
|
75
|
+
"labeled": { "straightforward": 3, "ambiguous": 3, "edge": 2, "adversarial": 2 }
|
|
76
|
+
},
|
|
77
|
+
"multiPass": { "passes": 3 },
|
|
78
|
+
"randomSample": { "aggression": "standard" }
|
|
79
|
+
},
|
|
80
|
+
"drift": {
|
|
81
|
+
"threshold": 0.1,
|
|
82
|
+
"windowSize": 5
|
|
83
|
+
},
|
|
84
|
+
"modelMatrix": [],
|
|
85
|
+
"_modelMatrixComment": "Add model names to compare during eval runs, e.g. ['gpt-4o-mini', 'gemini-2.0-flash', 'claude-haiku-4-5-20251001']",
|
|
86
|
+
"costs": {
|
|
87
|
+
"claude-haiku-4-5-20251001": { "input": 0.80, "output": 4.00 },
|
|
88
|
+
"claude-sonnet-4-6": { "input": 3.00, "output": 15.00 },
|
|
89
|
+
"claude-opus-4-6": { "input": 15.00, "output": 75.00 },
|
|
90
|
+
"gpt-4o": { "input": 2.50, "output": 10.00 },
|
|
91
|
+
"gpt-4o-mini": { "input": 0.15, "output": 0.60 },
|
|
92
|
+
"o1": { "input": 15.00, "output": 60.00 },
|
|
93
|
+
"o3-mini": { "input": 1.10, "output": 4.40 },
|
|
94
|
+
"gemini-2.0-flash": { "input": 0.10, "output": 0.40 },
|
|
95
|
+
"gemini-2.5-pro-exp": { "input": 1.25, "output": 10.00 },
|
|
96
|
+
"deepseek-chat": { "input": 0.27, "output": 1.10 }
|
|
97
|
+
},
|
|
98
|
+
|
|
99
|
+
"verification": {
|
|
100
|
+
"enabled": true,
|
|
101
|
+
"verifiersDir": "src/verification",
|
|
102
|
+
"barrelsFile": "src/verification/verifiers.exports.ts",
|
|
103
|
+
"orderPrefix": "A-",
|
|
104
|
+
"_comment": "Order categories: A=attribution, C=compliance, I=interface, R=risk, U=uncertainty"
|
|
105
|
+
}
|
|
106
|
+
}
|
package/lib/config-schema.js
CHANGED
|
@@ -14,7 +14,7 @@ export const CONFIG_DEFAULTS = {
|
|
|
14
14
|
adminKey: null,
|
|
15
15
|
database: { type: 'sqlite', url: null },
|
|
16
16
|
conversation: { store: 'sqlite', window: 25, redis: {} },
|
|
17
|
-
sidecar: {
|
|
17
|
+
sidecar: { port: 8001 }, // port: used in direct-run mode only (node lib/forge-service.js)
|
|
18
18
|
agents: [],
|
|
19
19
|
rateLimit: {
|
|
20
20
|
enabled: false,
|
|
@@ -100,9 +100,9 @@ export function validateConfig(raw = {}) {
|
|
|
100
100
|
errors.push('auth.signingKey is required when auth.mode is "verify"');
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
-
//
|
|
104
|
-
if (raw.
|
|
105
|
-
errors.push('auth.signingKey is required when auth.mode is "verify"
|
|
103
|
+
// verify mode always requires a signingKey
|
|
104
|
+
if (raw.auth?.mode === 'verify' && !raw.auth?.signingKey) {
|
|
105
|
+
errors.push('auth.signingKey is required when auth.mode is "verify". Set it in forge.config.json or via a ${ENV_VAR} reference.');
|
|
106
106
|
}
|
|
107
107
|
|
|
108
108
|
// defaultHitlLevel
|
package/lib/config.d.ts
CHANGED
|
@@ -23,9 +23,13 @@ export interface DatabaseConfig {
|
|
|
23
23
|
}
|
|
24
24
|
|
|
25
25
|
export interface AuthConfig {
|
|
26
|
-
mode?: 'trust' | 'verify';
|
|
27
|
-
signingKey?: string;
|
|
26
|
+
mode?: 'trust' | 'verify' | 'none';
|
|
27
|
+
signingKey?: string | null;
|
|
28
28
|
claimsPath?: string;
|
|
29
|
+
/** Admin Bearer token. Replaces top-level `adminKey`. Supports `${VAR}` env references. */
|
|
30
|
+
adminToken?: string | null;
|
|
31
|
+
/** Metrics scrape token for /metrics. Supports `${VAR}` env references. */
|
|
32
|
+
metricsToken?: string | null;
|
|
29
33
|
}
|
|
30
34
|
|
|
31
35
|
export interface AgentConfig {
|
|
@@ -43,6 +47,26 @@ export interface AgentConfig {
|
|
|
43
47
|
enabled?: number;
|
|
44
48
|
}
|
|
45
49
|
|
|
50
|
+
export interface AgentRouterConfig {
|
|
51
|
+
endpoint?: string | null;
|
|
52
|
+
method?: string;
|
|
53
|
+
headers?: Record<string, string>;
|
|
54
|
+
inputField?: string;
|
|
55
|
+
outputField?: string;
|
|
56
|
+
sessionField?: string;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export interface GatesConfig {
|
|
60
|
+
passRate?: number | null;
|
|
61
|
+
maxCost?: number | null;
|
|
62
|
+
p95LatencyMs?: number | null;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
export interface FixturesConfig {
|
|
66
|
+
dir?: string;
|
|
67
|
+
ttlDays?: number;
|
|
68
|
+
}
|
|
69
|
+
|
|
46
70
|
export interface SidecarConfig {
|
|
47
71
|
auth?: AuthConfig;
|
|
48
72
|
defaultModel?: string;
|
|
@@ -50,14 +74,19 @@ export interface SidecarConfig {
|
|
|
50
74
|
allowUserModelSelect?: boolean;
|
|
51
75
|
allowUserHitlConfig?: boolean;
|
|
52
76
|
systemPrompt?: string;
|
|
53
|
-
|
|
77
|
+
/** @deprecated Use `auth.adminToken` instead. */
|
|
78
|
+
adminKey?: string | null;
|
|
54
79
|
conversation?: ConversationConfig;
|
|
55
80
|
rateLimit?: RateLimitConfig;
|
|
56
81
|
verification?: VerificationConfig;
|
|
57
82
|
database?: DatabaseConfig;
|
|
58
|
-
|
|
83
|
+
/** `port` is used in direct-run mode only (`node lib/forge-service.js`). `createSidecar()` uses `SidecarOptions.port`. */
|
|
84
|
+
sidecar?: { port?: number };
|
|
59
85
|
agents?: AgentConfig[];
|
|
60
86
|
costs?: Record<string, { input: number; output: number }>;
|
|
87
|
+
agent?: AgentRouterConfig;
|
|
88
|
+
gates?: GatesConfig;
|
|
89
|
+
fixtures?: FixturesConfig;
|
|
61
90
|
}
|
|
62
91
|
|
|
63
92
|
export const CONFIG_DEFAULTS: SidecarConfig;
|
package/lib/forge-service.js
CHANGED
|
@@ -67,7 +67,16 @@ const PROJECT_ROOT = resolve(__dirname, '..');
|
|
|
67
67
|
* @returns {Promise<{ auth, promptStore, preferenceStore, conversationStore, hitlEngine, verifierRunner, agentRegistry, db, config, env, rateLimiter, configPath, evalStore, chatAuditStore, verifierStore, pgStore, _redisClient, _pgPool }>}
|
|
68
68
|
*/
|
|
69
69
|
export async function buildSidecarContext(config, db, env = {}, opts = {}) {
|
|
70
|
-
|
|
70
|
+
// Resolve ${VAR} references in auth token fields at startup, not per-request.
|
|
71
|
+
// No fallback for signingKey: if the env var is absent, resolve to null so createAuth
|
|
72
|
+
// fails closed in verify mode rather than using the literal "${VAR}" string as the key.
|
|
73
|
+
const resolvedAuth = config.auth ? {
|
|
74
|
+
...config.auth,
|
|
75
|
+
signingKey: resolveSecret(config.auth.signingKey, env),
|
|
76
|
+
adminToken: resolveSecret(config.auth.adminToken, env),
|
|
77
|
+
metricsToken: resolveSecret(config.auth.metricsToken, env),
|
|
78
|
+
} : config.auth;
|
|
79
|
+
const auth = createAuth(resolvedAuth);
|
|
71
80
|
|
|
72
81
|
let redisClient = null;
|
|
73
82
|
let pgPool = null;
|
|
@@ -103,6 +112,7 @@ export async function buildSidecarContext(config, db, env = {}, opts = {}) {
|
|
|
103
112
|
idleTimeoutMillis: 30000,
|
|
104
113
|
max: 10
|
|
105
114
|
});
|
|
115
|
+
pgPool.on('error', err => process.stderr.write(`[forge] pg pool error: ${err.message}\n`));
|
|
106
116
|
await pgPool.query(SCHEMA); // ensure all tables exist
|
|
107
117
|
}
|
|
108
118
|
|
|
@@ -142,9 +152,14 @@ export async function buildSidecarContext(config, db, env = {}, opts = {}) {
|
|
|
142
152
|
// project directory, not into the installed package.
|
|
143
153
|
const configPath = opts?.configPath ?? resolve(process.cwd(), 'forge.config.json');
|
|
144
154
|
|
|
155
|
+
// Return resolved auth config so applyRouteAuth sees literal tokens (not ${VAR})
|
|
156
|
+
const resolvedConfig = resolvedAuth !== config.auth
|
|
157
|
+
? { ...config, auth: resolvedAuth }
|
|
158
|
+
: config;
|
|
159
|
+
|
|
145
160
|
return {
|
|
146
161
|
auth, promptStore, preferenceStore, conversationStore, hitlEngine, verifierRunner,
|
|
147
|
-
agentRegistry, db, config, env, rateLimiter, configPath,
|
|
162
|
+
agentRegistry, db, config: resolvedConfig, env, rateLimiter, configPath,
|
|
148
163
|
evalStore, chatAuditStore, verifierStore, pgStore,
|
|
149
164
|
_redisClient: redisClient, _pgPool: pgPool
|
|
150
165
|
};
|
|
@@ -304,7 +319,8 @@ export function createSidecarRouter(ctx, options = {}) {
|
|
|
304
319
|
if (sidecarPath === '/agent-api/user/preferences') {
|
|
305
320
|
if (req.method === 'GET') return handleGetPreferences(req, res, ctx);
|
|
306
321
|
if (req.method === 'PUT') return handlePutPreferences(req, res, ctx);
|
|
307
|
-
|
|
322
|
+
sendJson(res, 405, { error: 'Method not allowed' });
|
|
323
|
+
return;
|
|
308
324
|
}
|
|
309
325
|
if (sidecarPath.startsWith('/agent-api/conversations')) {
|
|
310
326
|
return handleConversations(req, res, ctx);
|
|
@@ -374,8 +390,14 @@ export function createSidecarRouter(ctx, options = {}) {
|
|
|
374
390
|
|
|
375
391
|
// ── Custom routes (consumer-provided) ─────────────────────────────────
|
|
376
392
|
if (customRoutes) {
|
|
377
|
-
|
|
378
|
-
|
|
393
|
+
try {
|
|
394
|
+
const handled = await customRoutes(req, res, ctx);
|
|
395
|
+
if (handled) return;
|
|
396
|
+
} catch (err) {
|
|
397
|
+
process.stderr.write(`[forge] customRoutes error: ${err.message}\n`);
|
|
398
|
+
if (!res.headersSent) sendJson(res, 500, { error: 'Internal server error' });
|
|
399
|
+
return;
|
|
400
|
+
}
|
|
379
401
|
}
|
|
380
402
|
|
|
381
403
|
// ── 404 fallback ───────────────────────────────────────────────────────
|
|
@@ -630,7 +652,8 @@ function createDirectServer() {
|
|
|
630
652
|
if (sidecarPath === '/agent-api/user/preferences') {
|
|
631
653
|
if (req.method === 'GET') return handleGetPreferences(req, res, sidecarCtx);
|
|
632
654
|
if (req.method === 'PUT') return handlePutPreferences(req, res, sidecarCtx);
|
|
633
|
-
|
|
655
|
+
json(res, 405, { error: 'Method not allowed' });
|
|
656
|
+
return;
|
|
634
657
|
}
|
|
635
658
|
if (sidecarPath.startsWith('/agent-api/conversations')) {
|
|
636
659
|
return handleConversations(req, res, sidecarCtx);
|
package/lib/hitl-engine.d.ts
CHANGED
|
@@ -37,9 +37,15 @@ export class HitlEngine {
|
|
|
37
37
|
|
|
38
38
|
/**
|
|
39
39
|
* Retrieve and consume the paused state for a resume token.
|
|
40
|
-
*
|
|
40
|
+
* Returns null if the token has expired or does not exist (does not throw).
|
|
41
41
|
*/
|
|
42
|
-
resume(resumeToken: string): Promise<
|
|
42
|
+
resume(resumeToken: string): Promise<object | null>;
|
|
43
|
+
|
|
44
|
+
/**
|
|
45
|
+
* Tear down any backend connections (Redis subscriber, Postgres pool, etc.).
|
|
46
|
+
* Call on graceful shutdown. Synchronous.
|
|
47
|
+
*/
|
|
48
|
+
destroy(): void;
|
|
43
49
|
}
|
|
44
50
|
|
|
45
51
|
/**
|
package/lib/index.js
CHANGED
|
@@ -8,8 +8,7 @@
|
|
|
8
8
|
*/
|
|
9
9
|
|
|
10
10
|
import { readFileSync, existsSync, writeFileSync } from 'fs';
|
|
11
|
-
import { resolve
|
|
12
|
-
import { fileURLToPath } from 'url';
|
|
11
|
+
import { resolve } from 'path';
|
|
13
12
|
import { runTui } from './tui.js';
|
|
14
13
|
import { addEndpointManually } from './manual-entry.js';
|
|
15
14
|
import * as readline from 'readline';
|
|
@@ -18,7 +17,7 @@ const CONFIG_FILE = 'forge.config.json';
|
|
|
18
17
|
const PENDING_SPEC_FILE = 'forge-pending-tool.json';
|
|
19
18
|
|
|
20
19
|
function findProjectRoot() {
|
|
21
|
-
return
|
|
20
|
+
return process.cwd();
|
|
22
21
|
}
|
|
23
22
|
|
|
24
23
|
function loadConfig() {
|
package/lib/init.js
CHANGED
|
@@ -499,7 +499,7 @@ export async function runInit(opts = {}) {
|
|
|
499
499
|
const adminKeyValue = hasSidecar ? generateAdminKey() : null;
|
|
500
500
|
|
|
501
501
|
if (hasSidecar) {
|
|
502
|
-
raw.sidecar = {
|
|
502
|
+
raw.sidecar = { port: 8001 };
|
|
503
503
|
raw.adminKey = '${FORGE_ADMIN_KEY}';
|
|
504
504
|
raw.auth = { mode: authMode };
|
|
505
505
|
if (authMode === 'verify') {
|
package/lib/sidecar.d.ts
CHANGED
|
@@ -44,11 +44,20 @@ export interface SidecarInstance {
|
|
|
44
44
|
|
|
45
45
|
export function createSidecar(config?: Partial<SidecarConfig>, options?: SidecarOptions): Promise<SidecarInstance>;
|
|
46
46
|
|
|
47
|
+
export interface SidecarRouterOptions {
|
|
48
|
+
/** Absolute path of the directory from which static files are served for /widget/* routes. Defaults to the package's widget/ directory. */
|
|
49
|
+
widgetDir?: string;
|
|
50
|
+
/** Optional async handler for /mcp routes. */
|
|
51
|
+
mcpHandler?: (req: object, res: object) => Promise<void> | void;
|
|
52
|
+
/** Called before the 404 fallback. Return true if the request was handled. */
|
|
53
|
+
customRoutes?: (req: object, res: object, ctx: SidecarContext) => Promise<boolean> | boolean;
|
|
54
|
+
}
|
|
55
|
+
|
|
47
56
|
// Advanced consumers
|
|
48
|
-
export function buildSidecarContext(config: SidecarConfig, db: object, env?: Record<string, string>, opts?:
|
|
49
|
-
export function createSidecarRouter(ctx: SidecarContext, opts?:
|
|
57
|
+
export function buildSidecarContext(config: SidecarConfig, db: object, env?: Record<string, string>, opts?: { configPath?: string }): Promise<SidecarContext>;
|
|
58
|
+
export function createSidecarRouter(ctx: SidecarContext, opts?: SidecarRouterOptions): (req: object, res: object) => Promise<void>;
|
|
50
59
|
|
|
51
|
-
export { createAuth } from './auth.js';
|
|
60
|
+
export { createAuth, resolveSecret, authenticateAdmin } from './auth.js';
|
|
52
61
|
export type { AuthResult, AuthConfig, Authenticator } from './auth.js';
|
|
53
62
|
|
|
54
63
|
export { reactLoop } from './react-engine.js';
|
|
@@ -82,8 +91,10 @@ export class AgentRegistry {
|
|
|
82
91
|
}
|
|
83
92
|
|
|
84
93
|
export class VerifierRunner {
|
|
85
|
-
constructor(db: object, config?: object, workerPool?: object);
|
|
94
|
+
constructor(db: object, config?: object, pgPool?: object | null, workerPool?: object | null);
|
|
86
95
|
loadFromDb(db: object): Promise<void>;
|
|
87
|
-
|
|
96
|
+
registerVerifiers(toolName: string, verifiers: object[]): void;
|
|
97
|
+
verify(toolName: string, args: object, result: unknown): Promise<{ outcome: 'pass' | 'warn' | 'block'; message: string | null; verifierName: string | null }>;
|
|
98
|
+
logResult(sessionId: string, toolName: string, result: object): void;
|
|
88
99
|
destroy(): void;
|
|
89
100
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agent-tool-forge",
|
|
3
|
-
"version": "0.4.
|
|
3
|
+
"version": "0.4.9",
|
|
4
4
|
"description": "Production LLM agent sidecar + Claude Code skill library for building, testing, and running tool-calling agents.",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"llm",
|
|
@@ -29,6 +29,8 @@
|
|
|
29
29
|
"files": [
|
|
30
30
|
"lib",
|
|
31
31
|
"widget",
|
|
32
|
+
"config",
|
|
33
|
+
"skills",
|
|
32
34
|
"!lib/**/*.test.js",
|
|
33
35
|
"!lib/__fixtures__",
|
|
34
36
|
"!widget/**/*.test.js"
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# /forge-eval — Generate Eval Suites
|
|
2
|
+
|
|
3
|
+
Generate golden and labeled eval JSON files for a named tool. Run this skill after a tool is implemented and tests are green.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Step 1 — Identify the Tool
|
|
8
|
+
|
|
9
|
+
Ask the user which tool to generate evals for, or read it from context if `/forge-tool` just completed.
|
|
10
|
+
|
|
11
|
+
Read the tool's ToolDefinition from `tools/<name>.tool.js`:
|
|
12
|
+
- `name`, `description`, `schema`, `triggerPhrases`, `category`, `consequenceLevel`
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Step 2 — Generate Golden Eval Suite
|
|
17
|
+
|
|
18
|
+
Generate **5–10 golden cases** covering:
|
|
19
|
+
- Happy path with typical inputs
|
|
20
|
+
- Edge cases: empty results, boundary values, missing optional params
|
|
21
|
+
- Error paths: invalid input, service unavailable
|
|
22
|
+
|
|
23
|
+
Each golden case follows this schema:
|
|
24
|
+
```json
|
|
25
|
+
{
|
|
26
|
+
"id": "case-001",
|
|
27
|
+
"description": "What this case tests",
|
|
28
|
+
"input": { "message": "User's natural-language request" },
|
|
29
|
+
"expectedTool": "<tool_name>",
|
|
30
|
+
"expectedArgs": { "param": "value" },
|
|
31
|
+
"checks": [
|
|
32
|
+
{ "type": "tool_called", "tool": "<tool_name>" },
|
|
33
|
+
{ "type": "arg_equals", "arg": "param", "value": "value" }
|
|
34
|
+
]
|
|
35
|
+
}
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Write to `evals/<name>.golden.json` as a JSON array.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Step 3 — Generate Labeled Eval Suite
|
|
43
|
+
|
|
44
|
+
Generate **2–3 labeled (multi-tool) scenarios** where the agent must choose between 2+ tools or sequence multiple calls:
|
|
45
|
+
- Scenario where the tool is the correct choice over a similar tool
|
|
46
|
+
- Scenario where the tool is called followed by a second tool
|
|
47
|
+
- Scenario where the tool should NOT be called (wrong intent)
|
|
48
|
+
|
|
49
|
+
Each labeled case:
|
|
50
|
+
```json
|
|
51
|
+
{
|
|
52
|
+
"id": "labeled-001",
|
|
53
|
+
"description": "What this scenario tests",
|
|
54
|
+
"input": { "message": "User's multi-intent request" },
|
|
55
|
+
"label": "correct" | "incorrect" | "partial",
|
|
56
|
+
"expectedTools": ["<tool_name>"],
|
|
57
|
+
"checks": [...]
|
|
58
|
+
}
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Write to `evals/<name>.labeled.json` as a JSON array.
|
|
62
|
+
|
|
63
|
+
---
|
|
64
|
+
|
|
65
|
+
## Step 4 — Validate
|
|
66
|
+
|
|
67
|
+
If available, run `node lib/index.js run --eval evals/<name>.golden.json --dry-run` to validate the JSON schema of the generated file.
|
|
68
|
+
|
|
69
|
+
Print a summary: N golden cases, M labeled scenarios, file paths written.
|