npm - agent-tool-forge - Versions diffs - 0.4.6 → 0.4.9 - Mend

agent-tool-forge 0.4.6 → 0.4.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/README.md +16 -16
package/config/api-endpoints.template.json +17 -0
package/config/forge.config.template.json +106 -0
package/lib/config-schema.js +4 -4
package/lib/config.d.ts +33 -4
package/lib/forge-service.js +29 -6
package/lib/hitl-engine.d.ts +8 -2
package/lib/index.js +2 -3
package/lib/init.js +1 -1
package/lib/sidecar.d.ts +16 -5
package/package.json +3 -1
package/skills/forge-eval/SKILL.md +69 -0
package/skills/forge-eval/references/assertion-patterns.md +265 -0
package/skills/forge-eval/references/eval-types.md +262 -0
package/skills/forge-eval/references/overlap-map.md +89 -0
package/skills/forge-mcp/SKILL.md +62 -0
package/skills/forge-mcp/references/mcp-templates.md +302 -0
package/skills/forge-mcp/references/tool-to-mcp-mapping.md +108 -0
package/skills/forge-tool/SKILL.md +112 -0
package/skills/forge-tool/references/description-contract.md +102 -0
package/skills/forge-tool/references/extension-points.md +120 -0
package/skills/forge-tool/references/pending-spec.md +53 -0
package/skills/forge-tool/references/tool-shape.md +106 -0
package/skills/forge-verifier/SKILL.md +78 -0
package/skills/forge-verifier/references/output-groups.md +39 -0
package/skills/forge-verifier/references/verifier-pattern.md +83 -0
package/skills/forge-verifier/references/verifier-stubs.md +147 -0

package/README.md CHANGED Viewed

@@ -40,10 +40,10 @@ See [docs/tui-workflow.md](docs/tui-workflow.md) for a start-to-finish walkthrou
 ```bash
 # Global install (available in all projects)
-cp -r tool-forge/skills/forge-tool     ~/.claude/skills/
-cp -r tool-forge/skills/forge-eval     ~/.claude/skills/
-cp -r tool-forge/skills/forge-mcp      ~/.claude/skills/
-cp -r tool-forge/skills/forge-verifier ~/.claude/skills/
+cp -r node_modules/agent-tool-forge/skills/forge-tool     ~/.claude/skills/
+cp -r node_modules/agent-tool-forge/skills/forge-eval     ~/.claude/skills/
+cp -r node_modules/agent-tool-forge/skills/forge-mcp      ~/.claude/skills/
+cp -r node_modules/agent-tool-forge/skills/forge-verifier ~/.claude/skills/
 ```
 Then in any Claude Code session:
@@ -123,23 +123,23 @@ All subpaths ship with TypeScript declarations.
 ```js
 import { createSidecar }      from 'agent-tool-forge'               // main entry
-import { reactLoop }           from 'tool-forge/react-engine'
-import { createAuth }          from 'tool-forge/auth'
-import { makeConversationStore } from 'tool-forge/conversation-store'
-import { mergeDefaults }       from 'tool-forge/config'
-import { makeHitlEngine }      from 'tool-forge/hitl-engine'
-import { makePromptStore }     from 'tool-forge/prompt-store'
-import { makePreferenceStore } from 'tool-forge/preference-store'
-import { makeRateLimiter }     from 'tool-forge/rate-limiter'
-import { getDb }               from 'tool-forge/db'
-import { initSSE }             from 'tool-forge/sse'
+import { reactLoop }           from 'agent-tool-forge/react-engine'
+import { createAuth }          from 'agent-tool-forge/auth'
+import { makeConversationStore } from 'agent-tool-forge/conversation-store'
+import { mergeDefaults }       from 'agent-tool-forge/config'
+import { makeHitlEngine }      from 'agent-tool-forge/hitl-engine'
+import { makePromptStore }     from 'agent-tool-forge/prompt-store'
+import { makePreferenceStore } from 'agent-tool-forge/preference-store'
+import { makeRateLimiter }     from 'agent-tool-forge/rate-limiter'
+import { getDb }               from 'agent-tool-forge/db'
+import { initSSE }             from 'agent-tool-forge/sse'
 import {
   PostgresStore,
   PostgresEvalStore,
   PostgresChatAuditStore,
   PostgresVerifierStore
-}                              from 'tool-forge/postgres-store'
-import { buildSidecarContext, createSidecarRouter } from 'tool-forge/forge-service'
+}                              from 'agent-tool-forge/postgres-store'
+import { buildSidecarContext, createSidecarRouter } from 'agent-tool-forge/forge-service'
 ```
 ---

package/config/api-endpoints.template.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "_comment": "Manual endpoint manifest. Add endpoints here when OpenAPI discovery is unavailable. Forge uses this to propose tools.",
+  "baseUrl": "${API_BASE_URL}",
+  "endpoints": [
+    {
+      "path": "/api/v1/example",
+      "method": "GET",
+      "name": "get_example",
+      "description": "Retrieves example data from the API. Use when the user asks for examples.",
+      "params": {
+        "id": { "type": "string", "description": "Optional filter by ID" }
+      },
+      "requiresConfirmation": false
+    }
+  ]
+}

package/config/forge.config.template.json ADDED Viewed

@@ -0,0 +1,106 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "_comment": "Optional configuration that front-loads answers to common skill questions. Delete fields you don't need — all are optional. The skills work via dialogue alone without this file.",
+  "project": {
+    "name": "my-project",
+    "toolsDir": "src/tools",
+    "testsDir": "src/tools/__tests__",
+    "evalsDir": "evals/dataset",
+    "barrelsFile": "src/tools/tools.exports.ts"
+  },
+  "api": {
+    "baseUrl": "http://localhost:3000",
+    "_baseUrlComment": "Base URL for MCP tool routing. Tool mcpRouting.endpoint paths are appended to this.",
+    "discovery": {
+      "type": "openapi",
+      "url": "http://localhost:3333/api-json",
+      "_comment": "Or file: { \"type\": \"openapi\", \"file\": \"openapi.json\" }"
+    },
+    "manifestPath": "api-endpoints.json"
+  },
+  "language": "typescript",
+  "validation": {
+    "library": "zod",
+    "_alternatives": ["pydantic", "joi", "json-schema", "struct-tags"]
+  },
+  "testing": {
+    "framework": "jest",
+    "_alternatives": ["vitest", "pytest", "go-test", "mocha"],
+    "command": "npx jest --passWithNoTests"
+  },
+  "typeCheck": {
+    "command": "npx tsc --noEmit",
+    "_comment": "Set to null if your stack doesn't have a type checker"
+  },
+  "auth": {
+    "contextField": "context.auth",
+    "type": "jwt",
+    "_alternatives": ["api-key", "oauth", "service-account"]
+  },
+  "client": {
+    "contextField": "context.client",
+    "type": "http",
+    "_comment": "The API client your tools use. Could be HTTP, gRPC, SDK wrapper, etc."
+  },
+  "hitl": {
+    "enabled": false,
+    "framework": null,
+    "_comment": "Set to true and specify framework (e.g., 'langgraph') if you use human-in-the-loop confirmation for write tools"
+  },
+  "mcp": {
+    "defaultTransport": "stdio",
+    "_alternatives": ["streamable-http"],
+    "serverPrefix": "my-project",
+    "_comment": "Used by /forge-mcp to name the generated MCP server"
+  },
+  "evals": {
+    "goldenDir": "evals/dataset/golden",
+    "labeledDir": "evals/dataset/labeled",
+    "overlapMapFile": "evals/tool-overlap-map.json",
+    "seedManifestFile": "evals/seed-manifest.json",
+    "_comment": "Paths are relative to project root",
+    "defaultMix": {
+      "golden": { "total": 10 },
+      "labeled": { "straightforward": 3, "ambiguous": 3, "edge": 2, "adversarial": 2 }
+    },
+    "multiPass": { "passes": 3 },
+    "randomSample": { "aggression": "standard" }
+  },
+  "drift": {
+    "threshold": 0.1,
+    "windowSize": 5
+  },
+  "modelMatrix": [],
+  "_modelMatrixComment": "Add model names to compare during eval runs, e.g. ['gpt-4o-mini', 'gemini-2.0-flash', 'claude-haiku-4-5-20251001']",
+  "costs": {
+    "claude-haiku-4-5-20251001":  { "input": 0.80,  "output": 4.00  },
+    "claude-sonnet-4-6":          { "input": 3.00,  "output": 15.00 },
+    "claude-opus-4-6":            { "input": 15.00, "output": 75.00 },
+    "gpt-4o":                     { "input": 2.50,  "output": 10.00 },
+    "gpt-4o-mini":                { "input": 0.15,  "output": 0.60  },
+    "o1":                         { "input": 15.00, "output": 60.00 },
+    "o3-mini":                    { "input": 1.10,  "output": 4.40  },
+    "gemini-2.0-flash":           { "input": 0.10,  "output": 0.40  },
+    "gemini-2.5-pro-exp":         { "input": 1.25,  "output": 10.00 },
+    "deepseek-chat":              { "input": 0.27,  "output": 1.10  }
+  },
+  "verification": {
+    "enabled": true,
+    "verifiersDir": "src/verification",
+    "barrelsFile": "src/verification/verifiers.exports.ts",
+    "orderPrefix": "A-",
+    "_comment": "Order categories: A=attribution, C=compliance, I=interface, R=risk, U=uncertainty"
+  }
+}

package/lib/config-schema.js CHANGED Viewed

@@ -14,7 +14,7 @@ export const CONFIG_DEFAULTS = {
   adminKey: null,
   database: { type: 'sqlite', url: null },
   conversation: { store: 'sqlite', window: 25, redis: {} },
-  sidecar: { enabled: false, port: 8001 },
+  sidecar: { port: 8001 },  // port: used in direct-run mode only (node lib/forge-service.js)
   agents: [],
   rateLimit: {
     enabled: false,
@@ -100,9 +100,9 @@ export function validateConfig(raw = {}) {
     errors.push('auth.signingKey is required when auth.mode is "verify"');
   }
-  // Startup validation: sidecar enabled + verify mode + no signingKey
-  if (raw.sidecar?.enabled && raw.auth?.mode === 'verify' && !raw.auth?.signingKey) {
-    errors.push('auth.signingKey is required when auth.mode is "verify" and sidecar is enabled. Set FORGE_JWT_KEY in .env');
+  // verify mode always requires a signingKey
+  if (raw.auth?.mode === 'verify' && !raw.auth?.signingKey) {
+    errors.push('auth.signingKey is required when auth.mode is "verify". Set it in forge.config.json or via a ${ENV_VAR} reference.');
   }
   // defaultHitlLevel

package/lib/config.d.ts CHANGED Viewed

@@ -23,9 +23,13 @@ export interface DatabaseConfig {
 }
 export interface AuthConfig {
-  mode?: 'trust' | 'verify';
-  signingKey?: string;
+  mode?: 'trust' | 'verify' | 'none';
+  signingKey?: string | null;
   claimsPath?: string;
+  /** Admin Bearer token. Replaces top-level `adminKey`. Supports `${VAR}` env references. */
+  adminToken?: string | null;
+  /** Metrics scrape token for /metrics. Supports `${VAR}` env references. */
+  metricsToken?: string | null;
 }
 export interface AgentConfig {
@@ -43,6 +47,26 @@ export interface AgentConfig {
   enabled?: number;
 }
+export interface AgentRouterConfig {
+  endpoint?: string | null;
+  method?: string;
+  headers?: Record<string, string>;
+  inputField?: string;
+  outputField?: string;
+  sessionField?: string;
+}
+export interface GatesConfig {
+  passRate?: number | null;
+  maxCost?: number | null;
+  p95LatencyMs?: number | null;
+}
+export interface FixturesConfig {
+  dir?: string;
+  ttlDays?: number;
+}
 export interface SidecarConfig {
   auth?: AuthConfig;
   defaultModel?: string;
@@ -50,14 +74,19 @@ export interface SidecarConfig {
   allowUserModelSelect?: boolean;
   allowUserHitlConfig?: boolean;
   systemPrompt?: string;
-  adminKey?: string;
+  /** @deprecated Use `auth.adminToken` instead. */
+  adminKey?: string | null;
   conversation?: ConversationConfig;
   rateLimit?: RateLimitConfig;
   verification?: VerificationConfig;
   database?: DatabaseConfig;
-  sidecar?: { enabled?: boolean; port?: number };
+  /** `port` is used in direct-run mode only (`node lib/forge-service.js`). `createSidecar()` uses `SidecarOptions.port`. */
+  sidecar?: { port?: number };
   agents?: AgentConfig[];
   costs?: Record<string, { input: number; output: number }>;
+  agent?: AgentRouterConfig;
+  gates?: GatesConfig;
+  fixtures?: FixturesConfig;
 }
 export const CONFIG_DEFAULTS: SidecarConfig;

package/lib/forge-service.js CHANGED Viewed

@@ -67,7 +67,16 @@ const PROJECT_ROOT = resolve(__dirname, '..');
  * @returns {Promise<{ auth, promptStore, preferenceStore, conversationStore, hitlEngine, verifierRunner, agentRegistry, db, config, env, rateLimiter, configPath, evalStore, chatAuditStore, verifierStore, pgStore, _redisClient, _pgPool }>}
  */
 export async function buildSidecarContext(config, db, env = {}, opts = {}) {
-  const auth = createAuth(config.auth);
+  // Resolve ${VAR} references in auth token fields at startup, not per-request.
+  // No fallback for signingKey: if the env var is absent, resolve to null so createAuth
+  // fails-closed in verify mode rather than using the literal "${VAR}" string as the key.
+  const resolvedAuth = config.auth ? {
+    ...config.auth,
+    signingKey: resolveSecret(config.auth.signingKey, env),
+    adminToken: resolveSecret(config.auth.adminToken, env),
+    metricsToken: resolveSecret(config.auth.metricsToken, env),
+  } : config.auth;
+  const auth = createAuth(resolvedAuth);
   let redisClient = null;
   let pgPool = null;
@@ -103,6 +112,7 @@ export async function buildSidecarContext(config, db, env = {}, opts = {}) {
       idleTimeoutMillis: 30000,
       max: 10
     });
+    pgPool.on('error', err => process.stderr.write(`[forge] pg pool error: ${err.message}\n`));
     await pgPool.query(SCHEMA);  // ensure all tables exist
   }
@@ -142,9 +152,14 @@ export async function buildSidecarContext(config, db, env = {}, opts = {}) {
   // project directory, not into the installed package.
   const configPath = opts?.configPath ?? resolve(process.cwd(), 'forge.config.json');
+  // Return resolved auth config so applyRouteAuth sees literal tokens (not ${VAR})
+  const resolvedConfig = resolvedAuth !== config.auth
+    ? { ...config, auth: resolvedAuth }
+    : config;
   return {
     auth, promptStore, preferenceStore, conversationStore, hitlEngine, verifierRunner,
-    agentRegistry, db, config, env, rateLimiter, configPath,
+    agentRegistry, db, config: resolvedConfig, env, rateLimiter, configPath,
     evalStore, chatAuditStore, verifierStore, pgStore,
     _redisClient: redisClient, _pgPool: pgPool
   };
@@ -304,7 +319,8 @@ export function createSidecarRouter(ctx, options = {}) {
     if (sidecarPath === '/agent-api/user/preferences') {
       if (req.method === 'GET') return handleGetPreferences(req, res, ctx);
       if (req.method === 'PUT') return handlePutPreferences(req, res, ctx);
-      else { sendJson(res, 405, { error: 'Method not allowed' }); return; }
+      sendJson(res, 405, { error: 'Method not allowed' });
+      return;
     }
     if (sidecarPath.startsWith('/agent-api/conversations')) {
       return handleConversations(req, res, ctx);
@@ -374,8 +390,14 @@ export function createSidecarRouter(ctx, options = {}) {
     // ── Custom routes (consumer-provided) ─────────────────────────────────
     if (customRoutes) {
-      const handled = await customRoutes(req, res, ctx);
-      if (handled) return;
+      try {
+        const handled = await customRoutes(req, res, ctx);
+        if (handled) return;
+      } catch (err) {
+        process.stderr.write(`[forge] customRoutes error: ${err.message}\n`);
+        if (!res.headersSent) sendJson(res, 500, { error: 'Internal server error' });
+        return;
+      }
     }
     // ── 404 fallback ───────────────────────────────────────────────────────
@@ -630,7 +652,8 @@ function createDirectServer() {
       if (sidecarPath === '/agent-api/user/preferences') {
         if (req.method === 'GET') return handleGetPreferences(req, res, sidecarCtx);
         if (req.method === 'PUT') return handlePutPreferences(req, res, sidecarCtx);
-        else { json(res, 405, { error: 'Method not allowed' }); return; }
+        json(res, 405, { error: 'Method not allowed' });
+        return;
       }
       if (sidecarPath.startsWith('/agent-api/conversations')) {
         return handleConversations(req, res, sidecarCtx);

package/lib/hitl-engine.d.ts CHANGED Viewed

@@ -37,9 +37,15 @@ export class HitlEngine {
   /**
    * Retrieve and consume the paused state for a resume token.
-   * Throws if the token has expired or does not exist.
+   * Returns null if the token has expired or does not exist (does not throw).
    */
-  resume(resumeToken: string): Promise<unknown>;
+  resume(resumeToken: string): Promise<object | null>;
+  /**
+   * Tear down any backend connections (Redis subscriber, Postgres pool, etc.).
+   * Call on graceful shutdown. Synchronous.
+   */
+  destroy(): void;
 }
 /**

package/lib/index.js CHANGED Viewed

@@ -8,8 +8,7 @@
  */
 import { readFileSync, existsSync, writeFileSync } from 'fs';
-import { resolve, dirname } from 'path';
-import { fileURLToPath } from 'url';
+import { resolve } from 'path';
 import { runTui } from './tui.js';
 import { addEndpointManually } from './manual-entry.js';
 import * as readline from 'readline';
@@ -18,7 +17,7 @@ const CONFIG_FILE = 'forge.config.json';
 const PENDING_SPEC_FILE = 'forge-pending-tool.json';
 function findProjectRoot() {
-  return resolve(dirname(fileURLToPath(import.meta.url)), '..');
+  return process.cwd();
 }
 function loadConfig() {

package/lib/init.js CHANGED Viewed

@@ -499,7 +499,7 @@ export async function runInit(opts = {}) {
     const adminKeyValue = hasSidecar ? generateAdminKey() : null;
     if (hasSidecar) {
-      raw.sidecar = { enabled: true, port: 8001 };
+      raw.sidecar = { port: 8001 };
       raw.adminKey = '${FORGE_ADMIN_KEY}';
       raw.auth = { mode: authMode };
       if (authMode === 'verify') {

package/lib/sidecar.d.ts CHANGED Viewed

@@ -44,11 +44,20 @@ export interface SidecarInstance {
 export function createSidecar(config?: Partial<SidecarConfig>, options?: SidecarOptions): Promise<SidecarInstance>;
+export interface SidecarRouterOptions {
+  /** Absolute path to serve static files from for /widget/* routes. Defaults to package widget/. */
+  widgetDir?: string;
+  /** Optional async handler for /mcp routes. */
+  mcpHandler?: (req: object, res: object) => Promise<void> | void;
+  /** Called before the 404 fallback. Return true if the request was handled. */
+  customRoutes?: (req: object, res: object, ctx: SidecarContext) => Promise<boolean> | boolean;
+}
 // Advanced consumers
-export function buildSidecarContext(config: SidecarConfig, db: object, env?: Record<string, string>, opts?: object): Promise<SidecarContext>;
-export function createSidecarRouter(ctx: SidecarContext, opts?: object): (req: object, res: object) => void;
+export function buildSidecarContext(config: SidecarConfig, db: object, env?: Record<string, string>, opts?: { configPath?: string }): Promise<SidecarContext>;
+export function createSidecarRouter(ctx: SidecarContext, opts?: SidecarRouterOptions): (req: object, res: object) => Promise<void>;
-export { createAuth } from './auth.js';
+export { createAuth, resolveSecret, authenticateAdmin } from './auth.js';
 export type { AuthResult, AuthConfig, Authenticator } from './auth.js';
 export { reactLoop } from './react-engine.js';
@@ -82,8 +91,10 @@ export class AgentRegistry {
 }
 export class VerifierRunner {
-  constructor(db: object, config?: object, workerPool?: object);
+  constructor(db: object, config?: object, pgPool?: object | null, workerPool?: object | null);
   loadFromDb(db: object): Promise<void>;
-  run(toolName: string, args: object, result: unknown): Promise<Array<{ outcome: 'pass' | 'warn' | 'block'; message: string | null; verifier: string }>>;
+  registerVerifiers(toolName: string, verifiers: object[]): void;
+  verify(toolName: string, args: object, result: unknown): Promise<{ outcome: 'pass' | 'warn' | 'block'; message: string | null; verifierName: string | null }>;
+  logResult(sessionId: string, toolName: string, result: object): void;
   destroy(): void;
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "agent-tool-forge",
-  "version": "0.4.6",
+  "version": "0.4.9",
   "description": "Production LLM agent sidecar + Claude Code skill library for building, testing, and running tool-calling agents.",
   "keywords": [
     "llm",
@@ -29,6 +29,8 @@
   "files": [
     "lib",
     "widget",
+    "config",
+    "skills",
     "!lib/**/*.test.js",
     "!lib/__fixtures__",
     "!widget/**/*.test.js"

package/skills/forge-eval/SKILL.md ADDED Viewed

@@ -0,0 +1,69 @@
+# /forge-eval — Generate Eval Suites
+Generate golden and labeled eval JSON files for a named tool. Run this skill after a tool is implemented and tests are green.
+---
+## Step 1 — Identify the Tool
+Ask the user which tool to generate evals for, or read it from context if `/forge-tool` just completed.
+Read the tool's ToolDefinition from `tools/<name>.tool.js`:
+- `name`, `description`, `schema`, `triggerPhrases`, `category`, `consequenceLevel`
+---
+## Step 2 — Generate Golden Eval Suite
+Generate **5–10 golden cases** covering:
+- Happy path with typical inputs
+- Edge cases: empty results, boundary values, missing optional params
+- Error paths: invalid input, service unavailable
+Each golden case follows this schema:
+```json
+{
+  "id": "case-001",
+  "description": "What this case tests",
+  "input": { "message": "User's natural-language request" },
+  "expectedTool": "<tool_name>",
+  "expectedArgs": { "param": "value" },
+  "checks": [
+    { "type": "tool_called", "tool": "<tool_name>" },
+    { "type": "arg_equals", "arg": "param", "value": "value" }
+  ]
+}
+```
+Write to `evals/<name>.golden.json` as a JSON array.
+---
+## Step 3 — Generate Labeled Eval Suite
+Generate **2–3 labeled (multi-tool) scenarios** where the agent must choose between 2+ tools or sequence multiple calls:
+- Scenario where the tool is the correct choice over a similar tool
+- Scenario where the tool is called followed by a second tool
+- Scenario where the tool should NOT be called (wrong intent)
+Each labeled case:
+```json
+{
+  "id": "labeled-001",
+  "description": "What this scenario tests",
+  "input": { "message": "User's multi-intent request" },
+  "label": "correct" | "incorrect" | "partial",
+  "expectedTools": ["<tool_name>"],
+  "checks": [...]
+}
+```
+Write to `evals/<name>.labeled.json` as a JSON array.
+---
+## Step 4 — Validate
+Run `node lib/index.js run --eval evals/<name>.golden.json --dry-run` if available to validate JSON schema.
+Print a summary: N golden cases, M labeled scenarios, file paths written.