npm - agent-tool-forge - Versions diffs - 0.3.0 - Mend

agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (107) hide show

package/LICENSE +21 -0
package/README.md +209 -0
package/lib/agent-registry.js +170 -0
package/lib/api-client.js +792 -0
package/lib/api-loader.js +260 -0
package/lib/auth.d.ts +25 -0
package/lib/auth.js +158 -0
package/lib/checks/check-adapter.js +172 -0
package/lib/checks/compose.js +42 -0
package/lib/checks/content-match.js +14 -0
package/lib/checks/cost-budget.js +11 -0
package/lib/checks/index.js +18 -0
package/lib/checks/json-valid.js +15 -0
package/lib/checks/latency.js +11 -0
package/lib/checks/length-bounds.js +17 -0
package/lib/checks/negative-match.js +14 -0
package/lib/checks/no-hallucinated-numbers.js +63 -0
package/lib/checks/non-empty.js +34 -0
package/lib/checks/regex-match.js +12 -0
package/lib/checks/run-checks.js +84 -0
package/lib/checks/schema-match.js +26 -0
package/lib/checks/tool-call-count.js +16 -0
package/lib/checks/tool-selection.js +34 -0
package/lib/checks/types.js +45 -0
package/lib/comparison/compare.js +86 -0
package/lib/comparison/format.js +104 -0
package/lib/comparison/index.js +6 -0
package/lib/comparison/statistics.js +59 -0
package/lib/comparison/types.js +41 -0
package/lib/config-schema.js +200 -0
package/lib/config.d.ts +66 -0
package/lib/conversation-store.d.ts +77 -0
package/lib/conversation-store.js +443 -0
package/lib/db.d.ts +6 -0
package/lib/db.js +1112 -0
package/lib/dep-check.js +99 -0
package/lib/drift-background.js +61 -0
package/lib/drift-monitor.js +187 -0
package/lib/eval-runner.js +566 -0
package/lib/fixtures/fixture-store.js +161 -0
package/lib/fixtures/index.js +11 -0
package/lib/forge-engine.js +982 -0
package/lib/forge-eval-generator.js +417 -0
package/lib/forge-file-writer.js +386 -0
package/lib/forge-service-client.js +190 -0
package/lib/forge-service.d.ts +4 -0
package/lib/forge-service.js +655 -0
package/lib/forge-verifier-generator.js +271 -0
package/lib/handlers/admin.js +151 -0
package/lib/handlers/agents.js +229 -0
package/lib/handlers/chat-resume.js +334 -0
package/lib/handlers/chat-sync.js +320 -0
package/lib/handlers/chat.js +320 -0
package/lib/handlers/conversations.js +92 -0
package/lib/handlers/preferences.js +88 -0
package/lib/handlers/tools-list.js +58 -0
package/lib/hitl-engine.d.ts +60 -0
package/lib/hitl-engine.js +261 -0
package/lib/http-utils.js +92 -0
package/lib/index.d.ts +20 -0
package/lib/index.js +141 -0
package/lib/init.js +636 -0
package/lib/manual-entry.js +59 -0
package/lib/mcp-server.js +252 -0
package/lib/output-groups.js +54 -0
package/lib/postgres-store.d.ts +31 -0
package/lib/postgres-store.js +465 -0
package/lib/preference-store.d.ts +47 -0
package/lib/preference-store.js +79 -0
package/lib/prompt-store.d.ts +42 -0
package/lib/prompt-store.js +60 -0
package/lib/rate-limiter.d.ts +30 -0
package/lib/rate-limiter.js +104 -0
package/lib/react-engine.d.ts +110 -0
package/lib/react-engine.js +337 -0
package/lib/runner/cli.js +156 -0
package/lib/runner/cost-estimator.js +71 -0
package/lib/runner/gate.js +46 -0
package/lib/runner/index.js +165 -0
package/lib/sidecar.d.ts +83 -0
package/lib/sidecar.js +161 -0
package/lib/sse.d.ts +15 -0
package/lib/sse.js +30 -0
package/lib/tools-scanner.js +91 -0
package/lib/tui.js +253 -0
package/lib/verifier-report.js +78 -0
package/lib/verifier-runner.js +338 -0
package/lib/verifier-scanner.js +70 -0
package/lib/verifier-worker-pool.js +196 -0
package/lib/views/chat.js +340 -0
package/lib/views/endpoints.js +203 -0
package/lib/views/eval-run.js +206 -0
package/lib/views/forge-agent.js +538 -0
package/lib/views/forge.js +410 -0
package/lib/views/main-menu.js +275 -0
package/lib/views/mediation.js +381 -0
package/lib/views/model-compare.js +430 -0
package/lib/views/model-comparison.js +333 -0
package/lib/views/onboarding.js +470 -0
package/lib/views/performance.js +237 -0
package/lib/views/run-evals.js +205 -0
package/lib/views/settings.js +829 -0
package/lib/views/tools-evals.js +514 -0
package/lib/views/verifier-coverage.js +617 -0
package/lib/workers/verifier-worker.js +52 -0
package/package.json +123 -0
package/widget/forge-chat.js +789 -0

package/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 Tool-Forge Contributors
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

package/README.md ADDED Viewed

@@ -0,0 +1,209 @@
+# Agent Tool Forge
+Production LLM agent sidecar + Claude Code skill library for building, testing, and running tool-calling agents.
+**Two jobs, one package:**
+1. **Sidecar runtime** — deploy alongside your app. Handles the full ReAct loop, HITL gates, verifier pipeline, eval runner, and observability.
+2. **Skill library** — Claude Code skills that generate tools, eval suites, and MCP servers via structured 12-phase dialogue.
+---
+## Quick Start
+### As a runtime package
+```bash
+npm install agent-tool-forge
+```
+```js
+import { createSidecar } from 'agent-tool-forge'
+const { server, ctx, close } = await createSidecar(
+  { auth: { mode: 'trust' } },
+  { port: 8001 }
+)
+// server is already listening on port 8001
+// call close() on shutdown for clean teardown
+```
+### With the TUI
+```bash
+node lib/index.js
+```
+See [docs/tui-workflow.md](docs/tui-workflow.md) for a start-to-finish walkthrough.
+### Install Claude Code Skills
+```bash
+# Global install (available in all projects)
+cp -r tool-forge/skills/forge-tool     ~/.claude/skills/
+cp -r tool-forge/skills/forge-eval     ~/.claude/skills/
+cp -r tool-forge/skills/forge-mcp      ~/.claude/skills/
+cp -r tool-forge/skills/forge-verifier ~/.claude/skills/
+```
+Then in any Claude Code session:
+```
+/forge-tool      # 12-phase tool creation dialogue
+/forge-eval      # Generate golden + labeled eval suites
+/forge-mcp       # Generate an MCP server from a ToolDefinition
+/forge-verifier  # Detect tools without verifiers, generate stubs
+```
+---
+## Skills
+| Skill | Purpose |
+|-------|---------|
+| `/forge-tool` | 12-phase structured dialogue: challenge necessity, lock the description contract, generate tool + tests + evals |
+| `/forge-eval` | Generate golden (5-10 cases) and labeled (multi-tool) eval suites with deterministic assertions |
+| `/forge-mcp` | Generate an MCP server scaffold from a ToolDefinition |
+| `/forge-verifier` | Detect tools without verifier coverage, generate verifier stubs + barrel registration |
+### The 12-Phase `/forge-tool` Dialogue
+| Phase | What Happens |
+|-------|-------------|
+| 0 | **Creative exploration** — open-ended "what should this do?" |
+| 1 | **Skeptic gate** — challenge necessity, overlap, scope |
+| 2 | **Description + name** — lock the routing contract |
+| 3 | **Collect fields** — schema, category, consequence level, confirmation flag |
+| 4 | **Routing** — collect endpoint target, HTTP method, auth type, parameter mapping |
+| 5 | **Dependency check** — verify the tool context provides what's needed |
+| 6 | **Confirm full spec** — sign off before any code is written |
+| 7 | **Generate all files** — tool, tests, barrel registration |
+| 8 | **Run tests** — must be green before proceeding |
+| 9 | **Generate evals** — hand off to `/forge-eval` |
+| 10 | **Generate verifiers** — create verifier stubs for the new tool |
+| 11 | **Done** — summary of everything created |
+---
+## Runtime Features
+- **ReAct loop** — multi-turn LLM + tool execution, streamed via SSE
+- **HITL** — four levels (autonomous → paranoid), pause/resume with 5-minute TTL
+- **Verifiers** — post-response quality pipeline (warnings + flags, ACIRU ordering)
+- **Eval runner** — `node lib/index.js run --eval <path>` executes eval JSON, checks assertions, stores results in SQLite; `--record` / `--replay` for fixture-based testing
+- **Observability** — token tracking, cost estimation, per-tool metrics in SQLite
+- **Web component** — `<forge-chat>` drop-in chat widget (vanilla JS, zero deps)
+---
+## Optional Peer Dependencies
+The sidecar core requires only `better-sqlite3`. Additional backends are loaded on demand when configured — install them only if you use them:
+| Package | When needed |
+|---------|-------------|
+| `redis` or `ioredis` | `conversation.store: 'redis'` or `rateLimit.enabled: true` with Redis backend |
+| `pg` | `database.type: 'postgres'` — Postgres conversation store, agent registry, and preferences |
+```bash
+# Redis backend
+npm install ioredis          # or: npm install redis
+# Postgres backend
+npm install pg
+```
+If a required package is missing, the sidecar prints an actionable error on startup rather than crashing at import time.
+---
+## Exported Subpaths
+All subpaths ship with TypeScript declarations.
+```js
+import { createSidecar }      from 'agent-tool-forge'               // main entry
+import { reactLoop }           from 'agent-tool-forge/react-engine'
+import { createAuth }          from 'agent-tool-forge/auth'
+import { makeConversationStore } from 'agent-tool-forge/conversation-store'
+import { mergeDefaults }       from 'agent-tool-forge/config'
+import { makeHitlEngine }      from 'agent-tool-forge/hitl-engine'
+import { makePromptStore }     from 'agent-tool-forge/prompt-store'
+import { makePreferenceStore } from 'agent-tool-forge/preference-store'
+import { makeRateLimiter }     from 'agent-tool-forge/rate-limiter'
+import { getDb }               from 'agent-tool-forge/db'
+import { initSSE }             from 'agent-tool-forge/sse'
+import { PostgresStore }       from 'agent-tool-forge/postgres-store'
+import { buildSidecarContext, createSidecarRouter } from 'agent-tool-forge/forge-service'
+```
+---
+## Documentation
+| Doc | Contents |
+|-----|----------|
+| [docs/tui-workflow.md](docs/tui-workflow.md) | TUI walkthrough, start to finish |
+| [docs/reference/config.md](docs/reference/config.md) | `forge.config.json` field reference |
+| [docs/reference/api.md](docs/reference/api.md) | HTTP endpoints, SSE events, HITL flow |
+| [docs/eval-runner-contract.md](docs/eval-runner-contract.md) | Eval file format and assertion spec |
+| [docs/API-DISCOVERY.md](docs/API-DISCOVERY.md) | API discovery TUI |
+| [docs/VERIFIER-FACTORY.md](docs/VERIFIER-FACTORY.md) | Verifier gap detection and stub generation |
+---
+## Repo Structure
+```
+lib/
+  sidecar.js              # createSidecar() — package entry point
+  index.js                # TUI + CLI entry point
+  react-engine.js         # ReAct loop, SSE streaming
+  hitl-engine.js          # HITL pause/resume
+  verifier-runner.js      # Post-response verifier pipeline
+  eval-runner.js          # Eval execution engine
+  checks/                 # Deterministic assertion checks
+  fixtures/               # Record/replay fixture store
+  comparison/             # Run comparison + Wilson statistics
+  runner/                 # Gate evaluation + CLI
+  views/                  # TUI screens
+  db.js                   # SQLite persistence
+skills/
+  forge-tool/             # 12-phase tool creation workflow
+  forge-eval/             # Golden + labeled eval generation
+  forge-mcp/              # MCP server generation
+  forge-verifier/         # Verifier gap detection + stub generation
+templates/                # Pseudo-code reference templates
+docs/
+  tui-workflow.md         # Start-to-finish TUI guide
+  reference/
+    config.md             # forge.config.json reference
+    api.md                # HTTP + SSE reference
+  eval-runner-contract.md # Eval file format spec
+  API-DISCOVERY.md        # API discovery workflow
+  VERIFIER-FACTORY.md     # Verifier gap detection + stub generation
+example/
+  tools/                  # Example tool files
+  verification/           # Example verifiers
+docs/examples/            # Example evals (golden, labeled)
+widget/
+  forge-chat.js           # <forge-chat> web component
+```
+---
+## Standing on Shoulders
+Tool-Forge integrates ideas and code from two excellent open-source projects:
+- **[evalkit](https://github.com/wkhori/evalkit)** by wkhori — MIT License
+  Provides the deterministic check suite (`lib/checks/`): content matching, tool selection verification, schema validation, and the `runChecks()` meta-runner. Used under MIT license with attribution in each file.
+- **[agent-eval-kit](https://github.com/FlanaganSe/agent-eval-kit)** by FlanaganSe — MIT License
+  Provides fixture-based record/replay (`lib/fixtures/`), statistical comparison with Wilson confidence intervals (`lib/comparison/`), gate evaluation (`lib/runner/gate.js`), and composition operators (`lib/checks/compose.js`). Used under MIT license with attribution in each file.
+---
+## License
+MIT

package/lib/agent-registry.js ADDED Viewed

@@ -0,0 +1,170 @@
+/**
+ * AgentRegistry — multi-agent configuration for the sidecar runtime.
+ *
+ * Each agent selects a subset of tools from the shared tool_registry,
+ * overrides model/HITL defaults, and carries its own system prompt.
+ *
+ * Factory: makeAgentRegistry(config, db)
+ */
+import {
+  upsertAgent, getAgent, getAllAgents, getDefaultAgent,
+  setDefaultAgent, deleteAgent
+} from './db.js';
+export class AgentRegistry {
+  /**
+   * @param {object} config — merged forge config
+   * @param {import('better-sqlite3').Database} db
+   */
+  constructor(config, db) {
+    this._config = config;
+    this._db = db;
+  }
+  /**
+   * Resolve an agent by ID. If agentId is null/empty, returns the default agent (or null).
+   * If agentId is provided but not found or disabled, returns null.
+   *
+   * @param {string|null|undefined} agentId
+   * @returns {object|null}
+   */
+  resolveAgent(agentId) {
+    if (!agentId) {
+      return getDefaultAgent(this._db);
+    }
+    const agent = getAgent(this._db, agentId);
+    if (!agent || !agent.enabled) return null;
+    return agent;
+  }
+  /**
+   * Filter tools to an agent's allowlist. If allowlist is '*', returns all.
+   * Operates on the { toolRows, tools } shape returned by loadPromotedTools.
+   *
+   * @param {{ toolRows: object[], tools: object[] }} loaded
+   * @param {object|null} agent
+   * @returns {{ toolRows: object[], tools: object[] }}
+   */
+  filterTools(loaded, agent) {
+    if (!agent) return loaded;
+    const allowlist = agent.tool_allowlist;
+    if (!allowlist || allowlist === '*') return loaded;
+    let allowed;
+    try {
+      allowed = JSON.parse(allowlist);
+    } catch {
+      return { toolRows: [], tools: [] }; // malformed → deny all (fail closed)
+    }
+    if (!Array.isArray(allowed)) return { toolRows: [], tools: [] };
+    const allowSet = new Set(allowed);
+    const toolRows = loaded.toolRows.filter(r => allowSet.has(r.tool_name));
+    const tools = loaded.tools.filter(t => allowSet.has(t.name));
+    return { toolRows, tools };
+  }
+  /**
+   * Build an agent-scoped config by overlaying agent overrides onto the base config.
+   * The returned object can be passed to PreferenceStore.resolveEffective() unchanged.
+   *
+   * @param {object} baseConfig — the merged forge config
+   * @param {object|null} agent — agent row or null
+   * @returns {object} scoped config
+   */
+  buildAgentConfig(baseConfig, agent) {
+    if (!agent) return baseConfig;
+    const scoped = { ...baseConfig };
+    if (agent.default_model) scoped.defaultModel = agent.default_model;
+    if (agent.default_hitl_level) scoped.defaultHitlLevel = agent.default_hitl_level;
+    // Only override boolean flags when explicitly enabled (1), not on DB default (0).
+    // DB column is NOT NULL DEFAULT 0, so 0 means "not explicitly set" — defer to base config.
+    if (agent.allow_user_model_select) scoped.allowUserModelSelect = true;
+    if (agent.allow_user_hitl_config) scoped.allowUserHitlConfig = true;
+    if (agent.max_turns != null) scoped.maxTurns = agent.max_turns;
+    if (agent.max_tokens != null) scoped.maxTokens = agent.max_tokens;
+    return scoped;
+  }
+  /**
+   * Resolve the system prompt for an agent.
+   * Fallback chain: agent prompt → promptStore active → config.systemPrompt → default.
+   *
+   * @param {object|null} agent
+   * @param {object} promptStore
+   * @param {object} config
+   * @returns {string}
+   */
+  resolveSystemPrompt(agent, promptStore, config) {
+    if (agent?.system_prompt) return agent.system_prompt;
+    const active = promptStore.getActivePrompt();
+    if (active) return active;
+    return config.systemPrompt || 'You are a helpful assistant.';
+  }
+  // ── CRUD pass-throughs ──────────────────────────────────────────────────
+  getAgent(agentId) { return getAgent(this._db, agentId); }
+  getAllAgents() { return getAllAgents(this._db); }
+  upsertAgent(row) { return upsertAgent(this._db, row); }
+  setDefault(agentId) { return setDefaultAgent(this._db, agentId); }
+  deleteAgent(agentId) { return deleteAgent(this._db, agentId); }
+  /**
+   * Seed agents from config.agents[] array. Upserts with seeded_from_config=1.
+   * Ensures at least one default exists if agents are defined.
+   */
+  seedFromConfig() {
+    const agents = this._config.agents;
+    if (!Array.isArray(agents) || agents.length === 0) return;
+    let defaultAgentId = null;
+    for (const a of agents) {
+      if (!a.id || !a.displayName) continue;
+      // Skip if agent exists and was modified outside of config seeding
+      const existing = getAgent(this._db, a.id);
+      if (existing && !existing.seeded_from_config) continue;
+      upsertAgent(this._db, {
+        agent_id: a.id,
+        display_name: a.displayName,
+        description: a.description ?? null,
+        system_prompt: a.systemPrompt ?? null,
+        default_model: a.defaultModel ?? null,
+        default_hitl_level: a.defaultHitlLevel ?? null,
+        allow_user_model_select: a.allowUserModelSelect ? 1 : 0,
+        allow_user_hitl_config: a.allowUserHitlConfig ? 1 : 0,
+        tool_allowlist: Array.isArray(a.toolAllowlist) ? JSON.stringify(a.toolAllowlist) : '*',
+        max_turns: a.maxTurns ?? null,
+        max_tokens: a.maxTokens ?? null,
+        is_default: 0, // Don't set via upsert — use setDefaultAgent below to enforce single default
+        enabled: 1,
+        seeded_from_config: 1
+      });
+      if (a.isDefault) defaultAgentId = a.id;
+    }
+    // Enforce single default via setDefaultAgent (atomic clear + set)
+    if (defaultAgentId) {
+      setDefaultAgent(this._db, defaultAgentId);
+    } else if (!getDefaultAgent(this._db)) {
+      const first = agents.find(a => a.id && a.displayName);
+      if (first) setDefaultAgent(this._db, first.id);
+    }
+  }
+}
+/**
+ * Factory — creates an AgentRegistry backed by SQLite.
+ * For Postgres, use buildSidecarContext which selects the adapter automatically.
+ *
+ * @param {object} config — merged forge config
+ * @param {import('better-sqlite3').Database} db
+ * @returns {AgentRegistry}
+ */
+export function makeAgentRegistry(config, db) {
+  return new AgentRegistry(config, db);
+}