agent-tool-forge 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (107) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +209 -0
  3. package/lib/agent-registry.js +170 -0
  4. package/lib/api-client.js +792 -0
  5. package/lib/api-loader.js +260 -0
  6. package/lib/auth.d.ts +25 -0
  7. package/lib/auth.js +158 -0
  8. package/lib/checks/check-adapter.js +172 -0
  9. package/lib/checks/compose.js +42 -0
  10. package/lib/checks/content-match.js +14 -0
  11. package/lib/checks/cost-budget.js +11 -0
  12. package/lib/checks/index.js +18 -0
  13. package/lib/checks/json-valid.js +15 -0
  14. package/lib/checks/latency.js +11 -0
  15. package/lib/checks/length-bounds.js +17 -0
  16. package/lib/checks/negative-match.js +14 -0
  17. package/lib/checks/no-hallucinated-numbers.js +63 -0
  18. package/lib/checks/non-empty.js +34 -0
  19. package/lib/checks/regex-match.js +12 -0
  20. package/lib/checks/run-checks.js +84 -0
  21. package/lib/checks/schema-match.js +26 -0
  22. package/lib/checks/tool-call-count.js +16 -0
  23. package/lib/checks/tool-selection.js +34 -0
  24. package/lib/checks/types.js +45 -0
  25. package/lib/comparison/compare.js +86 -0
  26. package/lib/comparison/format.js +104 -0
  27. package/lib/comparison/index.js +6 -0
  28. package/lib/comparison/statistics.js +59 -0
  29. package/lib/comparison/types.js +41 -0
  30. package/lib/config-schema.js +200 -0
  31. package/lib/config.d.ts +66 -0
  32. package/lib/conversation-store.d.ts +77 -0
  33. package/lib/conversation-store.js +443 -0
  34. package/lib/db.d.ts +6 -0
  35. package/lib/db.js +1112 -0
  36. package/lib/dep-check.js +99 -0
  37. package/lib/drift-background.js +61 -0
  38. package/lib/drift-monitor.js +187 -0
  39. package/lib/eval-runner.js +566 -0
  40. package/lib/fixtures/fixture-store.js +161 -0
  41. package/lib/fixtures/index.js +11 -0
  42. package/lib/forge-engine.js +982 -0
  43. package/lib/forge-eval-generator.js +417 -0
  44. package/lib/forge-file-writer.js +386 -0
  45. package/lib/forge-service-client.js +190 -0
  46. package/lib/forge-service.d.ts +4 -0
  47. package/lib/forge-service.js +655 -0
  48. package/lib/forge-verifier-generator.js +271 -0
  49. package/lib/handlers/admin.js +151 -0
  50. package/lib/handlers/agents.js +229 -0
  51. package/lib/handlers/chat-resume.js +334 -0
  52. package/lib/handlers/chat-sync.js +320 -0
  53. package/lib/handlers/chat.js +320 -0
  54. package/lib/handlers/conversations.js +92 -0
  55. package/lib/handlers/preferences.js +88 -0
  56. package/lib/handlers/tools-list.js +58 -0
  57. package/lib/hitl-engine.d.ts +60 -0
  58. package/lib/hitl-engine.js +261 -0
  59. package/lib/http-utils.js +92 -0
  60. package/lib/index.d.ts +20 -0
  61. package/lib/index.js +141 -0
  62. package/lib/init.js +636 -0
  63. package/lib/manual-entry.js +59 -0
  64. package/lib/mcp-server.js +252 -0
  65. package/lib/output-groups.js +54 -0
  66. package/lib/postgres-store.d.ts +31 -0
  67. package/lib/postgres-store.js +465 -0
  68. package/lib/preference-store.d.ts +47 -0
  69. package/lib/preference-store.js +79 -0
  70. package/lib/prompt-store.d.ts +42 -0
  71. package/lib/prompt-store.js +60 -0
  72. package/lib/rate-limiter.d.ts +30 -0
  73. package/lib/rate-limiter.js +104 -0
  74. package/lib/react-engine.d.ts +110 -0
  75. package/lib/react-engine.js +337 -0
  76. package/lib/runner/cli.js +156 -0
  77. package/lib/runner/cost-estimator.js +71 -0
  78. package/lib/runner/gate.js +46 -0
  79. package/lib/runner/index.js +165 -0
  80. package/lib/sidecar.d.ts +83 -0
  81. package/lib/sidecar.js +161 -0
  82. package/lib/sse.d.ts +15 -0
  83. package/lib/sse.js +30 -0
  84. package/lib/tools-scanner.js +91 -0
  85. package/lib/tui.js +253 -0
  86. package/lib/verifier-report.js +78 -0
  87. package/lib/verifier-runner.js +338 -0
  88. package/lib/verifier-scanner.js +70 -0
  89. package/lib/verifier-worker-pool.js +196 -0
  90. package/lib/views/chat.js +340 -0
  91. package/lib/views/endpoints.js +203 -0
  92. package/lib/views/eval-run.js +206 -0
  93. package/lib/views/forge-agent.js +538 -0
  94. package/lib/views/forge.js +410 -0
  95. package/lib/views/main-menu.js +275 -0
  96. package/lib/views/mediation.js +381 -0
  97. package/lib/views/model-compare.js +430 -0
  98. package/lib/views/model-comparison.js +333 -0
  99. package/lib/views/onboarding.js +470 -0
  100. package/lib/views/performance.js +237 -0
  101. package/lib/views/run-evals.js +205 -0
  102. package/lib/views/settings.js +829 -0
  103. package/lib/views/tools-evals.js +514 -0
  104. package/lib/views/verifier-coverage.js +617 -0
  105. package/lib/workers/verifier-worker.js +52 -0
  106. package/package.json +123 -0
  107. package/widget/forge-chat.js +789 -0
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Tool-Forge Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,209 @@
1
+ # Agent Tool Forge
2
+
3
+ Production LLM agent sidecar + Claude Code skill library for building, testing, and running tool-calling agents.
4
+
5
+ **Two jobs, one package:**
6
+ 1. **Sidecar runtime** — deploy alongside your app. Handles the full ReAct loop, HITL gates, verifier pipeline, eval runner, and observability.
7
+ 2. **Skill library** — Claude Code skills that generate tools, eval suites, and MCP servers via structured 12-phase dialogue.
8
+
9
+ ---
10
+
11
+ ## Quick Start
12
+
13
+ ### As a runtime package
14
+
15
+ ```bash
16
+ npm install agent-tool-forge
17
+ ```
18
+
19
+ ```js
20
+ import { createSidecar } from 'agent-tool-forge'
21
+
22
+ const { server, ctx, close } = await createSidecar(
23
+ { auth: { mode: 'trust' } },
24
+ { port: 8001 }
25
+ )
26
+
27
+ // server is already listening on port 8001
28
+ // call close() on shutdown for clean teardown
29
+ ```
30
+
31
+ ### With the TUI
32
+
33
+ ```bash
34
+ node lib/index.js
35
+ ```
36
+
37
+ See [docs/tui-workflow.md](docs/tui-workflow.md) for a start-to-finish walkthrough.
38
+
39
+ ### Install Claude Code Skills
40
+
41
+ ```bash
42
+ # Global install (available in all projects)
43
+ cp -r tool-forge/skills/forge-tool ~/.claude/skills/
44
+ cp -r tool-forge/skills/forge-eval ~/.claude/skills/
45
+ cp -r tool-forge/skills/forge-mcp ~/.claude/skills/
46
+ cp -r tool-forge/skills/forge-verifier ~/.claude/skills/
47
+ ```
48
+
49
+ Then in any Claude Code session:
50
+
51
+ ```
52
+ /forge-tool # 12-phase tool creation dialogue
53
+ /forge-eval # Generate golden + labeled eval suites
54
+ /forge-mcp # Generate an MCP server from a ToolDefinition
55
+ /forge-verifier # Detect tools without verifiers, generate stubs
56
+ ```
57
+
58
+ ---
59
+
60
+ ## Skills
61
+
62
+ | Skill | Purpose |
63
+ |-------|---------|
64
+ | `/forge-tool` | 12-phase structured dialogue: challenge necessity, lock the description contract, generate tool + tests + evals |
65
+ | `/forge-eval` | Generate golden (5-10 cases) and labeled (multi-tool) eval suites with deterministic assertions |
66
+ | `/forge-mcp` | Generate an MCP server scaffold from a ToolDefinition |
67
+ | `/forge-verifier` | Detect tools without verifier coverage, generate verifier stubs + barrel registration |
68
+
69
+ ### The 12-Phase `/forge-tool` Dialogue
70
+
71
+ | Phase | What Happens |
72
+ |-------|-------------|
73
+ | 0 | **Creative exploration** — open-ended "what should this do?" |
74
+ | 1 | **Skeptic gate** — challenge necessity, overlap, scope |
75
+ | 2 | **Description + name** — lock the routing contract |
76
+ | 3 | **Collect fields** — schema, category, consequence level, confirmation flag |
77
+ | 4 | **Routing** — collect endpoint target, HTTP method, auth type, parameter mapping |
78
+ | 5 | **Dependency check** — verify the tool context provides what's needed |
79
+ | 6 | **Confirm full spec** — sign off before any code is written |
80
+ | 7 | **Generate all files** — tool, tests, barrel registration |
81
+ | 8 | **Run tests** — must be green before proceeding |
82
+ | 9 | **Generate evals** — hand off to `/forge-eval` |
83
+ | 10 | **Generate verifiers** — create verifier stubs for the new tool |
84
+ | 11 | **Done** — summary of everything created |
85
+
86
+ ---
87
+
88
+ ## Runtime Features
89
+
90
+ - **ReAct loop** — multi-turn LLM + tool execution, streamed via SSE
91
+ - **HITL** — four levels (autonomous → paranoid), pause/resume with 5-minute TTL
92
+ - **Verifiers** — post-response quality pipeline (warnings + flags, ACIRU ordering)
93
+ - **Eval runner** — `node lib/index.js run --eval <path>` executes eval JSON, checks assertions, stores results in SQLite; `--record` / `--replay` for fixture-based testing
94
+ - **Observability** — token tracking, cost estimation, per-tool metrics in SQLite
95
+ - **Web component** — `<forge-chat>` drop-in chat widget (vanilla JS, zero deps)
96
+
97
+ ---
98
+
99
+ ## Optional Peer Dependencies
100
+
101
+ The sidecar core requires only `better-sqlite3`. Additional backends are loaded on demand when configured — install them only if you use them:
102
+
103
+ | Package | When needed |
104
+ |---------|-------------|
105
+ | `redis` or `ioredis` | `conversation.store: 'redis'` or `rateLimit.enabled: true` with Redis backend |
106
+ | `pg` | `database.type: 'postgres'` — Postgres conversation store, agent registry, and preferences |
107
+
108
+ ```bash
109
+ # Redis backend
110
+ npm install ioredis # or: npm install redis
111
+
112
+ # Postgres backend
113
+ npm install pg
114
+ ```
115
+
116
+ If a required package is missing, the sidecar prints an actionable error on startup rather than crashing at import time.
117
+
118
+ ---
119
+
120
+ ## Exported Subpaths
121
+
122
+ All subpaths ship with TypeScript declarations.
123
+
124
+ ```js
125
+ import { createSidecar } from 'agent-tool-forge' // main entry
126
+ import { reactLoop } from 'agent-tool-forge/react-engine'
127
+ import { createAuth } from 'agent-tool-forge/auth'
128
+ import { makeConversationStore } from 'agent-tool-forge/conversation-store'
129
+ import { mergeDefaults } from 'agent-tool-forge/config'
130
+ import { makeHitlEngine } from 'agent-tool-forge/hitl-engine'
131
+ import { makePromptStore } from 'agent-tool-forge/prompt-store'
132
+ import { makePreferenceStore } from 'agent-tool-forge/preference-store'
133
+ import { makeRateLimiter } from 'agent-tool-forge/rate-limiter'
134
+ import { getDb } from 'agent-tool-forge/db'
135
+ import { initSSE } from 'agent-tool-forge/sse'
136
+ import { PostgresStore } from 'agent-tool-forge/postgres-store'
137
+ import { buildSidecarContext, createSidecarRouter } from 'agent-tool-forge/forge-service'
138
+ ```
139
+
140
+ ---
141
+
142
+ ## Documentation
143
+
144
+ | Doc | Contents |
145
+ |-----|----------|
146
+ | [docs/tui-workflow.md](docs/tui-workflow.md) | TUI walkthrough, start to finish |
147
+ | [docs/reference/config.md](docs/reference/config.md) | `forge.config.json` field reference |
148
+ | [docs/reference/api.md](docs/reference/api.md) | HTTP endpoints, SSE events, HITL flow |
149
+ | [docs/eval-runner-contract.md](docs/eval-runner-contract.md) | Eval file format and assertion spec |
150
+ | [docs/API-DISCOVERY.md](docs/API-DISCOVERY.md) | API discovery TUI |
151
+ | [docs/VERIFIER-FACTORY.md](docs/VERIFIER-FACTORY.md) | Verifier gap detection and stub generation |
152
+
153
+ ---
154
+
155
+ ## Repo Structure
156
+
157
+ ```
158
+ lib/
159
+ sidecar.js # createSidecar() — package entry point
160
+ index.js # TUI + CLI entry point
161
+ react-engine.js # ReAct loop, SSE streaming
162
+ hitl-engine.js # HITL pause/resume
163
+ verifier-runner.js # Post-response verifier pipeline
164
+ eval-runner.js # Eval execution engine
165
+ checks/ # Deterministic assertion checks
166
+ fixtures/ # Record/replay fixture store
167
+ comparison/ # Run comparison + Wilson statistics
168
+ runner/ # Gate evaluation + CLI
169
+ views/ # TUI screens
170
+ db.js # SQLite persistence
171
+ skills/
172
+ forge-tool/ # 12-phase tool creation workflow
173
+ forge-eval/ # Golden + labeled eval generation
174
+ forge-mcp/ # MCP server generation
175
+ forge-verifier/ # Verifier gap detection + stub generation
176
+ templates/ # Pseudo-code reference templates
177
+ docs/
178
+ tui-workflow.md # Start-to-finish TUI guide
179
+ reference/
180
+ config.md # forge.config.json reference
181
+ api.md # HTTP + SSE reference
182
+ eval-runner-contract.md # Eval file format spec
183
+ API-DISCOVERY.md # API discovery workflow
184
+ VERIFIER-FACTORY.md # Verifier gap detection + stub generation
185
+ example/
186
+ tools/ # Example tool files
187
+ verification/ # Example verifiers
188
+ docs/examples/ # Example evals (golden, labeled)
189
+ widget/
190
+ forge-chat.js # <forge-chat> web component
191
+ ```
192
+
193
+ ---
194
+
195
+ ## Standing on Shoulders
196
+
197
+ Tool-Forge integrates ideas and code from two excellent open-source projects:
198
+
199
+ - **[evalkit](https://github.com/wkhori/evalkit)** by wkhori — MIT License
200
+ Provides the deterministic check suite (`lib/checks/`): content matching, tool selection verification, schema validation, and the `runChecks()` meta-runner. Used under MIT license with attribution in each file.
201
+
202
+ - **[agent-eval-kit](https://github.com/FlanaganSe/agent-eval-kit)** by FlanaganSe — MIT License
203
+ Provides fixture-based record/replay (`lib/fixtures/`), statistical comparison with Wilson confidence intervals (`lib/comparison/`), gate evaluation (`lib/runner/gate.js`), and composition operators (`lib/checks/compose.js`). Used under MIT license with attribution in each file.
204
+
205
+ ---
206
+
207
+ ## License
208
+
209
+ MIT
package/lib/agent-registry.js ADDED
@@ -0,0 +1,170 @@
1
+ /**
2
+ * AgentRegistry — multi-agent configuration for the sidecar runtime.
3
+ *
4
+ * Each agent selects a subset of tools from the shared tool_registry,
5
+ * overrides model/HITL defaults, and carries its own system prompt.
6
+ *
7
+ * Factory: makeAgentRegistry(config, db)
8
+ */
9
+
10
+ import {
11
+ upsertAgent, getAgent, getAllAgents, getDefaultAgent,
12
+ setDefaultAgent, deleteAgent
13
+ } from './db.js';
14
+
15
/**
 * Registry of agent configurations for the sidecar runtime.
 *
 * Delegates persistence to the agent helpers imported from ./db.js and adds
 * tool filtering, per-agent config overlay, system-prompt resolution and
 * seeding from `config.agents`.
 */
export class AgentRegistry {
  /**
   * @param {object} config — merged forge config
   * @param {import('better-sqlite3').Database} db
   */
  constructor(config, db) {
    this._config = config;
    this._db = db;
  }

  /**
   * Resolve an agent by ID. If agentId is null/empty, returns whatever
   * getDefaultAgent yields (the default agent, or null).
   * If agentId is provided but not found or disabled, returns null.
   *
   * @param {string|null|undefined} agentId
   * @returns {object|null}
   */
  resolveAgent(agentId) {
    if (!agentId) {
      return getDefaultAgent(this._db);
    }
    const agent = getAgent(this._db, agentId);
    if (!agent || !agent.enabled) return null;
    return agent;
  }

  /**
   * Filter tools to an agent's allowlist.
   *
   * - no agent, or an empty / '*' allowlist → `loaded` is returned untouched
   * - allowlist is a JSON-encoded array string, or an already-parsed array →
   *   only tools whose name is listed are kept
   * - anything else (unparseable string, non-array JSON) → deny all (fail closed)
   *
   * Operates on the { toolRows, tools } shape returned by loadPromotedTools.
   *
   * @param {{ toolRows: object[], tools: object[] }} loaded
   * @param {object|null} agent
   * @returns {{ toolRows: object[], tools: object[] }}
   */
  filterTools(loaded, agent) {
    if (!agent) return loaded;
    const allowlist = agent.tool_allowlist;
    if (!allowlist || allowlist === '*') return loaded;

    let allowed = allowlist;
    if (!Array.isArray(allowed)) {
      try {
        allowed = JSON.parse(allowlist);
      } catch {
        return { toolRows: [], tools: [] }; // malformed → deny all (fail closed)
      }
    }
    if (!Array.isArray(allowed)) return { toolRows: [], tools: [] };

    const allowSet = new Set(allowed);
    const toolRows = loaded.toolRows.filter((row) => allowSet.has(row.tool_name));
    const tools = loaded.tools.filter((tool) => allowSet.has(tool.name));
    return { toolRows, tools };
  }

  /**
   * Build an agent-scoped config by overlaying agent overrides onto the base config.
   * The base config is shallow-copied, never mutated; with no agent it is returned as-is.
   *
   * @param {object} baseConfig — the merged forge config
   * @param {object|null} agent — agent row or null
   * @returns {object} scoped config
   */
  buildAgentConfig(baseConfig, agent) {
    if (!agent) return baseConfig;

    const scoped = { ...baseConfig };

    if (agent.default_model) scoped.defaultModel = agent.default_model;
    if (agent.default_hitl_level) scoped.defaultHitlLevel = agent.default_hitl_level;
    // Boolean flags only ever switch a feature ON: a falsy column value (0) is
    // treated as "not explicitly set", so the base config's value is kept.
    if (agent.allow_user_model_select) scoped.allowUserModelSelect = true;
    if (agent.allow_user_hitl_config) scoped.allowUserHitlConfig = true;
    // Numeric limits use a null check so that an explicit 0 still overrides.
    if (agent.max_turns != null) scoped.maxTurns = agent.max_turns;
    if (agent.max_tokens != null) scoped.maxTokens = agent.max_tokens;

    return scoped;
  }

  /**
   * Resolve the system prompt for an agent.
   * Fallback chain: agent prompt → promptStore active → config.systemPrompt → default.
   * A missing promptStore or config simply skips that step of the chain.
   *
   * @param {object|null} agent
   * @param {object} promptStore
   * @param {object} config
   * @returns {string}
   */
  resolveSystemPrompt(agent, promptStore, config) {
    if (agent?.system_prompt) return agent.system_prompt;
    const active = promptStore?.getActivePrompt?.();
    if (active) return active;
    // `||` (not `??`) on purpose: an empty-string prompt also falls back to the default.
    return config?.systemPrompt || 'You are a helpful assistant.';
  }

  // ── CRUD pass-throughs ──────────────────────────────────────────────────

  getAgent(agentId) { return getAgent(this._db, agentId); }
  getAllAgents() { return getAllAgents(this._db); }
  upsertAgent(row) { return upsertAgent(this._db, row); }
  setDefault(agentId) { return setDefaultAgent(this._db, agentId); }
  deleteAgent(agentId) { return deleteAgent(this._db, agentId); }

  /**
   * Seed agents from the config.agents[] array. Upserts with seeded_from_config=1.
   * Entries that are not objects, or lack `id` / `displayName`, are skipped, as are
   * agents that already exist with a falsy seeded_from_config flag.
   * Ensures at least one default exists if agents are defined.
   */
  seedFromConfig() {
    const agents = this._config?.agents;
    if (!Array.isArray(agents) || agents.length === 0) return;

    // null / non-object entries must not crash seeding — treat them as invalid.
    const isSeedable = (entry) => Boolean(entry?.id && entry?.displayName);

    let defaultAgentId = null;
    for (const a of agents) {
      if (!isSeedable(a)) continue;
      // Skip if agent exists and was modified outside of config seeding
      const existing = getAgent(this._db, a.id);
      if (existing && !existing.seeded_from_config) continue;
      // NOTE(review): is_default is always written as 0 here; whether that clears an
      // existing default on re-seed depends on upsertAgent in ./db.js — confirm.
      upsertAgent(this._db, {
        agent_id: a.id,
        display_name: a.displayName,
        description: a.description ?? null,
        system_prompt: a.systemPrompt ?? null,
        default_model: a.defaultModel ?? null,
        default_hitl_level: a.defaultHitlLevel ?? null,
        allow_user_model_select: a.allowUserModelSelect ? 1 : 0,
        allow_user_hitl_config: a.allowUserHitlConfig ? 1 : 0,
        tool_allowlist: Array.isArray(a.toolAllowlist) ? JSON.stringify(a.toolAllowlist) : '*',
        max_turns: a.maxTurns ?? null,
        max_tokens: a.maxTokens ?? null,
        is_default: 0, // Don't set via upsert — use setDefaultAgent below to enforce single default
        enabled: 1,
        seeded_from_config: 1
      });
      // When several entries set isDefault, the last one seeded wins.
      if (a.isDefault) defaultAgentId = a.id;
    }

    // Enforce single default via setDefaultAgent
    if (defaultAgentId) {
      setDefaultAgent(this._db, defaultAgentId);
    } else if (!getDefaultAgent(this._db)) {
      const first = agents.find(isSeedable);
      if (first) setDefaultAgent(this._db, first.id);
    }
  }
}
159
+
160
/**
 * Factory: construct an AgentRegistry backed by SQLite.
 * Postgres deployments should go through buildSidecarContext, which selects
 * the adapter automatically.
 *
 * @param {object} config — merged forge config
 * @param {import('better-sqlite3').Database} db
 * @returns {AgentRegistry}
 */
export function makeAgentRegistry(config, db) {
  const registry = new AgentRegistry(config, db);
  return registry;
}