@ninemind/agentgem 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,162 @@
1
+ import { CLAUDE_AGENT, analysisWorkspace, defaultConnectFn, currentTestConnectFn } from "./acpRecommender.js";
2
+ // skillify Phase-0 thresholds (proposal §4): "invoked 2+ times" and ">20 lines of
3
+ // logic" (~4 distinct action verbs). The third criterion (clear trigger phrase) is
4
+ // deferred to the agent + validation.
5
+ export const MIN_RECURRENCE = 2;
6
+ export const MIN_STEPS = 3; // procedures are mined as >=3-gram action runs (§3c)
7
+ const KEBAB_RE = /^[a-z0-9]+(?:-[a-z0-9]+)*$/;
8
+ const MUTATING_TOOL_RE = /^(Bash|Edit|Write|NotebookEdit)$/;
9
// Narrow a chatty agent reply down to the span between its first "{" and its
// last "}". When no such span exists the text is handed back untouched, so the
// caller's JSON.parse is what reports the failure.
function extractJson(text) {
    const open = text.indexOf("{");
    const close = text.lastIndexOf("}");
    const hasSpan = open >= 0 && close > open;
    if (!hasSpan) {
        return text;
    }
    return text.slice(open, close + 1);
}
14
/**
 * Validate a raw agent response into DistilledSkills. A distilled skill cannot be
 * checked against the inventory (it is new), so validation is shape + evidence-
 * grounding (proposal §6): kebab name, non-empty triggers + body, slug not already
 * installed (nor already emitted earlier in the same response), and every claimed
 * tool present in the candidates' sampled evidence.
 * `mutating` is forced true when the procedure touches Bash/Edit/Write/NotebookEdit.
 *
 * Never throws: malformed JSON, a missing/partial inventory, and candidates without
 * a sampled run all degrade to "less evidence" instead of raising.
 *
 * @param {string|object} raw - agent reply text (JSON possibly wrapped in prose) or an already-parsed object.
 * @param {object} inv - inventory; `project.skills`, `global.skills` and `project.root` are read when present.
 * @param {Array<object>} candidates - Phase-0 candidates; `sample.steps[].tool`, `sessions` and `verbs` are read.
 * @returns {Array<object>} accepted draft skills (possibly empty).
 */
export function validateDistilled(raw, inv, candidates) {
    let obj = raw;
    if (typeof raw === "string") {
        try {
            obj = JSON.parse(extractJson(raw));
        }
        catch {
            return [];
        }
    }
    if (!obj || typeof obj !== "object" || !Array.isArray(obj.distilled))
        return [];
    // Tolerate a partial inventory / candidate list so the "never throws" contract holds.
    const namesOf = (skills) => (Array.isArray(skills) ? skills.map((s) => s?.name) : []);
    const installed = new Set([...namesOf(inv?.project?.skills), ...namesOf(inv?.global?.skills)]);
    const pool = Array.isArray(candidates) ? candidates : [];
    // Evidence pool: tools that actually appear in any candidate's sampled run.
    const evidenceTools = new Set();
    for (const c of pool) {
        const steps = Array.isArray(c?.sample?.steps) ? c.sample.steps : [];
        for (const st of steps)
            evidenceTools.add(st?.tool);
    }
    const evidenceIsMutating = [...evidenceTools].some((t) => MUTATING_TOOL_RE.test(t));
    const sessions = pool.reduce((m, c) => Math.max(m, c?.sessions ?? 0), 0);
    const exampleSequence = pool[0]?.verbs ?? [];
    const root = inv?.project?.root;
    // Slugs accepted so far in THIS response; a repeat would otherwise yield two drafts
    // that share a name.
    const emitted = new Set();
    const out = [];
    for (const it of obj.distilled) {
        if (!it || typeof it !== "object")
            continue;
        if (typeof it.name !== "string" || !KEBAB_RE.test(it.name)) {
            // String() rather than interpolation: a Symbol name must not throw here.
            console.error(`distill: dropping non-kebab name '${String(it.name)}'`);
            continue;
        }
        if (installed.has(it.name)) {
            console.error(`distill: dropping slug colliding with installed skill '${it.name}'`);
            continue;
        }
        if (emitted.has(it.name)) {
            console.error(`distill: dropping duplicate slug '${it.name}'`);
            continue;
        }
        const triggers = Array.isArray(it.triggers) ? it.triggers.filter((t) => typeof t === "string" && t.trim().length > 0) : [];
        if (!triggers.length)
            continue;
        if (typeof it.body !== "string" || !it.body.trim())
            continue;
        const tools = Array.isArray(it.tools) ? it.tools.filter((t) => typeof t === "string") : [];
        if (tools.some((t) => !evidenceTools.has(t))) {
            console.error(`distill: dropping '${it.name}' — fabricated tool not in evidence`);
            continue;
        }
        emitted.add(it.name);
        out.push({
            name: it.name,
            description: typeof it.description === "string" ? it.description : "",
            triggers,
            tools,
            mutating: evidenceIsMutating || tools.some((t) => MUTATING_TOOL_RE.test(t)),
            body: it.body,
            evidence: { sessions, exampleSequence, root },
            status: "draft",
            confidence: ["high", "medium", "low"].includes(it.confidence) ? it.confidence : "medium",
        });
    }
    return out;
}
82
/**
 * Phase-0 gate: keep the mined procedures that recur across enough sessions and
 * have enough steps, attaching to each the session it was sampled from.
 * Procedures whose sample index does not resolve to a session are dropped.
 * Returns [] when the signal carries no procedures or no session list.
 */
export function distillCandidates(signal, opts = {}) {
    const recurrenceFloor = opts.minRecurrence ?? MIN_RECURRENCE;
    const stepFloor = opts.minSteps ?? MIN_STEPS;
    const sessionList = signal.sequences?.sessions;
    if (!signal.procedures || !sessionList) {
        return [];
    }
    const kept = [];
    for (const proc of signal.procedures) {
        const clearsGate = proc.sessions >= recurrenceFloor && proc.verbs.length >= stepFloor;
        if (!clearsGate) {
            continue;
        }
        const candidate = { ...proc, sample: sessionList[proc.sampleSessionIdx] };
        if (candidate.sample !== undefined) {
            kept.push(candidate);
        }
    }
    return kept;
}
93
+ // ── The generative ACP step (proposal §5) ───────────────────────────────────
94
+ // Distinct from the selective recommender's GROUNDING prompt. Names/scopes a
95
+ // skill by the MISSION it accomplished; dedups against installed skills.
96
// Builds the generative distillation prompt. Each array entry is one line of the
// prompt; the two JSON payloads are interpolated verbatim on their own lines.
export const DISTILL = (candidatesJson, installedSkillsJson) => {
    const promptLines = [
        `You distill the WORKFLOW a coding agent used to accomplish a mission into a reusable skill. Each candidate carries: a mission hint (the task the user set out to do + the outcome), an ordered redacted sequence of tool calls, and how many sessions it recurred across.`,
        `Name and scope each skill by the MISSION it accomplished — not by its tool fingerprint. For each genuinely reusable workflow, emit a skill with:`,
        ` frontmatter: name (kebab), description (one paragraph), triggers (phrases a user would actually type), tools (from the sequence), mutating (bool)`,
        ` body: ## Contract (guarantees) / ## Phases (reproduce the ordered instructions/steps the agent followed) / ## Output Format (the deliverable)`,
        `DEDUP — do NOT propose a skill that overlaps any installed skill:`,
        `${installedSkillsJson}`,
        `Drop a candidate that is one-off, trivial, or has no clear trigger phrase.`,
        `MISSIONS + WORKFLOWS (redacted; counts are facts):`,
        `${candidatesJson}`,
        ``,
        `Return ONLY JSON: {"distilled":[{"name","description","triggers":[],"tools":[],"mutating":bool,"body","confidence":"high"|"medium"|"low"}]}.`,
    ];
    return promptLines.join("\n");
};
111
// Keep the prompt bounded: forward a candidate's verbs, its recurrence count, the
// sampled session's mission hint (null when absent), and at most the first 30
// sampled steps reduced to { verb, arg }.
function trimCandidate(c) {
    const { sample } = c;
    const steps = [];
    for (const step of sample.steps.slice(0, 30)) {
        steps.push({ verb: step.verb, arg: step.arg });
    }
    return {
        verbs: c.verbs,
        sessions: c.sessions,
        missionHint: sample.missionHint ?? null,
        steps,
    };
}
121
// Names of every installed skill: project skills first, then global skills
// (the global inventory is optional).
function installedSkillNames(inv) {
    const nameOf = (skill) => skill.name;
    const projectNames = inv.project.skills.map(nameOf);
    const globalNames = (inv.global?.skills ?? []).map(nameOf);
    return projectNames.concat(globalNames);
}
124
/**
 * Race `p` against a deadline.
 *
 * @param {Promise<T>} p - the work to bound.
 * @param {number} ms - deadline in milliseconds.
 * @returns {Promise<T>} settles like `p`, or rejects with
 *   `Error("distill agent timeout after <ms>ms")` if the deadline passes first.
 * @template T
 */
function withTimeout(p, ms) {
    let timer;
    const deadline = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error(`distill agent timeout after ${ms}ms`)), ms);
    });
    // Clear the timer once the race settles; otherwise a still-pending timeout keeps
    // the event loop (and a CLI process) alive for up to `ms` after the work finished.
    return Promise.race([p, deadline]).finally(() => clearTimeout(timer));
}
127
/**
 * Distil draft skills from a WorkflowSignal. Total: never throws. Short-circuits to
 * an empty (non-degraded) result when no procedure clears Phase-0 — the agent is
 * not even spawned. Any agent error/timeout/junk → { distilled: [], degraded: true }.
 *
 * @param {object} signal - mined workflow signal, passed to distillCandidates.
 * @param {object} inv - inventory used for dedup and validation.
 * @param {object} [opts] - `connectFn`, `timeoutMs` (default 60 000), plus the
 *   distillCandidates thresholds.
 * @returns {Promise<{distilled: Array<object>, degraded: boolean}>}
 */
export async function distillWorkflow(signal, inv, opts = {}) {
    // Best-effort cleanup: swallow a synchronous throw AND a rejected promise, so a
    // failing dispose()/close() can neither escape nor become an unhandled rejection.
    // Deliberately not awaited — a hung cleanup must not hang the caller.
    const release = (fn) => {
        try {
            const pending = fn();
            if (pending && typeof pending.then === "function")
                pending.then(undefined, () => { });
        }
        catch { /* ignore */ }
    };
    let conn = null;
    let handle = null;
    try {
        // Inside the try so a malformed signal degrades instead of throwing.
        const candidates = distillCandidates(signal, opts);
        if (!candidates.length)
            return { distilled: [], degraded: false };
        const connectFn = opts.connectFn ?? currentTestConnectFn() ?? defaultConnectFn;
        const timeoutMs = opts.timeoutMs ?? 60_000;
        const prompt = DISTILL(JSON.stringify(candidates.map(trimCandidate)), JSON.stringify(installedSkillNames(inv)));
        conn = await connectFn(CLAUDE_AGENT, null);
        handle = await conn.ctx.open(analysisWorkspace()); // neutral cwd — don't pollute the project
        await handle.setMode("plan"); // explicit — never edits files
        const text = await withTimeout(handle.promptText(prompt), timeoutMs);
        return { distilled: validateDistilled(text, inv, candidates), degraded: false };
    }
    catch (err) {
        // `err` may be anything (null, a string…): never dereference it blindly.
        console.error("distill: fell back to empty:", err?.message ?? err);
        return { distilled: [], degraded: true };
    }
    finally {
        release(() => handle?.dispose());
        release(() => conn?.close());
    }
}
@@ -0,0 +1,77 @@
1
+ // src/gem/draftStage.ts
2
+ //
3
+ // Stage a distilled DRAFT skill so a Gem candidate can include it before it is
4
+ // installed (proposal §7b). The seam is at inventory assembly, NOT buildGem:
5
+ // buildGem resolves names against the in-memory ConfigInventory and throws on a
6
+ // miss (buildGem.ts), so we materialize each draft into a SkillArtifact and merge
7
+ // it into the inventory upstream. buildGem itself is unchanged.
8
+ import { mkdirSync, writeFileSync } from "node:fs";
9
+ import { join } from "node:path";
10
+ import { agentgemHome } from "../resolveDir.js";
11
// Render one frontmatter value. Text that is safe as a YAML plain scalar is emitted
// unchanged (so ordinary drafts render exactly as before); anything that could break
// out of the frontmatter — a newline, a leading indicator character, `: `, ` #`,
// surrounding whitespace, or (inside a `[a, b]` flow sequence) a flow delimiter — is
// emitted as a double-quoted scalar via JSON.stringify.
function yamlScalar(value, inFlow = false) {
    const text = String(value);
    if (text === "")
        return text;
    const unsafe = /[\r\n]/.test(text)
        || text !== text.trim()
        || /^[-?:,[\]{}#&*!|>'"%@`]/.test(text)
        || /:(\s|$)/.test(text)
        || /\s#/.test(text)
        || (inFlow && /[,[\]{}]/.test(text));
    return unsafe ? JSON.stringify(text) : text;
}
/**
 * Assemble the SKILL.md text: skillify-shaped frontmatter + the captured body.
 * This is also exactly what the draft-write handler persists to disk (§9).
 *
 * The name/description/triggers/tools originate from an agent response, so every
 * frontmatter value is passed through yamlScalar: a description containing a
 * newline or `---` can no longer terminate the frontmatter early or inject keys.
 *
 * @param {{name: string, description: string, triggers: string[], tools: string[], mutating: boolean, body: string}} s
 * @returns {string} the markdown document, ending with a single newline.
 */
export function distilledSkillMarkdown(s) {
    return [
        "---",
        `name: ${yamlScalar(s.name)}`,
        `description: ${yamlScalar(s.description)}`,
        "triggers:",
        ...s.triggers.map((t) => ` - ${yamlScalar(t)}`),
        `tools: [${s.tools.map((t) => yamlScalar(t, true)).join(", ")}]`,
        `mutating: ${s.mutating}`,
        "---",
        "",
        s.body.trim(),
        "",
    ].join("\n");
}
28
// Wrap a distilled draft as a skill artifact whose content is its rendered SKILL.md,
// tagged with the "distilled-draft" source.
export function distilledToArtifact(s) {
    const { name, description } = s;
    const content = distilledSkillMarkdown(s);
    return { type: "skill", name, description, source: "distilled-draft", content };
}
31
/**
 * Stage each draft into the project identified by its own `evidence.root`, since a
 * batch of drafts can span several projects. Drafts are grouped by root (in
 * first-seen order) and each group is staged in turn. With no drafts the input
 * inventory is returned as-is (same reference).
 */
export function stageDraftsByEvidence(inv, drafts) {
    if (!drafts.length) {
        return inv;
    }
    const groups = new Map();
    drafts.forEach((draft) => {
        const key = draft.evidence.root;
        if (!groups.has(key)) {
            groups.set(key, []);
        }
        groups.get(key).push(draft);
    });
    return [...groups].reduce((staged, [root, group]) => stageDistilledDrafts(staged, group, root), inv);
}
51
/**
 * Persist an accepted draft to `<base>/.agentgem/distilled/<name>/SKILL.md` for the
 * user to review and promote (proposal §7) — NOT into `.claude/skills/`.
 *
 * `name` becomes a path segment, so it is re-checked here rather than trusting that
 * every caller went through validateDistilled: anything that is not a kebab slug
 * (e.g. `../../x`, an absolute path, an empty string) is rejected before any
 * filesystem access.
 *
 * @param {object} s - the draft; `s.name` must be a kebab-case slug.
 * @param {string} [base] - directory to write under; defaults to agentgemHome().
 * @returns {string} the written path.
 * @throws {Error} when `s.name` is not a kebab-case slug, or on filesystem errors.
 */
export function writeDistilledDraft(s, base = agentgemHome()) {
    if (typeof s?.name !== "string" || !/^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(s.name)) {
        throw new Error(`refusing to write distilled draft: name must be a kebab-case slug, got ${JSON.stringify(s?.name)}`);
    }
    const dir = join(base, ".agentgem", "distilled", s.name);
    mkdirSync(dir, { recursive: true });
    const path = join(dir, "SKILL.md");
    writeFileSync(path, distilledSkillMarkdown(s), "utf8");
    return path;
}
63
/**
 * Produce a copy of `inv` in which every draft has been turned into a skill
 * artifact and appended to the skills of the project whose `root` matches — or to
 * the top-level skills when no project matches. The input is never mutated; with
 * no drafts the very same `inv` reference comes back.
 */
export function stageDistilledDrafts(inv, drafts, root) {
    if (!drafts.length) {
        return inv;
    }
    const arts = drafts.map(distilledToArtifact);
    let matched = false;
    const projects = (inv.projects ?? []).map((project) => {
        if (project.root !== root) {
            return project;
        }
        matched = true;
        return { ...project, skills: [...project.skills, ...arts] };
    });
    if (matched) {
        return { ...inv, projects };
    }
    return {
        ...inv,
        skills: [...inv.skills, ...arts],
        projects: inv.projects ? projects : undefined,
    };
}
@@ -0,0 +1,35 @@
1
/**
 * Check an agent run against optional expectations.
 *
 * @param {{ok: boolean, error?: string, result?: {toolCalls?: Array<{title?: string, status?: string}>, text?: string}}} outcome
 * @param {object} [expectations]
 * @param {string[]} [expectations.expectTools] - each must be a case-insensitive
 *   substring of some invoked tool's title.
 * @param {string|RegExp} [expectations.expectText] - substring or pattern the agent
 *   output must contain.
 * @param {boolean} [expectations.forbidToolFailures=true] - fail when any tool call
 *   has status "failed".
 * @returns {{passed: boolean, checks: Array<{name: string, passed: boolean, detail: string}>}}
 */
export function verifyGemRun(outcome, expectations = {}) {
    // A run that never completed can't be reasoned about — fail fast, single check.
    if (!outcome.ok) {
        return { passed: false, checks: [{ name: "run completed", passed: false, detail: outcome.error ?? "run did not complete" }] };
    }
    const checks = [];
    const { toolCalls = [], text = "" } = outcome.result ?? {};
    // A tool call may arrive without a title; treat it as "" instead of crashing in
    // toLowerCase().
    const calls = toolCalls.map((t) => ({ title: String(t?.title ?? ""), status: t?.status }));
    const titles = calls.map((t) => t.title);
    for (const want of expectations.expectTools ?? []) {
        const needle = want.toLowerCase();
        // Single search: the matched title doubles as the hit flag.
        const match = titles.find((title) => title.toLowerCase().includes(needle));
        const hit = match !== undefined;
        checks.push({
            name: `invoked tool ~ "${want}"`,
            passed: hit,
            detail: hit ? `matched ${JSON.stringify(match)}` : `no invoked tool matched (saw: ${titles.length ? titles.join(", ") : "none"})`,
        });
    }
    if (expectations.expectText !== undefined) {
        const pat = expectations.expectText;
        let hit;
        if (typeof pat === "string") {
            hit = text.includes(pat);
        }
        else {
            // A /g or /y pattern carries lastIndex between calls; reset it so verifying
            // twice with the same RegExp gives the same answer both times.
            pat.lastIndex = 0;
            hit = pat.test(text);
            pat.lastIndex = 0;
        }
        checks.push({
            name: "output text matches",
            passed: hit,
            detail: hit ? "matched" : `expected ${typeof pat === "string" ? JSON.stringify(pat) : String(pat)} in agent output`,
        });
    }
    if (expectations.forbidToolFailures ?? true) {
        const failed = calls.filter((t) => t.status === "failed").map((t) => t.title);
        checks.push({
            name: "no tool failures",
            passed: failed.length === 0,
            detail: failed.length === 0 ? "all tools ok" : `failed tools: ${failed.join(", ")}`,
        });
    }
    return { passed: checks.every((c) => c.passed), checks };
}
@@ -0,0 +1,21 @@
1
+ // src/gem/inputError.ts
2
+ // A rejection of a caller-supplied value by an input-containment guard: an unsafe
3
+ // workspace-name path segment, a non-public (SSRF) URL, a malformed credential.
4
+ //
5
+ // These are NOT server faults — the request was refused on purpose — so the caller
6
+ // should learn WHY. @agentback/rest hides the message of any error whose status is
7
+ // >= 500 ("Internal Server Error"), but surfaces e.message verbatim for a 4xx. By
8
+ // carrying statusCode 400 (read by the framework's buildErrorEnvelope) this turns the
9
+ // previously opaque 500 into a 400 whose message names the violated rule — matching
10
+ // how the zod body/param validators already report bad input.
11
// Error raised when an input-containment guard refuses a caller-supplied value.
// It carries statusCode 400 and code "invalid_input", plus a hint that replaces the
// framework's default invalid_input hint (which points at a per-field
// `issues`/`schema` payload these single-rule guards don't carry).
export class InvalidInputError extends Error {
    constructor(message) {
        super(message);
        Object.assign(this, {
            statusCode: 400,
            code: "invalid_input",
            hint: "Correct the value to satisfy the rule stated in `message`, then retry.",
            name: "InvalidInputError",
        });
    }
}
@@ -178,6 +178,21 @@ export async function mergeGems(graph, source) {
178
178
  };
179
179
  return { gem: merged, provenance };
180
180
  }
181
/**
 * Derive the searchable discovery block for a publish.
 *
 * - description: `opts.description`, else the first artifact with a non-empty
 *   `description`; omitted when neither exists.
 * - tags: caller-supplied, normalized for search — trimmed, lower-cased, empties
 *   dropped and duplicates removed (so `["AI", "ai "]` yields `["ai"]`), keeping
 *   first-seen order; omitted when none remain.
 * - artifactKinds: the distinct artifact types in the gem, in first-seen order.
 * - author: the publishing scope.
 * - updatedAt: copied from `opts.updatedAt` when provided.
 *
 * @param {{artifacts: Array<{type: string, description?: string}>}} gem
 * @param {string} scope
 * @param {{description?: string, tags?: string[], updatedAt?: string}} [opts]
 * @returns {object} the discovery block.
 */
export function buildDiscovery(gem, scope, opts = {}) {
    const description = opts.description ?? gem.artifacts.find((a) => "description" in a && a.description)?.["description"];
    const normalized = (opts.tags ?? []).map((t) => String(t).trim().toLowerCase()).filter((t) => t.length > 0);
    const tags = [...new Set(normalized)];
    const artifactKinds = [...new Set(gem.artifacts.map((a) => a.type))];
    const d = { author: scope, artifactKinds };
    if (description)
        d.description = description;
    if (tags.length)
        d.tags = tags;
    if (opts.updatedAt)
        d.updatedAt = opts.updatedAt;
    return d;
}
181
196
  export function updateIndex(index, e) {
182
197
  const items = { ...index.items };
183
198
  const existing = items[e.key];
@@ -187,8 +202,11 @@ export function updateIndex(index, e) {
187
202
  throw new Error(`${e.key}@${e.version} is immutable (published ${existingVersion.gemDigest}, attempted ${e.gemDigest})`);
188
203
  }
189
204
  versions[e.version] = { path: e.path, gemDigest: e.gemDigest, dependencies: e.dependencies };
190
- const latest = existing && cmpSemver(existing.latest, e.version) >= 0 ? existing.latest : e.version;
191
- items[e.key] = { latest, versions };
205
+ const isNewLatest = !existing || cmpSemver(existing.latest, e.version) < 0;
206
+ const latest = isNewLatest ? e.version : existing.latest;
207
+ // discovery reflects the latest version; keep the prior block when publishing an older version
208
+ const discovery = isNewLatest ? (e.discovery ?? existing?.discovery) : existing?.discovery;
209
+ items[e.key] = { latest, versions, ...(discovery ? { discovery } : {}) };
192
210
  return { formatVersion: REGISTRY_FORMAT_VERSION, items };
193
211
  }
194
212
  export async function publishGem(args) {
@@ -207,7 +225,8 @@ export async function publishGem(args) {
207
225
  if (prior && prior.gemDigest === gemDigest) {
208
226
  return { ref: key, version: args.version, gemDigest, commit: "", path };
209
227
  }
210
- const nextIndex = updateIndex(args.index, { key, version: args.version, path, gemDigest, dependencies });
228
+ const discovery = buildDiscovery(args.gem, args.scope, { description: args.description, tags: args.tags, updatedAt: args.updatedAt });
229
+ const nextIndex = updateIndex(args.index, { key, version: args.version, path, gemDigest, dependencies, discovery });
211
230
  const commitFiles = { "registry.json": JSON.stringify(nextIndex, null, 2) };
212
231
  for (const [rel, content] of Object.entries(files))
213
232
  commitFiles[`${path}/${rel}`] = content;
@@ -227,7 +246,7 @@ export async function resolveInstall(args) {
227
246
  if (args.mode === "materialize") {
228
247
  if (!args.target)
229
248
  throw new Error("materialize mode requires a target harness id");
230
- plan.materialize = materialize(gem, args.target);
249
+ plan.materialize = materialize(gem, args.target, { a2aServer: args.a2aServer });
231
250
  }
232
251
  return { plan, gem };
233
252
  }
@@ -0,0 +1,161 @@
1
+ // src/gem/runGem.ts
2
+ //
3
+ // The end-to-end "run my Gem" path: materialize a portable Gem into a runnable
4
+ // testbed dir, then drive a local ACP agent against it (optionally verifying).
5
+ //
6
+ // Why not targets.materialize(gem, "claude")? That renders the gem-archive layout
7
+ // (skills under `skills/<n>/SKILL.md`), which is NOT where Claude Code discovers
8
+ // skills at runtime. The runnable layout (`.claude/skills/<n>/SKILL.md`, etc.) is
9
+ // produced by the testbed import writer — so we adapt the Gem's self-contained
10
+ // artifacts into a ConfigInventory and reuse that tested writer.
11
+ import { randomUUID } from "node:crypto";
12
+ import { createRequire } from "node:module";
13
+ import { spawn } from "node:child_process";
14
+ import { mkdirSync } from "node:fs";
15
+ import { dirname, join } from "node:path";
16
+ import { agentgemHome } from "../resolveDir.js";
17
+ import { binOnPath } from "./binPath.js";
18
+ import { scaffoldTestbed, importArtifacts } from "./testbed.js";
19
+ import { runGemWithAgent, hasTestConnectFn } from "./acpRun.js";
20
+ import { verifyGemRun } from "./gemVerify.js";
21
+ // ── Opaque run registry ──────────────────────────────────────────────────────
22
+ // The streaming UI prepares a run (materialize) over POST, then streams it over a
23
+ // GET. We hand the client an opaque runId — NOT the raw runDir — so a crafted GET
24
+ // can't point the agent at an arbitrary path; the id maps server-side to the dir
25
+ // (always under AGENTGEM_HOME) and the agent chosen at prepare time.
26
// In-process table: opaque run id → { dir, agent } recorded at prepare time.
const RUN_REGISTRY = new Map();
// Record a prepared run under a freshly generated UUID and return that id.
export function registerRun(dir, agent) {
    const runId = randomUUID();
    const entry = { dir, agent };
    RUN_REGISTRY.set(runId, entry);
    return runId;
}
// Look up a prepared run by id; undefined when the id was never registered.
export function resolveRun(id) {
    const entry = RUN_REGISTRY.get(id);
    return entry;
}
35
// Known ACP agent adapters, keyed by agent id. Each entry names the npm package,
// the bin it exposes, the pinned version, and the testbed flavor it runs against.
export const AGENT_ADAPTERS = {
    claude: {
        id: "claude",
        name: "Claude Code",
        pkg: "@agentclientprotocol/claude-agent-acp",
        bin: "claude-agent-acp",
        version: "0.51.0",
        flavor: "claude",
        validated: true,
    },
    codex: {
        id: "codex",
        name: "Codex",
        pkg: "@agentclientprotocol/codex-acp",
        bin: "codex-acp",
        version: "1.0.0",
        flavor: "codex",
        validated: true,
    },
};
39
+ const require = createRequire(import.meta.url);
40
// Turn a package's declared bin into a spawnable `[node, <absolute bin path>]` pair.
// Resolution starts from `fromDir` when given, otherwise from this module. Any
// failure (package not resolvable, no matching bin entry) yields null.
function resolveBinFrom(pkg, binName, fromDir) {
    let located = null;
    try {
        const localRequire = fromDir ? createRequire(join(fromDir, "noop.cjs")) : require;
        const manifestPath = localRequire.resolve(`${pkg}/package.json`);
        const manifest = localRequire(manifestPath);
        const declared = manifest.bin;
        // `bin` is either a single path string or a { name: path } map.
        const relativeBin = typeof declared === "string" ? declared : declared?.[binName];
        if (relativeBin) {
            located = [process.execPath, join(dirname(manifestPath), relativeBin)];
        }
    }
    catch { /* not resolvable here */ }
    return located;
}
54
// Directory holding on-demand-fetched adapters: `<AGENTGEM_HOME>/adapters`
// (never a global location).
export function adapterCacheDir() {
    const home = agentgemHome();
    return join(home, "adapters");
}
56
// Synchronous, back-compat resolver: a locally resolvable dependency becomes
// `[node, path]`; otherwise the bare bin name is returned. The async
// resolveOrFetchAdapter is the full chain used at run time.
export function resolveAdapterCommand(pkg, binName) {
    const local = resolveBinFrom(pkg, binName);
    if (local !== null && local !== undefined) {
        return local;
    }
    return [binName];
}
61
// Default installer: `npm install <pkg>@<version> --prefix <prefixDir>` with quiet
// flags, inheriting stdio. Resolves on exit code 0; rejects on a spawn error, on a
// non-zero exit, or when the prefix directory cannot be created.
const npmInstaller = (pkg, version, prefixDir) => {
    return new Promise((resolve, reject) => {
        mkdirSync(prefixDir, { recursive: true });
        const npmArgs = [
            "install", `${pkg}@${version}`,
            "--prefix", prefixDir,
            "--no-audit", "--no-fund",
            "--loglevel", "error",
        ];
        const child = spawn("npm", npmArgs, { stdio: "inherit" });
        child.once("error", reject);
        child.once("exit", (code) => {
            if (code === 0) {
                resolve();
            }
            else {
                reject(new Error(`npm install ${pkg}@${version} exited with code ${code}`));
            }
        });
    });
};
67
// In-flight installs keyed by package name, so two runs racing for the same
// adapter share one fetch.
const inflightFetch = new Map();
// Find a spawnable command for an adapter, reusing what already exists and
// fetching only as the last resort:
//   1. the bin is on PATH                → [bin]
//   2. resolvable as agentgem's own dep  → [node, path]
//   3. present in the agentgem cache     → [node, path]
//   4. install the pinned version into the cache, then resolve from there
// Step 4 throws instead when `opts.allowFetch === false`.
export async function resolveOrFetchAdapter(adapter, opts = {}) {
    if (binOnPath(adapter.bin)) {
        return [adapter.bin];
    }
    const bundled = resolveBinFrom(adapter.pkg, adapter.bin);
    if (bundled) {
        return bundled;
    }
    const cache = adapterCacheDir();
    const fromCache = resolveBinFrom(adapter.pkg, adapter.bin, cache);
    if (fromCache) {
        return fromCache;
    }
    if (opts.allowFetch === false) {
        throw new Error(`${adapter.name} adapter (${adapter.pkg}) is not installed and on-demand fetch is disabled`);
    }
    const pending = inflightFetch.get(adapter.pkg);
    if (pending) {
        return pending;
    }
    const install = opts.installer ?? npmInstaller;
    const fetchThenResolve = async () => {
        opts.onFetch?.();
        await install(adapter.pkg, adapter.version, cache);
        const found = resolveBinFrom(adapter.pkg, adapter.bin, cache);
        if (!found) {
            throw new Error(`fetched ${adapter.pkg} but its '${adapter.bin}' bin was not found`);
        }
        return found;
    };
    const started = fetchThenResolve();
    inflightFetch.set(adapter.pkg, started);
    // Forget the entry once it settles; the rejection itself is surfaced to callers
    // through `started`, so it is swallowed on this bookkeeping chain only.
    void started.catch(() => { }).finally(() => inflightFetch.delete(adapter.pkg));
    return started;
}
104
// Sort a Gem's flat artifact list into the four inventory buckets importArtifacts
// expects. Artifacts are pushed as-is (each carries its own content); artifacts of
// any other type are left out.
export function gemToInventory(gem) {
    const inv = { skills: [], mcpServers: [], instructions: [], hooks: [] };
    for (const artifact of gem.artifacts) {
        switch (artifact.type) {
            case "skill":
                inv.skills.push(artifact);
                break;
            case "mcp_server":
                inv.mcpServers.push(artifact);
                break;
            case "instructions":
                inv.instructions.push(artifact);
                break;
            case "hook":
                inv.hooks.push(artifact);
                break;
            default:
                break;
        }
    }
    return inv;
}
120
/**
 * Scaffold a testbed at `dir` (named after the gem, for the given flavor), then
 * import every artifact the Gem carries: all skills, MCP servers and hooks are
 * selected by name, and instructions are included whenever the Gem has any.
 * Returns whatever importArtifacts returns.
 */
export function materializeGemToTestbed(gem, dir, flavor = "claude") {
    scaffoldTestbed(dir, gem.name, flavor);
    const inventory = gemToInventory(gem);
    const namesOf = (items) => items.map((item) => item.name);
    const selectEverything = {
        skills: namesOf(inventory.skills),
        mcpServers: namesOf(inventory.mcpServers),
        hooks: namesOf(inventory.hooks),
        includeInstructions: inventory.instructions.length > 0,
    };
    return importArtifacts(dir, selectEverything, inventory, flavor);
}
135
/**
 * Gem → dir → run → (verify). The one call that turns a portable Gem into an
 * observed agent run. Verification is attached only when `expectations` are given.
 *
 * @param {object} opts
 * @param {object} opts.gem - the Gem to materialize.
 * @param {string} opts.dir - testbed directory to scaffold and run in.
 * @param {string} [opts.agent="claude"] - key of AGENT_ADAPTERS.
 * @param {string} [opts.flavor] - testbed flavor; defaults to the adapter's.
 * @param {Function} [opts.connectFn] - injected connection; when present (or a test
 *   connectFn is registered) the adapter is neither resolved nor fetched.
 * @returns {Promise<{agent: string, materialized: *, run: *, verification: *}>}
 * @throws {Error} when `opts.agent` is not a known adapter — raised before anything
 *   is written to `opts.dir`.
 */
export async function materializeAndRunGem(opts) {
    const agent = opts.agent ?? "claude";
    // Own-property check: an unknown id (or a prototype key such as "constructor")
    // must fail with a clear message, not a TypeError on `adapter.flavor` or a
    // half-configured run.
    if (!Object.prototype.hasOwnProperty.call(AGENT_ADAPTERS, agent)) {
        throw new Error(`unknown agent '${String(agent)}' (known: ${Object.keys(AGENT_ADAPTERS).join(", ")})`);
    }
    const adapter = AGENT_ADAPTERS[agent];
    const flavor = opts.flavor ?? adapter.flavor;
    const materialized = materializeGemToTestbed(opts.gem, opts.dir, flavor);
    // Resolve (and if needed fetch) the adapter command, unless a connectFn is
    // injected — fakes never spawn, so don't trigger resolution/fetch in tests.
    const command = opts.connectFn || hasTestConnectFn()
        ? [adapter.bin]
        : await resolveOrFetchAdapter(adapter, { installer: opts.installer, onFetch: opts.onFetch, allowFetch: opts.allowFetch });
    const run = await runGemWithAgent({
        dir: opts.dir,
        task: opts.task,
        mode: opts.mode,
        descriptor: { id: adapter.id, name: adapter.name, command },
        connectFn: opts.connectFn,
        timeoutMs: opts.timeoutMs,
        onDelta: opts.onDelta,
        onToolCall: opts.onToolCall,
    });
    const verification = opts.expectations ? verifyGemRun(run, opts.expectations) : undefined;
    return { agent, materialized, run, verification };
}
@@ -0,0 +1,112 @@
1
+ // src/gem/safeFetch.ts
2
+ // SSRF guard for installing a .gem from a URL. A localhost-bound dev server is still
3
+ // reachable by a malicious page via CSRF, so an unguarded server-side fetch lets an
4
+ // attacker reach cloud metadata (169.254.169.254) or internal hosts. We resolve the
5
+ // host and refuse any non-public address, re-validate every redirect hop, AND pin the
6
+ // socket to the validated IP via an undici dispatcher so a DNS rebind between the
7
+ // validation and the connect cannot swing the request onto a blocked address.
8
+ import { lookup } from "node:dns/promises";
9
+ import { Agent } from "undici";
10
+ import { InvalidInputError } from "./inputError.js";
11
// Parse dotted-quad IPv4 into four octets, or null when the text is not valid IPv4.
function parseIPv4(text) {
    const m = /^(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(text);
    if (!m)
        return null;
    const octets = m.slice(1).map(Number);
    return octets.every((o) => o <= 255) ? octets : null;
}
// Parse IPv6 text (any legal spelling: `::` compression, full form, dotted IPv4
// tail, optional `%zone`) into eight 16-bit groups, or null when malformed.
function parseIPv6(text) {
    let s = text;
    const zone = s.indexOf("%");
    if (zone >= 0)
        s = s.slice(0, zone);
    const lastColon = s.lastIndexOf(":");
    const tail = s.slice(lastColon + 1);
    if (tail.includes(".")) {
        // Rewrite an embedded dotted IPv4 tail as two hex groups.
        const v4 = parseIPv4(tail);
        if (!v4 || lastColon < 0)
            return null;
        const hi = ((v4[0] << 8) | v4[1]).toString(16);
        const lo = ((v4[2] << 8) | v4[3]).toString(16);
        s = `${s.slice(0, lastColon + 1)}${hi}:${lo}`;
    }
    const halves = s.split("::");
    if (halves.length > 2)
        return null;
    const split = (part) => (part === "" ? [] : part.split(":"));
    let groups = split(halves[0]);
    if (halves.length === 2) {
        const rest = split(halves[1]);
        const missing = 8 - groups.length - rest.length;
        if (missing < 1)
            return null;
        groups = [...groups, ...new Array(missing).fill("0"), ...rest];
    }
    if (groups.length !== 8 || !groups.every((g) => /^[0-9a-f]{1,4}$/.test(g)))
        return null;
    return groups.map((g) => Number.parseInt(g, 16));
}
// Non-public IPv4 ranges.
function isBlockedIPv4([a, b, c]) {
    if (a === 0 || a === 127)
        return true; // 0.0.0.0/8, loopback
    if (a === 10)
        return true; // RFC1918
    if (a === 172 && b >= 16 && b <= 31)
        return true; // RFC1918
    if (a === 192 && b === 168)
        return true; // RFC1918
    if (a === 169 && b === 254)
        return true; // link-local + cloud metadata
    if (a === 100 && b >= 64 && b <= 127)
        return true; // CGNAT 100.64/10
    if (a === 192 && b === 0 && c === 0)
        return true; // IETF protocol assignments 192.0.0.0/24
    if (a === 198 && (b === 18 || b === 19))
        return true; // benchmarking 198.18/15
    if (a >= 224)
        return true; // multicast 224/4, reserved 240/4, broadcast
    return false;
}
// Non-public IPv6 ranges, including every form that embeds an IPv4 address.
function isBlockedIPv6(g) {
    const embedded = (hi, lo) => isBlockedIPv4([hi >> 8, hi & 0xff, lo >> 8, lo & 0xff]);
    const leadingZero = g.slice(0, 5).every((x) => x === 0);
    if (leadingZero && g[5] === 0xffff)
        return embedded(g[6], g[7]); // IPv4-mapped ::ffff:a.b.c.d (dotted OR hex form)
    if (leadingZero && g[5] === 0)
        return true; // ::/96 — unspecified, loopback, deprecated IPv4-compatible
    if (g[0] === 0x64 && g[1] === 0xff9b && g.slice(2, 6).every((x) => x === 0))
        return embedded(g[6], g[7]); // NAT64 64:ff9b::/96
    if (g[0] === 0x2002)
        return embedded(g[1], g[2]); // 6to4 2002::/16
    if ((g[0] & 0xffc0) === 0xfe80)
        return true; // link-local fe80::/10
    if ((g[0] & 0xffc0) === 0xfec0)
        return true; // deprecated site-local fec0::/10
    if ((g[0] & 0xfe00) === 0xfc00)
        return true; // unique-local fc00::/7
    if ((g[0] & 0xff00) === 0xff00)
        return true; // multicast ff00::/8
    return false;
}
/**
 * SSRF address classifier: true when `ip` must NOT be fetched from.
 *
 * Blocks loopback, RFC1918, CGNAT, link-local/metadata, multicast and reserved
 * IPv4, plus IPv6 loopback/unspecified/link-local/site-local/unique-local/multicast
 * and any IPv6 form that embeds a blocked IPv4 address (IPv4-mapped, NAT64, 6to4).
 * Addresses are parsed to numeric groups first, so alternative spellings of the same
 * address — `::ffff:7f00:1` for `::ffff:127.0.0.1`, `0:0:0:0:0:0:0:1` for `::1` —
 * are classified identically. Surrounding `[` `]` are accepted.
 *
 * Fails closed: text that is not a valid IPv4 or IPv6 address is reported as blocked.
 *
 * @param {string} ip - an IP address literal.
 * @returns {boolean} true when the address is non-public (or not an address at all).
 */
export function isBlockedAddress(ip) {
    const text = String(ip).trim().toLowerCase().replace(/^\[/, "").replace(/\]$/, "");
    const v4 = parseIPv4(text);
    if (v4)
        return isBlockedIPv4(v4);
    const v6 = parseIPv6(text);
    if (!v6)
        return true;
    return isBlockedIPv6(v6);
}
42
/**
 * Parse + scheme-check + DNS-resolve a URL, rejecting any non-public address.
 *
 * Every refusal — including a host that does not resolve — is an InvalidInputError,
 * so the caller gets a 4xx naming the rule instead of an opaque server error.
 *
 * @param {string} raw - the URL text.
 * @param {{allowPrivate?: boolean}} opts - `allowPrivate` skips DNS resolution.
 * @returns {Promise<{url: URL, validated: Array<{address: string, family: number}>|null}>}
 *   the URL plus the validated addresses (null when allowPrivate skips resolution).
 * @throws {InvalidInputError} on a malformed URL, a non-http(s) scheme, an
 *   unresolvable host, or any resolved address that isBlockedAddress rejects.
 */
async function validatePublic(raw, opts) {
    let u;
    try {
        u = new URL(raw);
    }
    catch {
        throw new InvalidInputError(`invalid gem URL: ${raw}`);
    }
    if (u.protocol !== "http:" && u.protocol !== "https:") {
        throw new InvalidInputError(`gem URL must be http(s), got ${u.protocol}`);
    }
    if (opts.allowPrivate)
        return { url: u, validated: null };
    const host = u.hostname.replace(/^\[/, "").replace(/\]$/, "");
    let results;
    try {
        results = await lookup(host, { all: true });
    }
    catch {
        // dns.lookup REJECTS (e.g. ENOTFOUND) for an unknown host rather than returning
        // an empty list; report it as the same input error as "no addresses".
        throw new InvalidInputError(`could not resolve gem URL host: ${host}`);
    }
    if (results.length === 0)
        throw new InvalidInputError(`could not resolve gem URL host: ${host}`);
    for (const r of results) {
        if (isBlockedAddress(r.address))
            throw new InvalidInputError(`refusing to fetch gem from non-public address ${r.address} (${host})`);
    }
    return { url: u, validated: results };
}
67
// Validate `raw` via validatePublic and hand back only the parsed URL; rejects
// with whatever validatePublic rejects with.
export async function assertPublicUrl(raw, opts = {}) {
    const { url } = await validatePublic(raw, opts);
    return url;
}
70
// Build a Node-style lookup function that never consults DNS: whatever hostname it
// is asked about, it answers from the already-validated address list — the whole
// list when the caller asks for `all`, otherwise the first entry's address and
// family. The callback may arrive in either the `options` or `callback` position.
export function makePinnedLookup(validated) {
    return (_hostname, options, callback) => {
        const done = typeof options === "function" ? options : callback;
        const wantsAll = typeof options === "object" && options.all;
        if (wantsAll) {
            done(null, validated);
            return;
        }
        const first = validated[0];
        done(null, first.address, first.family);
    };
}
83
// Read a response body into a Buffer while enforcing `maxBytes` DURING the read: an
// oversized Content-Length is refused up front, and a body that lies about (or
// omits) its length is cancelled as soon as the running total passes the cap — so
// a hostile server cannot make us buffer an unbounded stream.
async function readCappedBody(res, maxBytes) {
    const declared = Number(res.headers.get("content-length") ?? Number.NaN);
    if (Number.isFinite(declared) && declared > maxBytes)
        throw new Error(`gem exceeds max size (${declared} > ${maxBytes} bytes)`);
    if (!res.body)
        return Buffer.alloc(0);
    const reader = res.body.getReader();
    const chunks = [];
    let total = 0;
    for (;;) {
        const { done, value } = await reader.read();
        if (done)
            break;
        total += value.byteLength;
        if (total > maxBytes) {
            await reader.cancel().catch(() => { });
            throw new Error(`gem exceeds max size (more than ${maxBytes} bytes)`);
        }
        chunks.push(value);
    }
    return Buffer.concat(chunks, total);
}
/**
 * Fetch a .gem over http(s): validate the URL + every redirect hop, and pin each
 * connection to the validated IP set via an undici dispatcher. Size-capped while
 * streaming.
 *
 * @param {string} raw - the gem URL.
 * @param {object} [opts]
 * @param {number} [opts.maxRedirects=3] - redirect hops allowed before giving up.
 * @param {number} [opts.maxBytes=52428800] - maximum body size in bytes.
 * @param {boolean} [opts.allowPrivate] - forwarded to validatePublic.
 * @returns {Promise<Buffer>} the gem bytes.
 * @throws {Error} on too many redirects, a non-2xx final status, or an oversized
 *   body; plus whatever validatePublic throws for a refused URL.
 */
export async function fetchGemBytes(raw, opts = {}) {
    const maxRedirects = opts.maxRedirects ?? 3;
    const maxBytes = opts.maxBytes ?? 50 * 1024 * 1024;
    let target = raw;
    for (let hop = 0;; hop++) {
        const { url, validated } = await validatePublic(target, opts);
        const dispatcher = validated ? new Agent({ connect: { lookup: makePinnedLookup(validated) } }) : undefined;
        let res;
        try {
            res = await fetch(url.toString(), { redirect: "manual", ...(dispatcher ? { dispatcher } : {}) });
            const loc = res.headers.get("location");
            if (res.status >= 300 && res.status < 400 && loc) {
                if (hop >= maxRedirects)
                    throw new Error("too many redirects fetching gem");
                target = new URL(loc, url).toString();
                continue;
            }
            if (!res.ok)
                throw new Error(`gem fetch failed: HTTP ${res.status}`);
            return await readCappedBody(res, maxBytes);
        }
        finally {
            // Redirect and error responses are never read; cancel the body so the
            // connection is released before the dispatcher is closed.
            if (res?.body && !res.bodyUsed)
                await res.body.cancel().catch(() => { });
            await dispatcher?.close();
        }
    }
}
@@ -0,0 +1,37 @@
1
+ // src/gem/sandbox.ts
2
+ // Pluggable sandbox-backend registry in front of the RunConnectFn seam. Each backend
3
+ // produces a RunConnectFn pre-scoped to a run dir. Auto-allow is capability-gated:
4
+ // isolated backends run permission:"allow" (the FS boundary bounds blast radius);
5
+ // the child-spawn fallback stays deny unless AGENTGEM_GEM_RUN_AUTOALLOW=1.
6
+ import { connectRunSession } from "./acpRun.js"; // value used at call-time (safe ESM cycle)
7
+ import { wrapWithSandbox } from "./sandboxLaunch.js";
8
+ import { binOnPath } from "./binPath.js";
9
// Permission for the non-isolated fallback: "allow" only when
// AGENTGEM_GEM_RUN_AUTOALLOW is exactly "1", otherwise "deny".
export function envPermission(env = process.env) {
    if (env.AGENTGEM_GEM_RUN_AUTOALLOW === "1") {
        return "allow";
    }
    return "deny";
}
12
// Describe an isolated backend. It is available when its platform predicate holds
// and the launcher `bin` is found on PATH. Its connect function rewrites the agent
// command through wrapWithSandbox for the given run dir and connects with
// permission "allow".
function isolatedBackend(id, kind, bin, supported) {
    const available = () => supported() && binOnPath(bin);
    const connectFn = (runDir) => (descriptor, app) => {
        const command = wrapWithSandbox(kind, runDir, descriptor.command);
        return connectRunSession({ ...descriptor, command }, "allow", app);
    };
    return { id, isolated: true, available, connectFn };
}
23
// The non-isolated fallback: always available, connects with the descriptor
// unchanged, and takes its permission from envPermission() at connect time.
export const childSpawnBackend = {
    id: "child-spawn",
    isolated: false,
    available: () => {
        return true;
    },
    connectFn: () => {
        return (descriptor, app) => {
            const permission = envPermission();
            return connectRunSession(descriptor, permission, app);
        };
    },
};
29
// Default backend registry, in preference order: the two OS sandboxes (each gated
// on its platform), then the non-isolated child-spawn fallback last.
export const RUN_BACKENDS = [
    isolatedBackend(
        "macos-seatbelt",
        "macos-seatbelt",
        "sandbox-exec",
        () => process.platform === "darwin",
    ),
    isolatedBackend(
        "linux-bubblewrap",
        "linux-bubblewrap",
        "bwrap",
        () => process.platform === "linux",
    ),
    childSpawnBackend,
];
34
/**
 * Pick the backend for a run and bind its connect function to `runDir`.
 *
 * Preference order:
 *   1. the first isolated backend that reports itself available;
 *   2. otherwise the LAST non-isolated backend that is available (the registry
 *      convention is "fallback goes last");
 *   3. otherwise childSpawnBackend.
 *
 * The fallback is never an isolated backend that just reported itself unavailable:
 * that would wrap the agent in a launcher known to be missing while connecting with
 * auto-allow permission.
 *
 * @param {string} runDir - run directory the connect function is scoped to.
 * @param {Array<object>} [registry=RUN_BACKENDS] - candidate backends.
 * @returns {{backend: object, connectFn: Function}}
 */
export function selectRunBackend(runDir, registry = RUN_BACKENDS) {
    const usable = (b) => b.available();
    const backend = registry.find((b) => b.isolated && usable(b))
        ?? [...registry].reverse().find((b) => !b.isolated && usable(b))
        ?? childSpawnBackend;
    return { backend, connectFn: backend.connectFn(runDir) };
}