@ninemind/agentgem 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -0
- package/dist/gem/acpRecommender.js +259 -0
- package/dist/gem/acpRun.js +156 -0
- package/dist/gem/acpSession.js +79 -0
- package/dist/gem/analysisCache.js +55 -0
- package/dist/gem/archive.js +17 -0
- package/dist/gem/binPath.js +9 -0
- package/dist/gem/buildGem.js +4 -1
- package/dist/gem/channels.js +29 -0
- package/dist/gem/credentials.js +3 -2
- package/dist/gem/distill.js +162 -0
- package/dist/gem/draftStage.js +77 -0
- package/dist/gem/gemVerify.js +35 -0
- package/dist/gem/inputError.js +21 -0
- package/dist/gem/registry.js +23 -4
- package/dist/gem/runGem.js +161 -0
- package/dist/gem/safeFetch.js +112 -0
- package/dist/gem/sandbox.js +37 -0
- package/dist/gem/sandboxLaunch.js +55 -0
- package/dist/gem/scrub.js +108 -0
- package/dist/gem/search.js +34 -0
- package/dist/gem/share.js +21 -0
- package/dist/gem/targets.js +280 -16
- package/dist/gem/testbedFlavors.js +1 -0
- package/dist/gem/workflowScan.js +0 -0
- package/dist/gem/workspaces.js +4 -3
- package/dist/gem.controller.js +151 -16
- package/dist/gem.tools.js +53 -5
- package/dist/gemRunStream.js +67 -0
- package/dist/index.js +15 -0
- package/dist/originGuard.js +36 -0
- package/dist/public/index.html +444 -10
- package/dist/schemas.js +180 -7
- package/dist/workflowStream.js +78 -0
- package/package.json +7 -2
package/README.md
CHANGED
|
@@ -37,9 +37,18 @@ call exactly the same thing.
|
|
|
37
37
|
re-reading raw config.
|
|
38
38
|
- **Composition** — the manifest/lock split lets small, focused Gems be reconciled into
|
|
39
39
|
larger agents with a single re-resolved lock, not a pile of overlapping config.
|
|
40
|
+
- **Workflow-aware recommendations** — [Analyze](docs/analyze.md) scans your agent's
|
|
41
|
+
session history to see which skills, MCP servers, and hooks you actually use, and
|
|
42
|
+
suggests ready-to-build Gems grouped by recurring workflow. It also **distills brand-new
|
|
43
|
+
draft skills** from the procedures you repeat by hand — review them and fold them
|
|
44
|
+
straight into a Gem.
|
|
40
45
|
- **Deploy targets** — Eve and OpenAI Sandbox (code-gen), Flue (materialize, deployable to
|
|
41
46
|
Cloudflare), and Bedrock AgentCore (managed backend); code-gen targets share a common
|
|
42
47
|
`compose` step.
|
|
48
|
+
- **Agent-to-agent (A2A)** — export a Gem as an [A2A](docs/a2a.md) Agent Card or a
|
|
49
|
+
runnable A2A server so other agents can discover and call it.
|
|
50
|
+
- **A native desktop app** — a [macOS/Windows/Linux build](docs/desktop.md) alongside the
|
|
51
|
+
`npx` CLI, hosting the same local server in its own window.
|
|
43
52
|
- **A GitHub-backed registry** — publish, resolve, merge, and install composable Gems over
|
|
44
53
|
the same archive format.
|
|
45
54
|
- **An agent-native path** — every operation is also an MCP tool, so your local agent can
|
|
@@ -106,6 +115,20 @@ pnpm clean # or: npm run clean — rm -rf dist *.tsbuildinfo (run before r
|
|
|
106
115
|
|
|
107
116
|
See [CONTRIBUTING.md](CONTRIBUTING.md) for the full workflow.
|
|
108
117
|
|
|
118
|
+
### Desktop app
|
|
119
|
+
|
|
120
|
+
Prefer a double-click app over the CLI? AgentGem ships a native **desktop build**
|
|
121
|
+
for macOS, Windows, and Linux — download it from
|
|
122
|
+
[Releases](https://github.com/ninemindai/agentgem/releases) (a `desktop-v*` build).
|
|
123
|
+
It hosts the same local server in its own window, adds a native folder picker, app
|
|
124
|
+
menu, and system tray, and never sends secrets off your machine.
|
|
125
|
+
|
|
126
|
+
> The builds are currently **unsigned**: on macOS right-click → **Open**, on Windows
|
|
127
|
+
> choose **More info → Run anyway** the first time.
|
|
128
|
+
|
|
129
|
+
To run or package it from source, see the [desktop guide](docs/desktop.md) — in
|
|
130
|
+
short, `pnpm -C desktop dev` to run, `pnpm -C desktop dist` to build installers.
|
|
131
|
+
|
|
109
132
|
## Layering
|
|
110
133
|
|
|
111
134
|
Depends on AgentBack: `@agentback/core` (lifecycle), `@agentback/rest` +
|
|
@@ -116,8 +139,11 @@ API, and the MCP endpoint are three boundaries over one set of Zod contracts —
|
|
|
116
139
|
|
|
117
140
|
For deeper reference, see [`docs/`](docs/index.md):
|
|
118
141
|
[getting started](docs/getting-started.md) ·
|
|
142
|
+
[desktop app](docs/desktop.md) ·
|
|
143
|
+
[analyze](docs/analyze.md) ·
|
|
119
144
|
[concepts](docs/concepts.md) ·
|
|
120
145
|
[targets & deploy](docs/targets.md) ·
|
|
146
|
+
[A2A](docs/a2a.md) ·
|
|
121
147
|
[registry](docs/registry.md).
|
|
122
148
|
|
|
123
149
|
## License
|
|
@@ -0,0 +1,259 @@
|
|
|
1
|
+
// src/gem/acpRecommender.ts
|
|
2
|
+
//
|
|
3
|
+
// Turns a deterministic WorkflowSignal + inventory into a GemRecommendation by
|
|
4
|
+
// grounding a local ACP coding agent (Claude) with the signal and asking it to
|
|
5
|
+
// cluster/name/justify a Gem. The agent only ranks and explains — its output is
|
|
6
|
+
// re-validated against the inventory (the source of truth), and any failure
|
|
7
|
+
// degrades to a deterministic frequency-based recommendation. Never throws.
|
|
8
|
+
import { join } from "node:path";
|
|
9
|
+
import { agentgemHome } from "../resolveDir.js";
|
|
10
|
+
import { connectAcpAdapter } from "./acpSession.js";
|
|
11
|
+
// Instructions are a boolean on ProjectSelection, not a named include.
|
|
12
|
+
// Artifact types that may appear in a candidate's `include` list; items of any
// other type are skipped wherever this list is consulted.
const SELECTABLE = ["skill", "mcp_server", "hook"];
|
|
13
|
+
// Pinned Claude ACP adapter (npm: @agentclientprotocol/claude-agent-acp).
|
|
14
|
+
// Agent descriptor handed to the connect function by recommendWorkflow.
export const CLAUDE_AGENT = { id: "claude-code", name: "Claude Code", command: ["claude-agent-acp"] };
|
|
15
|
+
// Neutral working dir for the recommender's ACP session. We do NOT open the
|
|
16
|
+
// session in the analyzed project, or claude-agent-acp would log a session
|
|
17
|
+
// transcript THERE — inflating that project's own session history (skewing
|
|
18
|
+
// future analyses and busting the per-project cache). The agent only reasons
|
|
19
|
+
// over the JSON brief, so its cwd is irrelevant to the result.
|
|
20
|
+
export function analysisWorkspace() {
    // <agentgem home>/.agentgem/analysis — the cwd used for the recommender's session.
    const home = agentgemHome();
    return join(home, ".agentgem", "analysis");
}
|
|
21
|
+
// Test-injected connect function; null means no override is installed.
let testConnectFn = null;
|
|
22
|
+
/** Test-only seam: route recommendWorkflow + distillWorkflow through an in-process fake agent. */
|
|
23
|
+
export function setConnectFnForTests(fn) {
    // Install (or, with null, remove) the module-level connect override.
    testConnectFn = fn;
}
|
|
24
|
+
/** The active test connect fn (or null). distillWorkflow shares this seam. */
|
|
25
|
+
export function currentTestConnectFn() {
    // Expose the module-level override as-is (null when none is installed).
    const active = testConnectFn;
    return active;
}
|
|
26
|
+
// ── Deterministic analysis (fallback + the agent's baseline) ─────────────────
|
|
27
|
+
/**
 * Build the frequency-based analysis used both as the fallback and as the agent's baseline.
 *
 * Produces at most ONE candidate: splitting usage into several Gems is the agent's
 * value-add, so the deterministic path stays a single coherent Gem.
 *
 * - Selectable artifacts (see SELECTABLE) with at least one invocation and "high"
 *   confidence are added to the candidate's `include`.
 * - `instructions` artifacts are never included by name; one that was invoked only
 *   switches `includeInstructions` on.
 * - Unresolved entries whose kind is not "builtin" are reported as `gaps`.
 *
 * @param signal Usage signal; reads `artifacts`, `unresolved`, `sessions.scanned` and `root`.
 * @returns `{ candidates, gaps, distilled }` — `candidates` is empty when nothing
 *          qualified, and `distilled` is always empty here.
 */
export function deterministicAnalysis(signal) {
    const include = [];
    let includeInstructions = false;
    for (const artifact of signal.artifacts) {
        const used = artifact.invocations > 0;
        if (artifact.type === "instructions") {
            if (used)
                includeInstructions = true;
            continue;
        }
        if (!SELECTABLE.includes(artifact.type) || !used || artifact.confidence !== "high")
            continue;
        include.push({
            type: artifact.type,
            name: artifact.name,
            reason: `${artifact.invocations} use(s) across ${artifact.sessionsUsedIn} session(s)`,
            root: artifact.root,
        });
    }
    const gaps = signal.unresolved.filter((u) => u.kind !== "builtin").map((u) => u.name);
    if (!include.length)
        return { candidates: [], gaps, distilled: [] };
    // Name the Gem after the project folder: last non-empty path segment, accepting
    // both "/" and "\" so Windows roots and trailing separators still give a folder name.
    const name = signal.root.split(/[\\/]/).filter(Boolean).pop() || "workflow";
    const candidate = {
        name,
        description: `Recommended from ${signal.sessions.scanned} session(s) of usage.`,
        root: signal.root,
        includeInstructions,
        include,
        confidence: "medium",
    };
    return { candidates: [candidate], gaps, distilled: [] };
}
|
|
54
|
+
/**
 * Map a validated candidate to a GemSelection.
 *
 * Global artifacts (`root === null`) go top-level (`skills` / `mcpServers` / `hooks`);
 * every other artifact goes under `projects[root]`; instructions are a per-project
 * boolean set on `projects[c.root]`. Keys are only emitted when they have content.
 *
 * Project buckets are accumulated in a Map and converted with Object.fromEntries, so a
 * root that collides with an inherited Object key (e.g. "constructor", "__proto__")
 * becomes an ordinary own property instead of resolving to — and mutating — a shared
 * built-in object.
 *
 * @param c Candidate; reads `include` (items with `type`, `name`, `root`), `root`
 *          and `includeInstructions`.
 * @returns The selection object (possibly empty).
 */
export function recommendationToSelection(c) {
    const fieldByType = new Map([["skill", "skills"], ["mcp_server", "mcpServers"], ["hook", "hooks"]]);
    const globals = { skills: [], mcpServers: [], hooks: [] };
    const projects = new Map();
    // Plain-object keys are strings, so normalise the key the same way before lookup.
    const projectFor = (root) => {
        const key = String(root);
        let bucket = projects.get(key);
        if (!bucket) {
            bucket = {};
            projects.set(key, bucket);
        }
        return bucket;
    };
    for (const item of c.include) {
        const field = fieldByType.get(item.type);
        if (item.root === null) {
            if (field)
                globals[field].push(item.name);
            continue;
        }
        // A project bucket is created even for an unrecognised type (it just stays empty).
        const bucket = projectFor(item.root);
        if (field)
            (bucket[field] ??= []).push(item.name);
    }
    if (c.includeInstructions)
        projectFor(c.root).includeInstructions = true;
    const sel = {};
    if (globals.skills.length)
        sel.skills = globals.skills;
    if (globals.mcpServers.length)
        sel.mcpServers = globals.mcpServers;
    if (globals.hooks.length)
        sel.hooks = globals.hooks;
    if (projects.size)
        sel.projects = Object.fromEntries(projects);
    return sel;
}
|
|
88
|
+
/**
 * Pull the first balanced `{...}` block out of an agent message that may wrap JSON in
 * prose or code fences.
 *
 * Scans from the first "{" and tracks brace depth, ignoring braces inside double-quoted
 * strings (with backslash escapes), so trailing prose that happens to contain "}" no
 * longer corrupts the extracted span. If the braces never balance, falls back to the
 * widest "{"…"}" span; if there is no usable span at all, the text is returned unchanged
 * and the caller's JSON.parse decides.
 *
 * @param text Raw agent message.
 * @returns The extracted JSON-looking substring, or `text` itself.
 */
function extractJson(text) {
    const start = text.indexOf("{");
    if (start < 0)
        return text;
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i++) {
        const ch = text[i];
        if (inString) {
            if (escaped)
                escaped = false;
            else if (ch === "\\")
                escaped = true;
            else if (ch === '"')
                inString = false;
            continue;
        }
        if (ch === '"') {
            inString = true;
        }
        else if (ch === "{") {
            depth += 1;
        }
        else if (ch === "}") {
            depth -= 1;
            if (depth === 0)
                return text.slice(start, i + 1);
        }
    }
    // Unbalanced input: keep the widest-span behaviour as a last resort.
    const end = text.lastIndexOf("}");
    return end > start ? text.slice(start, end + 1) : text;
}
|
|
94
|
+
/**
 * Validate a raw agent response against the inventory (which is authoritative).
 *
 * - `raw` may be an object or a string; strings go through extractJson + JSON.parse.
 * - Each include item must have a selectable type and a string name that exists in the
 *   inventory. Names are resolved to the project root when present there, else to
 *   global (`null`); the project wins on collision. Unknown names are logged and dropped.
 * - A candidate with no surviving includes is discarded.
 * - A missing or blank candidate name falls back to the project folder name (last
 *   non-empty segment of `signal.root`, accepting "/" and "\"), then to "workflow".
 * - On any structural failure or zero valid candidates, the deterministic analysis is
 *   returned instead.
 *
 * @param raw    Agent output (string or already-parsed object).
 * @param inv    Inventory; reads `project.{root,skills,mcpServers,hooks}` and optional `global`.
 * @param signal Usage signal; passed to deterministicAnalysis, `root` is used for naming.
 * @returns `{ candidates, gaps, distilled }`.
 */
export function validateAnalysis(raw, inv, signal) {
    const fallback = deterministicAnalysis(signal);
    let parsed = raw;
    if (typeof raw === "string") {
        try {
            parsed = JSON.parse(extractJson(raw));
        }
        catch {
            return fallback; // junk from the agent is an expected outcome, not an error
        }
    }
    if (!parsed || typeof parsed !== "object" || !Array.isArray(parsed.candidates))
        return fallback;
    const globalInv = inv.global ?? { skills: [], mcpServers: [], hooks: [] };
    const namesOf = (list) => new Set(list.map((entry) => entry.name));
    const projectNames = {
        skill: namesOf(inv.project.skills),
        mcp_server: namesOf(inv.project.mcpServers),
        hook: namesOf(inv.project.hooks),
    };
    const globalNames = {
        skill: namesOf(globalInv.skills),
        mcp_server: namesOf(globalInv.mcpServers),
        hook: namesOf(globalInv.hooks),
    };
    // project root | null (global) | undefined (not in the inventory at all)
    const resolveRoot = (type, name) => {
        if (projectNames[type]?.has(name))
            return inv.project.root;
        if (globalNames[type]?.has(name))
            return null;
        return undefined;
    };
    // Evaluated lazily so a signal without a usable root only matters when a name is missing.
    const folderName = () => signal.root.split(/[\\/]/).filter(Boolean).pop() || "workflow";
    const candidates = [];
    for (const c of parsed.candidates) {
        if (!c || typeof c !== "object" || !Array.isArray(c.include))
            continue;
        const include = [];
        for (const item of c.include) {
            if (!item || !SELECTABLE.includes(item.type) || typeof item.name !== "string")
                continue;
            const root = resolveRoot(item.type, item.name);
            if (root === undefined) {
                console.error(`workflow: dropping hallucinated ${item.type} '${item.name}'`);
                continue;
            }
            include.push({
                type: item.type,
                name: item.name,
                reason: typeof item.reason === "string" ? item.reason : "",
                root,
            });
        }
        if (!include.length)
            continue;
        const hasName = typeof c.name === "string" && c.name.trim() !== "";
        candidates.push({
            name: hasName ? c.name : folderName(),
            description: typeof c.description === "string" ? c.description : "",
            root: signal.root,
            includeInstructions: c.includeInstructions === true,
            include,
            confidence: ["high", "medium", "low"].includes(c.confidence) ? c.confidence : "medium",
        });
    }
    if (!candidates.length)
        return fallback;
    const gaps = Array.isArray(parsed.gaps)
        ? parsed.gaps.filter((gap) => typeof gap === "string")
        : fallback.gaps;
    return { candidates, gaps, distilled: [] };
}
|
|
159
|
+
// ── The agent run ────────────────────────────────────────────────────────────
|
|
160
|
+
/**
 * Build the grounding prompt sent to the agent.
 *
 * @param signalJson    JSON text of the usage signal, embedded verbatim.
 * @param inventoryJson JSON text of the (trimmed) inventory, embedded verbatim.
 * @returns The full prompt: task framing, the two JSON payloads, and the exact
 *          response shape the agent must return.
 */
const GROUNDING = (signalJson, inventoryJson) => [
    // What a Gem is and how to carve usage into flows.
    `You recommend reusable "Gems" — bundles of installed artifacts for a recurring workflow.\n`,
    `A project often exercises SEVERAL distinct flows (e.g. diagram generation vs web scraping). `,
    `Use the per-session "shapes" (sets of artifacts used together) plus co-occurrence to identify each `,
    `recurring flow, and propose ONE Gem per flow.\n`,
    `The inventory has PROJECT artifacts (scoped to this repo) and GLOBAL artifacts (from the machine / `,
    `installed plugins). Include either by exact name — both get bundled into the Gem.\n`,
    // The two payloads.
    `USAGE SIGNAL (authoritative — invocation counts and shapes are facts):\n${signalJson}\n\n`,
    `INVENTORY (the only artifacts that exist — never invent names outside this):\n${inventoryJson}\n\n`,
    // The response contract.
    `Return ONLY a JSON object: {"candidates":[{"name","description","includeInstructions":bool,`,
    `"include":[{"type":"skill"|"mcp_server"|"hook","name","reason"}],"confidence":"high"|"medium"|"low"}],"gaps":[string]}.\n`,
    `Each candidate is one coherent flow. Prefer 1–4 candidates; don't split trivially or duplicate. Use exact inventory names.`,
].join("");
|
|
171
|
+
/**
 * Reduce the inventory to what the agent needs to see.
 *
 * Skill bodies are large, so project skills are sent as name + description only.
 * The global catalog can be huge, so the global section is limited to artifacts
 * whose name is in `usedGlobal` (the ones that actually fired).
 *
 * @param inv        Inventory with a `project` section and an optional `global` section.
 * @param usedGlobal Set of names of global artifacts that were invoked.
 * @returns A compact, JSON-serialisable view of the inventory.
 */
function trimInventory(inv, usedGlobal) {
    const { project } = inv;
    const globalInv = inv.global ?? { skills: [], mcpServers: [], hooks: [] };
    const wasUsed = (entry) => usedGlobal.has(entry.name);
    const nameOnly = (entry) => ({ name: entry.name });
    const projectView = {
        skills: project.skills.map((skill) => ({ name: skill.name, description: skill.description ?? "" })),
        mcpServers: project.mcpServers.map((server) => ({ name: server.name, transport: server.transport })),
        instructions: project.instructions.map(nameOnly),
        hooks: project.hooks.map((hook) => ({ name: hook.name, event: hook.event, matcher: hook.matcher ?? null })),
    };
    const globalView = {
        skills: globalInv.skills.filter(wasUsed).map(nameOnly),
        mcpServers: globalInv.mcpServers.filter(wasUsed).map(nameOnly),
        hooks: globalInv.hooks.filter(wasUsed).map((hook) => ({ name: hook.name, event: hook.event })),
    };
    return {
        projectRoot: project.root,
        name: project.name,
        project: projectView,
        global: globalView,
    };
}
|
|
191
|
+
/**
 * Settle with `p`, or reject with "agent timeout after <ms>ms" if `p` takes longer.
 *
 * The timer is cleared as soon as the race settles, so a prompt that finishes early
 * does not leave a pending timer holding the event loop open for the rest of `ms`.
 *
 * @param p  Promise to bound.
 * @param ms Timeout in milliseconds.
 * @returns A promise that mirrors `p` unless the timeout fires first.
 */
function withTimeout(p, ms) {
    let timer;
    const timeout = new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error(`agent timeout after ${ms}ms`)), ms);
    });
    return Promise.race([p, timeout]).finally(() => clearTimeout(timer));
}
|
|
194
|
+
/**
 * Analyse `signal`/`inv` into candidate Gems by prompting a local ACP agent.
 *
 * Total: on any agent error, timeout or junk it logs the reason and returns the
 * deterministic analysis with `degraded: true`. The failure reason is derived safely
 * even when the rejection value is not an Error (e.g. `null`), so the catch path
 * itself cannot throw on it.
 *
 * @param signal Usage signal (its global artifacts with invocations select which
 *               global inventory entries are sent to the agent).
 * @param inv    Full inventory; a trimmed view is sent, the full one validates the reply.
 * @param opts   Optional `connectFn` (overrides the test seam and the default),
 *               `timeoutMs` (default 60_000) and `onDelta` (streamed agent text).
 * @returns `{ analysis, degraded }`.
 */
export async function recommendWorkflow(signal, inv, opts = {}) {
    const connectFn = opts.connectFn ?? testConnectFn ?? defaultConnectFn;
    const timeoutMs = opts.timeoutMs ?? 60_000;
    // Best-effort cleanup: swallow sync throws, and mark an async rejection as handled
    // without waiting on it (cleanup must not delay or fail the result).
    const quietly = (fn) => {
        try {
            const pending = fn();
            if (pending && typeof pending.catch === "function")
                pending.catch(() => { });
        }
        catch { /* ignore */ }
    };
    let conn = null;
    let handle = null;
    try {
        const usedGlobal = new Set(signal.artifacts.filter((a) => a.root === null && a.invocations > 0).map((a) => a.name));
        const trimmedInv = trimInventory(inv, usedGlobal);
        conn = await connectFn(CLAUDE_AGENT, null);
        handle = await conn.ctx.open(analysisWorkspace()); // neutral cwd — don't pollute the project
        await handle.setMode("plan"); // explicit — never edits files
        const prompt = GROUNDING(JSON.stringify(signal), JSON.stringify(trimmedInv));
        const text = await withTimeout(handle.promptText(prompt, opts.onDelta), timeoutMs);
        return { analysis: validateAnalysis(text, inv, signal), degraded: false };
    }
    catch (err) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error("workflow: recommender fell back to deterministic:", reason);
        return { analysis: deterministicAnalysis(signal), degraded: true };
    }
    finally {
        quietly(() => handle?.dispose());
        quietly(() => conn?.close());
    }
}
|
|
228
|
+
/**
 * Real connect for the recommender: goes through the shared adapter plumbing with
 * permissions denied (the recommender must never run tools) and exposes a session
 * handle whose `promptText` returns only the agent's message text as one string.
 *
 * @param descriptor Agent descriptor forwarded to connectAcpAdapter.
 * @returns `{ ctx, close }` — `ctx.open(cwd)` yields `{ setMode, promptText, dispose }`.
 */
export const defaultConnectFn = async (descriptor) => {
    const adapter = await connectAcpAdapter(descriptor, { clientName: "agentgem-workflow-recommender", permission: "deny" });
    const open = async (cwd) => {
        const session = await adapter.open(cwd);
        const promptText = async (text, onDelta) => {
            const pieces = [];
            await session.prompt(text, (update) => {
                // Only text blocks of agent message chunks contribute; everything else is ignored.
                if (update?.sessionUpdate !== "agent_message_chunk")
                    return;
                const content = update.content;
                if (content?.type !== "text" || typeof content.text !== "string")
                    return;
                pieces.push(content.text);
                onDelta?.(content.text);
            });
            return pieces.join("");
        };
        return {
            setMode: (mode) => session.setMode(mode),
            promptText,
            dispose: () => session.dispose(),
        };
    };
    return { ctx: { open }, close: adapter.close };
};
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
// src/gem/acpRun.ts
|
|
2
|
+
//
|
|
3
|
+
// Runs a materialized Gem by driving a locally-installed ACP coding agent (Claude)
|
|
4
|
+
// against a task, and captures what the agent DID — its message text plus the
|
|
5
|
+
// trace of tool invocations. This is the trust-inversion of acpRecommender:
|
|
6
|
+
// recommender runner (here)
|
|
7
|
+
// ─────────── ─────────────
|
|
8
|
+
// neutral analysisWorkspace() → the materialized testbed dir
|
|
9
|
+
// mode "plan" (never edits) → a tool-capable mode (agent uses the Gem)
|
|
10
|
+
// captures agent_message_chunk → also captures tool_call updates
|
|
11
|
+
//
|
|
12
|
+
// As with the recommender, the SDK details live behind a single connectFn seam so
|
|
13
|
+
// tests inject a plain fake. Unlike the recommender there is no deterministic
|
|
14
|
+
// fallback — a failed run is a real outcome the caller (e.g. verification) needs,
|
|
15
|
+
// so we never throw: failures surface as { ok:false, error }.
|
|
16
|
+
//
|
|
17
|
+
// NOTE (consolidation): the ACP façade is duplicated from acpRecommender on purpose
|
|
18
|
+
// while this path is prototyped. Once both are proven, the two connectFns should be
|
|
19
|
+
// unified into a shared acpSession module.
|
|
20
|
+
import { connectAcpAdapter } from "./acpSession.js";
|
|
21
|
+
import { selectRunBackend, envPermission } from "./sandbox.js"; // values used at call-time (safe ESM cycle)
|
|
22
|
+
/** Create an empty run result: no message text yet and no recorded tool calls. */
export function createAccumulator() {
    const toolCalls = [];
    return { text: "", toolCalls };
}
|
|
25
|
+
/**
 * Fold one session update into the accumulator.
 *
 * - "agent_message_chunk": a text block is appended to `acc.text` and sent to `onDelta`.
 * - "tool_call": a call with an id is recorded and announced once through `onToolCall`.
 * - "tool_call_update": status / kind / non-empty title are copied onto the recorded
 *   call; updates for a call that was never recorded are ignored.
 * - anything else is ignored.
 *
 * @param acc      Accumulator from createAccumulator (mutated in place).
 * @param update   Session update object; dispatch is on `update.sessionUpdate`.
 * @param handlers Optional `{ onDelta, onToolCall }` callbacks.
 */
export function applyUpdate(acc, update, handlers) {
    const type = update.sessionUpdate;
    if (type === "agent_message_chunk") {
        const content = update.content;
        const isText = content?.type === "text" && typeof content.text === "string";
        if (isText) {
            acc.text += content.text;
            handlers?.onDelta?.(content.text);
        }
        return;
    }
    if (type === "tool_call") {
        const { toolCallId, kind, status } = update;
        if (!toolCallId)
            return;
        const entry = { toolCallId, title: update.title ?? "", kind, status };
        acc.toolCalls.push(entry);
        // Announced once, at start; the final status is visible on the accumulated entry.
        handlers?.onToolCall?.(entry);
        return;
    }
    if (type === "tool_call_update") {
        const tracked = acc.toolCalls.find((call) => call.toolCallId === update.toolCallId);
        if (!tracked)
            return; // never saw this call start
        const { status, kind, title } = update;
        if (status !== undefined)
            tracked.status = status;
        if (kind !== undefined)
            tracked.kind = kind;
        if (title !== undefined && title !== "")
            tracked.title = title;
    }
}
|
|
64
|
+
// Pinned Claude ACP adapter, same binary the recommender spawns.
|
|
65
|
+
// Descriptor used by runGemWithAgent when the caller supplies no `descriptor`.
export const CLAUDE_RUN_AGENT = { id: "claude-code", name: "Claude Code", command: ["claude-agent-acp"] };
|
|
66
|
+
// The default non-plan mode: lets the agent actually invoke the Gem's tools. The
|
|
67
|
+
// recommender pins "plan"; the runner pins its counterpart so the trust-inversion
|
|
68
|
+
// is explicit rather than incidental.
|
|
69
|
+
// Mode id applied to the session by runGemWithAgent when the caller supplies no `mode`.
export const DEFAULT_RUN_MODE = "default";
|
|
70
|
+
// Default prompt timeout. Generous — a real Gem run can drive the agent through
|
|
71
|
+
// several tool calls — but bounded so a wedged agent can't hang the caller.
|
|
72
|
+
// 300_000 ms = 5 minutes; used by runGemWithAgent when the caller supplies no `timeoutMs`.
export const DEFAULT_RUN_TIMEOUT_MS = 300_000;
|
|
73
|
+
/**
 * Mirror `p`, but reject with "agent run timed out after <ms>ms" if it has not
 * settled within `ms` milliseconds. The timer is cleared once `p` settles.
 */
function withTimeout(p, ms) {
    return new Promise((resolve, reject) => {
        const onTimeout = () => reject(new Error(`agent run timed out after ${ms}ms`));
        const timer = setTimeout(onTimeout, ms);
        // Wrap a settle function so the timer is always cancelled first.
        const settleWith = (settle) => (outcome) => {
            clearTimeout(timer);
            settle(outcome);
        };
        p.then(settleWith(resolve), settleWith(reject));
    });
}
|
|
79
|
+
// Test seam: route runGemWithAgent through an in-process fake agent (mirrors
|
|
80
|
+
// acpRecommender.setConnectFnForTests). Lets the REST/SSE surface be exercised
|
|
81
|
+
// without spawning a real coding agent.
|
|
82
|
+
// Test-injected connect function for the runner; null means none is installed.
let testConnectFn = null;
|
|
83
|
+
export function setRunConnectFnForTests(fn) {
    // Install (or, with null, remove) the runner's fake-agent connect function.
    testConnectFn = fn;
}
|
|
84
|
+
/** True when a fake agent is injected — callers skip adapter resolution/fetch. */
|
|
85
|
+
export function hasTestConnectFn() {
    // Strict null check: anything other than null counts as "injected".
    const injected = testConnectFn !== null;
    return injected;
}
|
|
86
|
+
/**
 * Drive a local ACP agent against `opts.task` inside the already-materialized
 * `opts.dir`, returning what it did.
 *
 * Connection, session and prompt failures (including the timeout) come back as
 * `{ ok: false, error }` with an empty result. The error text is derived safely even
 * when the rejection value is not an Error (e.g. `null`), so the failure path itself
 * cannot throw on it.
 *
 * @param opts `dir`, `task`, and optional `connectFn`, `descriptor`, `mode`,
 *             `timeoutMs`, `onDelta`, `onToolCall`. An explicit or test-injected
 *             connect function bypasses backend selection and is reported as
 *             `{ backend: "injected", isolated: false }`.
 * @returns `{ ok, result, sandbox }`, plus `error` when `ok` is false.
 */
export async function runGemWithAgent(opts) {
    const explicit = opts.connectFn ?? testConnectFn;
    const selected = explicit ? null : selectRunBackend(opts.dir);
    const connectFn = explicit ?? selected.connectFn;
    const sandbox = selected
        ? { backend: selected.backend.id, isolated: selected.backend.isolated }
        : { backend: "injected", isolated: false };
    const mode = opts.mode ?? DEFAULT_RUN_MODE;
    const timeoutMs = opts.timeoutMs ?? DEFAULT_RUN_TIMEOUT_MS;
    // Best-effort cleanup: swallow sync throws, and mark an async rejection as handled
    // without waiting on it (cleanup must not delay or fail the result).
    const quietly = (fn) => {
        try {
            const pending = fn();
            if (pending && typeof pending.catch === "function")
                pending.catch(() => { });
        }
        catch { /* ignore */ }
    };
    let conn = null;
    let handle = null;
    try {
        conn = await connectFn(opts.descriptor ?? CLAUDE_RUN_AGENT, null);
        handle = await conn.ctx.open(opts.dir); // the testbed dir — NOT a neutral one
        await handle.setMode(mode); // tool-capable — the agent uses the Gem
        const result = await withTimeout(handle.prompt(opts.task, opts.onDelta, opts.onToolCall), timeoutMs);
        return { ok: true, result, sandbox };
    }
    catch (err) {
        const error = err instanceof Error ? err.message : String(err);
        return { ok: false, result: { text: "", toolCalls: [] }, error, sandbox };
    }
    finally {
        quietly(() => handle?.dispose());
        quietly(() => conn?.close());
    }
}
|
|
123
|
+
/**
 * The shared run-session façade: connect the ACP adapter with an explicit permission
 * policy and fold every session update into a run result via applyUpdate, which also
 * captures the tool trace. Per the original design note, backends in sandbox.ts call
 * this with a wrapped descriptor (isolated => "allow") or the raw descriptor
 * (child-spawn => env policy).
 *
 * SECURITY (as documented by the authors): on the isolated path (macos-seatbelt /
 * linux-bubblewrap) auto-allow is safe by default because the OS-native FS boundary
 * bounds the blast radius to the run dir and temp. On the child-spawn fallback,
 * permission is "deny" unless AGENTGEM_GEM_RUN_AUTOALLOW=1 is set (an escape hatch for
 * trusted local sessions). Together with the loopback origin guard and the
 * server-derived run dir, this keeps a malicious browser tab from driving a
 * fully-permissioned local agent.
 *
 * @param descriptor Agent descriptor forwarded to connectAcpAdapter.
 * @param permission Permission policy forwarded to connectAcpAdapter.
 * @param _app       Unused here; kept for signature compatibility.
 * @returns `{ ctx, close }` — `ctx.open(cwd)` yields `{ setMode, prompt, dispose }`.
 */
export async function connectRunSession(descriptor, permission, _app) {
    const adapter = await connectAcpAdapter(descriptor, { clientName: "agentgem-gem-runner", permission });
    const open = async (cwd) => {
        const session = await adapter.open(cwd);
        const prompt = async (text, onDelta, onToolCall) => {
            const result = createAccumulator();
            const handlers = { onDelta, onToolCall };
            // A missing update is folded as an empty object, which applyUpdate ignores.
            await session.prompt(text, (update) => applyUpdate(result, (update ?? {}), handlers));
            return result;
        };
        return {
            setMode: (mode) => session.setMode(mode),
            prompt,
            dispose: () => session.dispose(),
        };
    };
    return { ctx: { open }, close: adapter.close };
}
|
|
154
|
+
// Back-compat: the unsandboxed child-spawn connect, env-gated via the single source of
|
|
155
|
+
// truth for the auto-allow flag (shared with sandbox.ts's child-spawn backend).
|
|
156
|
+
export const defaultRunConnectFn = (descriptor, app) => {
    // Permission comes from the environment policy, resolved at call time.
    const permission = envPermission();
    return connectRunSession(descriptor, permission, app);
};
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
// src/gem/acpSession.ts
|
|
2
|
+
//
|
|
3
|
+
// The shared ACP adapter plumbing used by BOTH the workflow recommender and the
|
|
4
|
+
// Gem runner: spawn the adapter binary, bridge stdio via the SDK, build a session,
|
|
5
|
+
// set its mode, and pump session updates until the turn stops. The two callers
|
|
6
|
+
// differ only in permission policy (deny vs allow) and how they fold updates
|
|
7
|
+
// (text-only string vs structured RunResult), so those stay in the callers — this
|
|
8
|
+
// module owns the boilerplate that was previously copy-pasted between them.
|
|
9
|
+
//
|
|
10
|
+
// NEEDS LIVE VALIDATION: stdio bridging against the real ACP adapter (covered by
|
|
11
|
+
// the runner + recommender live smokes, since both now route through here).
|
|
12
|
+
import { spawn } from "node:child_process";
|
|
13
|
+
import { mkdirSync } from "node:fs";
|
|
14
|
+
import { Readable, Writable } from "node:stream";
|
|
15
|
+
/**
 * Spawn the ACP adapter binary, bridge its stdio through the SDK, perform the
 * `initialize` handshake, and return a small handle for opening sessions.
 *
 * Failure handling:
 * - If anything fails after the child has spawned (SDK client setup, stream bridging,
 *   the `initialize` request), the connection and child are torn down before the error
 *   is rethrown — the caller never received a handle, so it could not clean up itself.
 * - `prompt` observes the in-flight `session.prompt(...)` call: if that call rejects,
 *   the rejection is surfaced from `prompt` instead of becoming an unhandled rejection
 *   while the update loop keeps waiting.
 *
 * @param descriptor Agent descriptor; `command` is `[bin, ...args]` for the child process.
 * @param opts       `clientName` for the SDK client and `permission` ("allow" selects the
 *                   "allow" option on every permission request; anything else cancels).
 * @returns `{ open(cwd), close() }`. `open` yields `{ setMode, prompt, dispose }`.
 * @throws If the binary cannot be spawned or the handshake fails.
 */
export async function connectAcpAdapter(descriptor, opts) {
    const { client, ndJsonStream, PROTOCOL_VERSION } = await import("@agentclientprotocol/sdk");
    const [bin, ...args] = descriptor.command;
    const child = spawn(bin, args, { stdio: ["pipe", "pipe", "inherit"], env: process.env });
    await new Promise((resolve, reject) => {
        child.once("spawn", () => resolve());
        child.once("error", (e) => reject(new Error(`failed to spawn ${bin}: ${e.message}`)));
    });
    let connection = null;
    // Idempotent, best-effort teardown shared by the failure path and the returned handle.
    const close = () => {
        try {
            connection?.close();
        }
        catch { /* ignore */ }
        try {
            child.kill();
        }
        catch { /* ignore */ }
    };
    let agentCtx;
    try {
        const app = client({ name: opts.clientName });
        // NOTE(review): the "allow" reply hard-codes optionId "allow"; presumably this matches
        // the option ids the pinned adapters offer — confirm against each adapter.
        const reply = opts.permission === "allow"
            ? { outcome: { outcome: "selected", optionId: "allow" } }
            : { outcome: { outcome: "cancelled" } };
        app.onRequest?.("session/request_permission", async () => reply);
        const input = Readable.toWeb(child.stdout);
        const output = Writable.toWeb(child.stdin);
        connection = app.connect(ndJsonStream(output, input));
        agentCtx = connection.agent;
        // ACP requires an `initialize` handshake before any session/new. claude-agent-acp
        // tolerated skipping it; codex-acp strictly rejects session/new with "Not
        // initialized" (-32603) without it. We advertise no client capabilities we don't
        // implement (no fs/terminal handlers) — both adapters write files directly.
        await agentCtx.request("initialize", { protocolVersion: PROTOCOL_VERSION });
    }
    catch (err) {
        close(); // don't orphan the adapter process when setup fails
        throw err;
    }
    return {
        async open(cwd) {
            try {
                mkdirSync(cwd, { recursive: true });
            }
            catch { /* best-effort */ }
            const session = await agentCtx.buildSession(cwd).start();
            const sessionId = session.sessionId;
            return {
                async setMode(mode) {
                    try {
                        await agentCtx.request("session/set_mode", { sessionId, modeId: mode });
                    }
                    catch { /* best-effort */ }
                },
                async prompt(text, onUpdate) {
                    // Settles only if the prompt call fails; a successful prompt leaves it
                    // pending, so the loop below ends on the "stop" message as before.
                    const promptFailed = new Promise((_, reject) => {
                        Promise.resolve(session.prompt(text)).catch(reject);
                    });
                    promptFailed.catch(() => { }); // observed through the race below
                    for (;;) {
                        const msg = await Promise.race([session.nextUpdate(), promptFailed]);
                        if (msg.kind === "stop")
                            break;
                        if (msg.kind === "session_update")
                            onUpdate(msg.update);
                    }
                },
                dispose() {
                    try {
                        session.dispose?.();
                    }
                    catch { /* ignore */ }
                },
            };
        },
        close,
    };
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// src/gem/analysisCache.ts
|
|
2
|
+
//
|
|
3
|
+
// Per-project cache of the (expensive, ~15-20s) workflow analysis. Keyed by the
|
|
4
|
+
// project root and a transcript "token" that changes whenever a session is added
|
|
5
|
+
// or updated — so the cache stays valid until the project's sessions change, and
|
|
6
|
+
// revisiting a project to pick a different candidate is instant. Best-effort and
|
|
7
|
+
// persistent (~/.agentgem/analysis-cache.json); failures never throw.
|
|
8
|
+
import { readFileSync, writeFileSync, mkdirSync, statSync } from "node:fs";
|
|
9
|
+
import { join, dirname } from "node:path";
|
|
10
|
+
import { agentgemHome } from "../resolveDir.js";
|
|
11
|
+
// Upper bound on the number of entries kept in the cache file on each write.
const MAX_ENTRIES = 50;
|
|
12
|
+
function cachePath() {
    // <agentgem home>/.agentgem/analysis-cache.json
    const home = agentgemHome();
    return join(home, ".agentgem", "analysis-cache.json");
}
|
|
13
|
+
// Bump on any change to what an analysis result contains (the token is otherwise
|
|
14
|
+
// content-blind). v2 = the payload now carries the `distilled` track, so v1 entries
|
|
15
|
+
// (which lack it) must not be served (proposal §8).
|
|
16
|
+
const TOKEN_VERSION = "v2";
/**
 * Build a cheap validity token for a set of transcript files.
 *
 * The token has the form `<version>:<number of paths>:<newest mtime in ms, rounded>`.
 * Paths that cannot be stat'ed still count towards the length but contribute no mtime.
 *
 * @param {string[]} paths - Transcript file paths.
 * @returns {string} The token.
 */
export function transcriptToken(paths) {
    // mtime of one file, or 0 when it cannot be stat'ed (e.g. it has been removed).
    const mtimeOf = (file) => {
        try {
            return statSync(file).mtimeMs;
        }
        catch {
            return 0;
        }
    };
    let newest = 0;
    for (const file of paths) {
        const stamp = mtimeOf(file);
        newest = stamp > newest ? stamp : newest;
    }
    return [TOKEN_VERSION, paths.length, Math.round(newest)].join(":");
}
|
|
30
|
+
/**
 * Load every usable entry from the cache file.
 *
 * Never throws: a missing, unreadable or unparsable file, or a file whose top-level
 * value is not an array, yields an empty list.
 *
 * @returns {object[]} The stored entries, with non-object elements removed.
 */
function readAll() {
    try {
        const parsed = JSON.parse(readFileSync(cachePath(), "utf8"));
        if (!Array.isArray(parsed))
            return [];
        // Callers dereference `entry.root` / `entry.token` directly, so a stray null or
        // primitive element would make lookups throw and would abort every later write.
        // Discard such elements here instead of letting one bad entry poison the cache.
        return parsed.filter((entry) => entry !== null && typeof entry === "object");
    }
    catch {
        return [];
    }
}
|
|
39
|
+
/**
 * Look up the stored analysis for a project.
 *
 * @param {string} root - Project root the entry was stored under.
 * @param {string} token - Validity token the entry must carry.
 * @returns The stored result of the first entry matching both `root` and `token`,
 *   or null when there is no such entry.
 */
export function readAnalysisCache(root, token) {
    const isMatch = (entry) => entry.root === root && entry.token === token;
    const hit = readAll().find(isMatch);
    if (!hit)
        return null;
    return hit.result;
}
|
|
44
|
+
/**
 * Store (root, token) → result, replacing any prior entry for `root`.
 *
 * Best-effort: any failure (unwritable directory, unserialisable result, ...) is
 * swallowed so caching can never break the caller.
 *
 * @param {string} root - Project root used as the entry key.
 * @param {string} token - Validity token to store with the entry.
 * @param {*} result - Analysis result to persist (serialised with JSON.stringify).
 * @param {number} [nowMs=Date.now()] - Timestamp recorded on the entry and used for ordering.
 */
export function writeAnalysisCache(root, token, result, nowMs = Date.now()) {
    try {
        // Skip malformed stored elements rather than letting one of them abort the write.
        const others = readAll().filter((x) => x !== null && typeof x === "object" && x.root !== root);
        // Entries without a usable timestamp sort as oldest instead of producing NaN.
        const stampOf = (entry) => (Number.isFinite(entry.ts) ? entry.ts : 0);
        others.sort((a, b) => stampOf(b) - stampOf(a));
        // The entry being written always survives the cap, even if its timestamp ties
        // with or is older than the ones already stored.
        const fresh = { root, token, result, ts: nowMs };
        const kept = [fresh, ...others].slice(0, MAX_ENTRIES);
        const path = cachePath();
        mkdirSync(dirname(path), { recursive: true });
        writeFileSync(path, JSON.stringify(kept), "utf8");
    }
    catch { /* best-effort */ }
}
|
package/dist/gem/archive.js
CHANGED
|
@@ -86,6 +86,14 @@ export function writeGemArchive(gem, opts = {}) {
|
|
|
86
86
|
if (place(path, JSON.stringify(body, null, 2), a.name, "mcp_server"))
|
|
87
87
|
artifacts.push({ type: "mcp_server", name: a.name, path });
|
|
88
88
|
}
|
|
89
|
+
else if (a.type === "channel") {
|
|
90
|
+
const path = `channels/${withExt(seg, ".json")}`;
|
|
91
|
+
const body = { platform: a.platform, secretRefs: a.secretRefs };
|
|
92
|
+
if (a.description !== undefined)
|
|
93
|
+
body.description = a.description;
|
|
94
|
+
if (place(path, JSON.stringify(body, null, 2), a.name, "channel"))
|
|
95
|
+
artifacts.push({ type: "channel", name: a.name, path });
|
|
96
|
+
}
|
|
89
97
|
else {
|
|
90
98
|
const path = `hooks/${withExt(seg, ".json")}`;
|
|
91
99
|
const body = { event: a.event, config: a.config };
|
|
@@ -159,6 +167,15 @@ export function readGemArchive(files) {
|
|
|
159
167
|
a.secretRefs = o.secretRefs;
|
|
160
168
|
return a;
|
|
161
169
|
}
|
|
170
|
+
if (e.type === "channel") {
|
|
171
|
+
const o = JSON.parse(body(e.path));
|
|
172
|
+
const a = { type: "channel", name: e.name, platform: o.platform, secretRefs: o.secretRefs };
|
|
173
|
+
if (o.description !== undefined)
|
|
174
|
+
a.description = o.description;
|
|
175
|
+
return a;
|
|
176
|
+
}
|
|
177
|
+
if (e.type !== "hook")
|
|
178
|
+
throw new Error(`unknown artifact type '${e.type}' in manifest`);
|
|
162
179
|
const o = JSON.parse(body(e.path));
|
|
163
180
|
const a = { type: "hook", name: e.name, event: o.event, config: o.config };
|
|
164
181
|
if (o.matcher !== undefined)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
// src/gem/binPath.ts
|
|
2
|
+
// Leaf helper: is `binName` resolvable on the current PATH? Shared by the adapter
|
|
3
|
+
// resolver (runGem) and the sandbox backend availability checks (sandbox), so neither
|
|
4
|
+
// hard-codes an absolute install path for a tool that distros place in different dirs.
|
|
5
|
+
import { existsSync } from "node:fs";
|
|
6
|
+
import { delimiter, join } from "node:path";
|
|
7
|
+
/**
 * Report whether `binName` exists in any directory listed on the current PATH.
 *
 * @param {string} binName - Bare executable name (no directory part).
 * @returns {boolean} true when a matching entry exists in some PATH directory;
 *   false otherwise, including for an empty or non-string name.
 */
export function binOnPath(binName) {
    // join(dir, "") collapses to the directory itself, so an empty name would
    // "resolve" in every existing PATH directory.
    if (typeof binName !== "string" || binName === "")
        return false;
    const dirs = (process.env.PATH ?? "").split(delimiter).filter((dir) => dir !== "");
    // On Windows a bare command name is resolved by appending the PATHEXT suffixes,
    // so `tool` must also match `tool.exe`, `tool.cmd`, ...
    const suffixes = process.platform === "win32"
        ? ["", ...(process.env.PATHEXT ?? ".COM;.EXE;.BAT;.CMD").split(";").filter((ext) => ext !== "")]
        : [""];
    return dirs.some((dir) => suffixes.some((ext) => existsSync(join(dir, `${binName}${ext}`))));
}
|
package/dist/gem/buildGem.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import { redactMcpConfig } from "./redact.js";
|
|
2
|
+
import { makeChannelArtifact } from "./channels.js";
|
|
2
3
|
export function buildGem(inventory, selection, opts = {}) {
|
|
3
4
|
const artifacts = [];
|
|
4
5
|
const projects = inventory.projects ?? [];
|
|
@@ -68,9 +69,11 @@ export function buildGem(inventory, selection, opts = {}) {
|
|
|
68
69
|
});
|
|
69
70
|
artifacts.length = 0;
|
|
70
71
|
artifacts.push(...guarded);
|
|
72
|
+
for (const ch of opts.channels ?? [])
|
|
73
|
+
artifacts.push(makeChannelArtifact(ch.platform, ch.name));
|
|
71
74
|
const requiredSecrets = [];
|
|
72
75
|
for (const a of artifacts) {
|
|
73
|
-
if ((a.type === "mcp_server" || a.type === "hook") && a.secretRefs) {
|
|
76
|
+
if ((a.type === "mcp_server" || a.type === "hook" || a.type === "channel") && a.secretRefs) {
|
|
74
77
|
for (const ref of a.secretRefs)
|
|
75
78
|
requiredSecrets.push({ name: ref.name, artifact: a.name, location: ref.location });
|
|
76
79
|
}
|