chainlesschain 0.45.81 → 0.46.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,438 @@
1
+ /**
2
+ * Cowork Learning Engine — analyze historical runs to optimize template
3
+ * selection, surface failure patterns, and recommend templates for new
4
+ * prompts based on past outcomes.
5
+ *
6
+ * Reads `.chainlesschain/cowork/history.jsonl` produced by the runner.
7
+ * Records have shape:
8
+ * { taskId, status, templateId, templateName, result, userMessage, timestamp }
9
+ * where result = { summary, tokenCount, toolsUsed, iterationCount, artifacts }.
10
+ *
11
+ * The analysis functions are pure/sync over the in-memory record list, and all
12
+ * file access goes through the injectable `_deps`, making the module trivially testable.
13
+ *
14
+ * @module cowork-learning
15
+ */
16
+
17
+ import {
18
+ existsSync,
19
+ readFileSync,
20
+ writeFileSync,
21
+ mkdirSync,
22
+ appendFileSync,
23
+ } from "node:fs";
24
+ import { join } from "node:path";
25
+
26
/**
 * Injectable dependencies: the `node:fs` functions this module calls plus a
 * clock. Exported so callers (e.g. tests) can replace individual members.
 */
export const _deps = {
  existsSync,
  readFileSync,
  writeFileSync,
  mkdirSync,
  appendFileSync,
  /** Current time as a `Date`. */
  now() {
    return new Date();
  },
};
34
+
35
/** A template needs at least this many recorded runs before a patch is suggested. */
export const MIN_RUNS_FOR_PATCH = 10;
/** A template needs at least this many recorded failures before a patch is suggested. */
export const MIN_FAILURES_FOR_PATCH = 3;
39
+
40
+ // ─── Loading ─────────────────────────────────────────────────────────────────
41
+
42
/**
 * Read the run history for a workspace as an array of records.
 *
 * Parses `<cwd>/.chainlesschain/cowork/history.jsonl` one line at a time.
 * Blank lines and lines that are not valid JSON are skipped. Lines that parse
 * to something other than a plain object (`null`, numbers, strings, arrays)
 * are skipped too, because downstream analysis reads properties off every
 * record and would throw on `null`.
 *
 * @param {string} cwd - Workspace root containing `.chainlesschain/`.
 * @returns {Array<object>} Parsed records in file order; `[]` if the file is missing.
 */
export function loadHistory(cwd) {
  const file = join(cwd, ".chainlesschain", "cowork", "history.jsonl");
  if (!_deps.existsSync(file)) return [];

  const raw = _deps.readFileSync(file, "utf-8");
  const records = [];
  for (const line of raw.split("\n")) {
    // trim() also removes the "\r" left behind by CRLF line endings.
    const trimmed = line.trim();
    if (!trimmed) continue;

    let parsed;
    try {
      parsed = JSON.parse(trimmed);
    } catch (_e) {
      // Malformed line (e.g. a truncated write): skip it, keep the rest.
      continue;
    }

    const isRecord =
      parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
    if (isRecord) records.push(parsed);
  }
  return records;
}
59
+
60
+ // ─── Stats ───────────────────────────────────────────────────────────────────
61
+
62
/**
 * Aggregate per-template stats across all runs.
 *
 * Records are grouped by `templateId` (falling back to `"unknown"`). A run
 * counts as a success only when `status === "completed"`; every other status
 * counts as a failure. The display name comes from the first record seen for
 * a template.
 *
 * Tolerates imperfect history data: entries that are not objects are ignored,
 * token/iteration counts that do not convert to a finite number contribute 0
 * (so one bad record cannot turn an average into NaN), and `toolsUsed` is only
 * counted when it is an array.
 *
 * @param {Array<object>} history
 * @returns {Array<{
 *   templateId: string,
 *   templateName: string,
 *   runs: number,
 *   successes: number,
 *   failures: number,
 *   successRate: number,        // 0..1
 *   avgTokens: number,          // rounded to an integer
 *   avgIterations: number,      // rounded to one decimal
 *   topTools: Array<{ tool: string, count: number }>,  // at most 5, most used first
 *   lastRunAt: string|null,
 * }>} Sorted by runs descending, then successRate descending.
 */
export function computeTemplateStats(history) {
  // Convert to a number, treating anything non-finite (NaN, Infinity) as 0.
  const toFinite = (value) => {
    const n = Number(value);
    return Number.isFinite(n) ? n : 0;
  };

  const groups = new Map();
  for (const rec of history) {
    if (rec === null || typeof rec !== "object") continue;

    const id = rec.templateId || "unknown";
    let g = groups.get(id);
    if (!g) {
      g = {
        templateId: id,
        templateName: rec.templateName || id,
        runs: 0,
        successes: 0,
        failures: 0,
        totalTokens: 0,
        totalIterations: 0,
        toolCounts: new Map(),
        lastRunAt: null,
      };
      groups.set(id, g);
    }

    g.runs += 1;
    if (rec.status === "completed") g.successes += 1;
    else g.failures += 1;

    const r = rec.result || {};
    g.totalTokens += toFinite(r.tokenCount);
    g.totalIterations += toFinite(r.iterationCount);
    if (Array.isArray(r.toolsUsed)) {
      for (const t of r.toolsUsed) {
        g.toolCounts.set(t, (g.toolCounts.get(t) || 0) + 1);
      }
    }

    // Plain comparison: picks the greatest timestamp value seen for the template.
    if (rec.timestamp && (!g.lastRunAt || rec.timestamp > g.lastRunAt)) {
      g.lastRunAt = rec.timestamp;
    }
  }

  const result = [];
  for (const g of groups.values()) {
    const topTools = [...g.toolCounts.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 5)
      .map(([tool, count]) => ({ tool, count }));
    result.push({
      templateId: g.templateId,
      templateName: g.templateName,
      runs: g.runs,
      successes: g.successes,
      failures: g.failures,
      successRate: g.runs > 0 ? g.successes / g.runs : 0,
      avgTokens: g.runs > 0 ? Math.round(g.totalTokens / g.runs) : 0,
      avgIterations: g.runs > 0 ? +(g.totalIterations / g.runs).toFixed(1) : 0,
      topTools,
      lastRunAt: g.lastRunAt,
    });
  }

  // Sort by runs desc, then successRate desc
  result.sort((a, b) => b.runs - a.runs || b.successRate - a.successRate);
  return result;
}
133
+
134
+ // ─── Recommendation ──────────────────────────────────────────────────────────
135
+
136
/**
 * Break text into lowercase tokens.
 *
 * Any run of characters that are neither Unicode letters nor Unicode numbers
 * acts as a separator, so scripts such as CJK are kept as tokens instead of
 * being discarded. Non-string or empty input produces an empty list.
 *
 * @param {unknown} text
 * @returns {string[]}
 */
function tokenize(text) {
  if (typeof text !== "string" || text.length === 0) return [];
  const pieces = text.toLowerCase().split(/[^\p{L}\p{N}]+/u);
  // Leading/trailing separators leave empty strings behind; drop them.
  return pieces.filter((piece) => piece !== "");
}
148
+
149
/**
 * Recommend the best template for a new user message based on history.
 *
 * Scoring: for every successful (`status === "completed"`) historical run,
 * count how many of its message tokens also occur in the query, and add that
 * to the run's template. Each template's cumulative overlap is then weighted
 * by `0.5 + successRate / 2`, and the highest weighted score wins. On an exact
 * tie the template whose first matching run appears earliest in `history`
 * is kept.
 *
 * Candidates are compared on their unrounded scores; rounding to two decimals
 * is applied only to the reported `score`. Comparing against the rounded
 * value would let a slightly lower-scoring template displace the best one.
 *
 * @param {string} userMessage
 * @param {Array<object>} history
 * @param {object} [options]
 * @param {number} [options.minRuns] - Only consider templates with at least this many runs (default 1)
 * @returns {{ templateId: string, score: number, confidence: number, reasons: string[] } | null}
 *   `confidence` is the template's success rate rounded to two decimals.
 *   `null` when the query has no tokens or no template qualifies.
 */
export function recommendTemplate(userMessage, history, options = {}) {
  const { minRuns = 1 } = options;
  const queryTokens = new Set(tokenize(userMessage));
  if (queryTokens.size === 0) return null;

  const stats = computeTemplateStats(history);
  const statsById = new Map(stats.map((s) => [s.templateId, s]));

  const scores = new Map(); // templateId -> cumulative overlap
  for (const rec of history) {
    if (rec.status !== "completed") continue;
    const id = rec.templateId || "unknown";
    let overlap = 0;
    for (const t of tokenize(rec.userMessage || "")) {
      if (queryTokens.has(t)) overlap += 1;
    }
    if (overlap > 0) {
      scores.set(id, (scores.get(id) || 0) + overlap);
    }
  }

  let best = null;
  let bestRawScore = -Infinity;
  for (const [templateId, overlap] of scores) {
    const s = statsById.get(templateId);
    if (!s || s.runs < minRuns) continue;

    const rawScore = overlap * (0.5 + s.successRate / 2);
    // Strict comparison on the raw value: ties keep the earlier candidate.
    if (rawScore <= bestRawScore) continue;

    bestRawScore = rawScore;
    best = {
      templateId,
      score: +rawScore.toFixed(2),
      confidence: +s.successRate.toFixed(2),
      reasons: [
        `${overlap} overlapping token(s) with past runs`,
        `${s.successes}/${s.runs} past successes (${Math.round(s.successRate * 100)}%)`,
      ],
    };
  }
  return best;
}
203
+
204
+ // ─── Failure analysis ────────────────────────────────────────────────────────
205
+
206
/**
 * Collect the non-successful runs of each template and report which failure
 * summaries recur most often.
 *
 * Any record whose status is not `"completed"` counts as a failure. Summaries
 * and user messages are cut to 200 characters. For each template the result
 * lists the five most frequent summaries plus the first `limit` failing runs
 * as examples.
 *
 * @param {Array<object>} history
 * @param {object} [options]
 * @param {number} [options.limit] - Max examples per template (default 3)
 * @returns {Array<{
 *   templateId: string,
 *   templateName: string,
 *   failureCount: number,
 *   commonSummaries: Array<{ summary: string, count: number }>,
 *   examples: Array<{ taskId: string, userMessage: string, summary: string, timestamp: string }>,
 * }>} Ordered by failureCount, highest first.
 */
export function summarizeFailures(history, options = {}) {
  const { limit: maxExamples = 3 } = options;
  const buckets = new Map();

  for (const run of history) {
    if (run.status === "completed") continue;

    const templateId = run.templateId || "unknown";
    let bucket = buckets.get(templateId);
    if (bucket === undefined) {
      bucket = {
        templateId,
        templateName: run.templateName || templateId,
        failureCount: 0,
        summaryCounts: new Map(),
        examples: [],
      };
      buckets.set(templateId, bucket);
    }

    bucket.failureCount += 1;

    const summary = (run.result?.summary || "").slice(0, 200);
    if (summary) {
      const seen = bucket.summaryCounts.get(summary) || 0;
      bucket.summaryCounts.set(summary, seen + 1);
    }

    if (bucket.examples.length < maxExamples) {
      bucket.examples.push({
        taskId: run.taskId,
        userMessage: (run.userMessage || "").slice(0, 200),
        summary,
        timestamp: run.timestamp || "",
      });
    }
  }

  const report = Array.from(buckets.values(), (bucket) => {
    const ranked = Array.from(bucket.summaryCounts.entries());
    ranked.sort((left, right) => right[1] - left[1]);
    return {
      templateId: bucket.templateId,
      templateName: bucket.templateName,
      failureCount: bucket.failureCount,
      commonSummaries: ranked
        .slice(0, 5)
        .map(([summary, count]) => ({ summary, count })),
      examples: bucket.examples,
    };
  });
  report.sort((left, right) => right.failureCount - left.failureCount);
  return report;
}
268
+
269
+ // ─── N2: Feedback loop — suggest + apply prompt patches ──────────────────────
270
+
271
/**
 * Grade a suggestion as "high", "medium" or "low" from the sample size and
 * the share of runs that failed. Tiers are checked strictest first; anything
 * that meets neither tier is "low". (The original comment attributes these
 * thresholds to the design doc 87-cowork-evolution-n1-n7.md.)
 *
 * @param {number} runs
 * @param {number} failures
 * @returns {"high"|"medium"|"low"}
 */
function _classifyConfidence(runs, failures) {
  const failureRate = runs > 0 ? failures / runs : 0;
  const tiers = [
    { label: "high", minRuns: 30, minRate: 0.4 },
    { label: "medium", minRuns: 20, minRate: 0.25 },
  ];
  const matched = tiers.find(
    (tier) => runs >= tier.minRuns && failureRate >= tier.minRate,
  );
  return matched ? matched.label : "low";
}
281
+
282
/**
 * Pull the most frequent words out of a set of failure summaries — a cheap,
 * LLM-free heuristic used to compose a human-readable patch body.
 *
 * Each summary is lowercased and split on anything that is not a Unicode
 * letter or number. Words shorter than 4 characters are ignored. Every
 * occurrence of a word adds the summary's `count` (or 1 when it has none) to
 * that word's weight.
 *
 * @param {Array<{ summary: string, count?: number }>} summaries
 * @param {number} [maxHints=3] - How many words to return.
 * @returns {string[]} The heaviest words, heaviest first.
 */
function _extractHintPhrases(summaries, maxHints = 3) {
  const weights = new Map();
  for (const entry of summaries) {
    const text = (entry?.summary || "").toLowerCase();
    if (!text) continue;
    text
      .split(/[^\p{L}\p{N}]+/u)
      .filter((word) => word.length >= 4)
      .forEach((word) => {
        weights.set(word, (weights.get(word) || 0) + (entry.count || 1));
      });
  }
  const ranked = Array.from(weights.entries());
  ranked.sort((left, right) => right[1] - left[1]);
  return ranked.slice(0, maxHints).map((pair) => pair[0]);
}
302
+
303
/**
 * Turn one template's stats and failure analysis into a prompt-patch
 * suggestion.
 *
 * @param {object} statsEntry - Entry with `templateId`, `templateName`, `runs`.
 * @param {object} failureEntry - Entry with `failureCount`, `commonSummaries`.
 * @returns {object|null} The suggestion, or `null` when either entry is
 *   missing, the template has fewer than `MIN_RUNS_FOR_PATCH` runs, or fewer
 *   than `MIN_FAILURES_FOR_PATCH` failures.
 */
export function buildPatchForTemplate(statsEntry, failureEntry) {
  const qualifies =
    Boolean(statsEntry) &&
    Boolean(failureEntry) &&
    statsEntry.runs >= MIN_RUNS_FOR_PATCH &&
    failureEntry.failureCount >= MIN_FAILURES_FOR_PATCH;
  if (!qualifies) return null;

  const { runs, templateId, templateName } = statsEntry;
  const { failureCount, commonSummaries } = failureEntry;

  const hints = _extractHintPhrases(commonSummaries);
  const confidence = _classifyConfidence(runs, failureCount);

  // The patch is one or two sentences joined by a single space.
  const sentences = [
    `Historical failure pattern detected (${failureCount}/${runs} runs failed).`,
  ];
  if (hints.length > 0) {
    sentences.push(
      `Common terms in failures: ${hints.join(", ")}. When relevant, double-check assumptions around these areas before proceeding.`,
    );
  }

  return {
    templateId,
    templateName,
    runs,
    failures: failureCount,
    failureRate: Number((failureCount / runs).toFixed(2)),
    confidence,
    patch: sentences.join(" "),
    hints,
    sampleSummaries: commonSummaries.slice(0, 3),
  };
}
338
+
339
/**
 * Produce at most one patch suggestion per template from the run history.
 *
 * Templates without any recorded failure, or for which
 * `buildPatchForTemplate` returns nothing, are left out. This function does
 * not write anything itself; pass a suggestion to `applyPromptPatch` to
 * persist it.
 *
 * @param {Array<object>} history
 * @returns {Array<{
 *   templateId, templateName, runs, failures, failureRate,
 *   confidence, patch, hints, sampleSummaries,
 * }>} Ordered by confidence (high, medium, low), then by failure count descending.
 */
export function suggestPromptPatch(history) {
  const templateStats = computeTemplateStats(history);
  const failureIndex = new Map(
    summarizeFailures(history).map((entry) => [entry.templateId, entry]),
  );

  const suggestions = [];
  templateStats.forEach((entry) => {
    const failureEntry = failureIndex.get(entry.templateId);
    const suggestion = failureEntry
      ? buildPatchForTemplate(entry, failureEntry)
      : null;
    if (suggestion) suggestions.push(suggestion);
  });

  // Highest-confidence first, then biggest failure count
  const rank = { high: 3, medium: 2, low: 1 };
  return suggestions.sort((left, right) => {
    const byConfidence = rank[right.confidence] - rank[left.confidence];
    return byConfidence || right.failures - left.failures;
  });
}
368
+
369
/** Directory under `cwd` that holds the user-override template JSON files. */
function _userTemplatesDir(cwd) {
  const segments = [".chainlesschain", "cowork", "user-templates"];
  return join(cwd, ...segments);
}
372
+
373
/** Path under `cwd` of the JSONL log that records every applied patch. */
function _patchesLogFile(cwd) {
  const segments = [".chainlesschain", "cowork", "learning-patches.jsonl"];
  return join(cwd, ...segments);
}
376
+
377
/**
 * Read the user-override JSON for a template.
 *
 * @param {string} cwd
 * @param {string} templateId - Used as the file name (`<templateId>.json`).
 * @returns {object|null} The parsed document, or `null` when the file does
 *   not exist or cannot be read/parsed.
 */
export function loadUserTemplate(cwd, templateId) {
  const overridePath = join(_userTemplatesDir(cwd), `${templateId}.json`);
  if (_deps.existsSync(overridePath)) {
    try {
      return JSON.parse(_deps.readFileSync(overridePath, "utf-8"));
    } catch (_e) {
      // Unreadable or invalid JSON is treated the same as "no override".
    }
  }
  return null;
}
389
+
390
/**
 * Persist a patch to the user-templates layer and append an audit record.
 *
 * Only `<cwd>/.chainlesschain/cowork/user-templates/<templateId>.json` and the
 * `learning-patches.jsonl` audit log are written. The operation is additive:
 * when the template already has a `systemPromptExtension`, the new patch text
 * is appended after a blank line so earlier patches are preserved.
 *
 * The patch is validated before anything is written, so a suggestion without
 * text cannot end up as the literal string "undefined" in a prompt. The
 * template file and the audit record share one timestamp.
 *
 * NOTE(review): `templateId` is used verbatim as a file name; callers are
 * assumed to pass trusted ids without path separators — confirm.
 *
 * @param {string} cwd
 * @param {object} patch - output from `suggestPromptPatch`
 * @returns {{ templateId, file, systemPromptExtension }}
 * @throws {Error} When `patch.templateId` is missing or `patch.patch` is not
 *   a non-empty string.
 */
export function applyPromptPatch(cwd, patch) {
  if (!patch || !patch.templateId) {
    throw new Error("patch must include templateId");
  }
  if (typeof patch.patch !== "string" || patch.patch.trim() === "") {
    throw new Error("patch must include non-empty patch text");
  }

  const dir = _userTemplatesDir(cwd);
  _deps.mkdirSync(dir, { recursive: true });

  const existing = loadUserTemplate(cwd, patch.templateId);
  const prev = existing?.systemPromptExtension || "";
  const extended = prev ? `${prev}\n\n${patch.patch}` : patch.patch;

  // One clock read so the template's updatedAt matches the audit appliedAt.
  const timestamp = _deps.now().toISOString();

  const doc = {
    templateId: patch.templateId,
    templateName: patch.templateName,
    systemPromptExtension: extended,
    updatedAt: timestamp,
  };
  const file = join(dir, `${patch.templateId}.json`);
  _deps.writeFileSync(file, JSON.stringify(doc, null, 2), "utf-8");

  // Audit trail — never pruned automatically
  const auditLine = JSON.stringify({
    appliedAt: timestamp,
    templateId: patch.templateId,
    confidence: patch.confidence,
    runs: patch.runs,
    failures: patch.failures,
    patch: patch.patch,
  });
  _deps.appendFileSync(_patchesLogFile(cwd), `${auditLine}\n`, "utf-8");

  return {
    templateId: patch.templateId,
    file,
    systemPromptExtension: extended,
  };
}
@@ -0,0 +1,182 @@
1
+ /**
2
+ * Cowork Template MCP Tools — mount a template's declared MCP servers and
3
+ * expose their tools to the sub-agent's LLM.
4
+ *
5
+ * A template can declare `mcpServers: [{ name, command, args, env, cwd }]`
6
+ * (same shape accepted by skill-mcp's validateMcpServerConfig). At task
7
+ * start we spawn an MCPClient, connect each server, list their tools, and
8
+ * build three parallel maps the agent-core runtime already consumes:
9
+ *
10
+ * - `extraToolDefinitions` — OpenAI-style function definitions appended
11
+ * to the tool list the LLM sees.
12
+ * - `externalToolDescriptors` — descriptor metadata keyed by tool name.
13
+ * - `externalToolExecutors` — { kind: "mcp", serverName, toolName } routing
14
+ * handles that agent-core's default-case switch dispatches through
15
+ * `mcpClient.callTool(serverName, toolName, args)`.
16
+ *
17
+ * Tool names are prefixed `mcp__<serverName>__<toolName>` to avoid collisions
18
+ * across servers and with built-in AGENT_TOOLS.
19
+ *
20
+ * @module cowork-mcp-tools
21
+ */
22
+ import { validateMcpServerConfig } from "./skill-mcp.js";
23
+
24
/**
 * Injectable dependencies. `importMcpClient` lazily loads the harness module
 * and resolves to its `MCPClient` export; callers (e.g. tests) can replace it.
 */
export const _deps = {
  async importMcpClient() {
    const { MCPClient } = await import("../harness/mcp-client.js");
    return MCPClient;
  },
};
30
+
31
/**
 * Compose the wire name of an MCP tool: `mcp__<serverName>__<toolName>`.
 *
 * @param {string} serverName
 * @param {string} toolName
 * @returns {string}
 */
export function buildToolName(serverName, toolName) {
  const namespace = "mcp";
  const separator = "__";
  return `${namespace}${separator}${serverName}${separator}${toolName}`;
}
35
+
36
/**
 * Translate one MCP tool entry into the three objects handed to agent-core:
 * a function-style `definition`, a `descriptor`, and an `executor` handle.
 *
 * The definition and descriptor carry the namespaced wire name; the executor
 * keeps the server name and the tool's original name. A missing description
 * is replaced by a generated one, and a missing input schema by an empty
 * object schema.
 *
 * @param {string} serverName
 * @param {{ name: string, description?: string, inputSchema?: object }} tool
 * @returns {{ definition: object, descriptor: object, executor: object }}
 */
export function toAgentTool(serverName, tool) {
  const originalName = tool.name;
  const wireName = buildToolName(serverName, originalName);

  const description =
    tool.description || `MCP tool "${originalName}" from server "${serverName}"`;
  const parameters = tool.inputSchema || { type: "object", properties: {} };

  const definition = {
    type: "function",
    function: { name: wireName, description, parameters },
  };
  const descriptor = {
    name: wireName,
    kind: "mcp",
    category: "mcp",
    source: "cowork-template-mcp",
    serverName,
    originalName,
  };
  const executor = { kind: "mcp", serverName, toolName: originalName };

  return { definition, descriptor, executor };
}
75
+
76
/**
 * Mount a template's MCP servers and expose their tools. Returns maps ready
 * to hand to SubAgentContext + a cleanup() that disconnects the servers.
 *
 * Per-server failures are tolerated: a server that fails to connect, or whose
 * tools cannot be listed or converted, is reported in `skipped` (and through
 * `opts.onWarn`) and contributes no tools. `mounted` lists only servers whose
 * tools were fully registered. Servers that connected but were then skipped
 * are still disconnected by `cleanup()`.
 *
 * When the template declares no servers, or none of the declared entries
 * passes `validateMcpServerConfig`, the empty result is returned
 * (`mcpClient: null`, no tools, no-op cleanup) and no client is created.
 *
 * @param {{ mcpServers?: Array<object> }} template
 * @param {object} [opts]
 * @param {(msg: string, err?: Error) => void} [opts.onWarn]
 * @returns {Promise<{
 *   mcpClient: object|null,
 *   mounted: string[],
 *   skipped: Array<{ name: string, error: string }>,
 *   extraToolDefinitions: Array<object>,
 *   externalToolDescriptors: Record<string, object>,
 *   externalToolExecutors: Record<string, object>,
 *   cleanup: () => Promise<void>,
 * }>}
 */
export async function mountTemplateMcpTools(template, opts = {}) {
  const empty = {
    mcpClient: null,
    mounted: [],
    skipped: [],
    extraToolDefinitions: [],
    externalToolDescriptors: {},
    externalToolExecutors: {},
    cleanup: async () => {},
  };

  const declared = Array.isArray(template?.mcpServers)
    ? template.mcpServers
    : [];
  if (declared.length === 0) return empty;

  const validated = declared
    .map((entry) => validateMcpServerConfig(entry))
    .filter(Boolean);
  if (validated.length === 0) return empty;

  const MCPClient = await _deps.importMcpClient();
  const mcpClient = new MCPClient();
  const connected = []; // every server with a live connection, for cleanup
  const mounted = [];
  const skipped = [];
  const extraToolDefinitions = [];
  const externalToolDescriptors = {};
  const externalToolExecutors = {};

  for (const server of validated) {
    try {
      await mcpClient.connect(server.name, server);
      connected.push(server.name);

      // `await` accepts both a plain value and a promise from listTools.
      const listed = await mcpClient.listTools(server.name);
      // Convert everything first so a failure part-way registers nothing.
      const converted = Array.from(listed ?? [], (tool) =>
        toAgentTool(server.name, tool),
      );

      for (const { definition, descriptor, executor } of converted) {
        extraToolDefinitions.push(definition);
        externalToolDescriptors[definition.function.name] = descriptor;
        externalToolExecutors[definition.function.name] = executor;
      }
      mounted.push(server.name);
    } catch (err) {
      const message = err?.message || String(err);
      skipped.push({ name: server.name, error: message });
      if (typeof opts?.onWarn === "function") {
        opts.onWarn(
          `[cowork-mcp] Failed to mount "${server.name}": ${message}`,
          err,
        );
      }
    }
  }

  const cleanup = async () => {
    if (typeof mcpClient.disconnectAll === "function") {
      try {
        await mcpClient.disconnectAll();
        return;
      } catch (_e) {
        // fall through to per-server disconnect
      }
    }
    for (const name of connected) {
      try {
        if (typeof mcpClient.disconnect === "function") {
          await mcpClient.disconnect(name);
        }
      } catch (_e) {
        // swallow — cleanup must not fail the task
      }
    }
  };

  return {
    mcpClient,
    mounted,
    skipped,
    extraToolDefinitions,
    externalToolDescriptors,
    externalToolExecutors,
    cleanup,
  };
}