kairn-cli 2.9.0 → 2.11.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/README.md +171 -483
  2. package/dist/cli.js +1952 -963
  3. package/dist/cli.js.map +1 -1
  4. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -181,6 +181,101 @@ var init_providers = __esm({
181
181
  }
182
182
  });
183
183
 
184
+ // src/compiler/agents/types.ts
185
/**
 * Validates the top-level shape of a compilation plan and returns it unchanged.
 *
 * Checks run in this order: the plan is a non-null, non-array object;
 * `project_context` is a non-empty string; `phases` is a non-empty array;
 * every entry of `phases` is accepted by `validatePhase`.
 *
 * @param {unknown} plan - Candidate plan to check.
 * @returns {object} The same `plan` reference once every check has passed.
 * @throws {Error} Naming the first field that failed validation.
 */
function validatePlan(plan) {
  // Arrays satisfy `typeof === "object"`; reject them here so the caller gets
  // the accurate "non-null object" message instead of a field-level one.
  if (plan == null || typeof plan !== "object" || Array.isArray(plan)) {
    throw new Error("CompilationPlan must be a non-null object");
  }
  const { project_context: projectContext, phases } = plan;
  if (typeof projectContext !== "string") {
    throw new Error("CompilationPlan.project_context must be a string");
  }
  if (projectContext.length === 0) {
    throw new Error("CompilationPlan.project_context must not be empty");
  }
  if (!Array.isArray(phases)) {
    throw new Error("CompilationPlan.phases must be an array");
  }
  if (phases.length === 0) {
    throw new Error("CompilationPlan.phases must contain at least one phase");
  }
  // entries() also visits holes in sparse arrays, matching an index loop.
  for (const [index, phase] of phases.entries()) {
    validatePhase(phase, index);
  }
  return plan;
}
213
/**
 * Validates one phase of a compilation plan.
 *
 * Checks run in this order: the phase is a non-null, non-array object; `id` is
 * a string; `agents` is an array whose entries are each accepted by
 * `validateAgentTask`; `dependsOn` is an array.
 *
 * @param {unknown} phase - Candidate phase.
 * @param {number} index - Position of the phase in `phases`, used in messages.
 * @returns {void}
 * @throws {Error} Naming the first field that failed validation.
 */
function validatePhase(phase, index) {
  // Arrays satisfy `typeof === "object"`; reject them explicitly so the error
  // describes the real problem rather than a missing `id`.
  if (phase == null || typeof phase !== "object" || Array.isArray(phase)) {
    throw new Error(`phases[${index}] must be a non-null object`);
  }
  const { id, agents } = phase;
  if (typeof id !== "string") {
    throw new Error(`phases[${index}].id must be a string`);
  }
  if (!Array.isArray(agents)) {
    throw new Error(`phases[${index}].agents must be an array`);
  }
  for (const [taskIndex, task] of agents.entries()) {
    validateAgentTask(task, index, taskIndex);
  }
  // Checked after the agent tasks so the order in which errors surface is
  // unchanged for existing callers.
  if (!Array.isArray(phase.dependsOn)) {
    throw new Error(`phases[${index}].dependsOn must be an array`);
  }
}
231
/**
 * Validates one agent task inside a phase.
 *
 * Checks run in this order: the task is a non-null, non-array object; `agent`
 * is a string listed in `VALID_AGENT_NAMES`; `items` is an array;
 * `max_tokens` is a finite number.
 *
 * @param {unknown} task - Candidate task.
 * @param {number} phaseIndex - Index of the enclosing phase, used in messages.
 * @param {number} taskIndex - Index of the task within the phase, used in messages.
 * @returns {void}
 * @throws {Error} Naming the first field that failed validation.
 */
function validateAgentTask(task, phaseIndex, taskIndex) {
  const prefix = `phases[${phaseIndex}].agents[${taskIndex}]`;
  // Arrays satisfy `typeof === "object"`; reject them explicitly so the error
  // describes the real problem rather than a missing `agent`.
  if (task == null || typeof task !== "object" || Array.isArray(task)) {
    throw new Error(`${prefix} must be a non-null object`);
  }
  const { agent, items, max_tokens: maxTokens } = task;
  if (typeof agent !== "string") {
    throw new Error(`${prefix}.agent must be a string`);
  }
  if (!VALID_AGENT_NAMES.includes(agent)) {
    throw new Error(
      `${prefix}.agent "${agent}" is not a valid agent name. Valid names: ${VALID_AGENT_NAMES.join(", ")}`
    );
  }
  if (!Array.isArray(items)) {
    throw new Error(`${prefix}.items must be an array`);
  }
  // `typeof NaN` and `typeof Infinity` are both "number", and JSON.parse turns
  // an oversized literal such as 1e999 into Infinity, so require finiteness.
  if (typeof maxTokens !== "number" || !Number.isFinite(maxTokens)) {
    throw new Error(`${prefix}.max_tokens must be a number`);
  }
}
254
// Module-level bindings for src/compiler/agents/types.ts. They are assigned
// inside the initializer below, so they stay undefined until init_types() runs.
var VALID_AGENT_NAMES, TruncationError;
// NOTE(review): __esm is defined elsewhere in the bundle; presumably the
// bundler's module-initializer wrapper — confirm before relying on call order.
var init_types = __esm({
  "src/compiler/agents/types.ts"() {
    "use strict";
    // Agent names accepted by validateAgentTask.
    VALID_AGENT_NAMES = [
      "sections-writer",
      "command-writer",
      "agent-writer",
      "rule-writer",
      "doc-writer",
      "skill-writer"
    ];
    /**
     * Error thrown when an LLM response is cut off at the token limit.
     * Carries the agent label and the token budget that was in effect.
     */
    TruncationError = class extends Error {
      agentName;
      tokensUsed;
      /**
       * @param {string} message - Human-readable description.
       * @param {{agentName?: string, tokensUsed?: number}} [options] - Context
       *   for the truncated call; both fields are left undefined when omitted.
       */
      constructor(message, options) {
        super(message);
        this.name = "TruncationError";
        // Tolerate a missing options bag: throwing a TypeError from inside an
        // error constructor would mask the truncation being reported.
        const { agentName, tokensUsed } = options ?? {};
        this.agentName = agentName;
        this.tokensUsed = tokensUsed;
      }
    };
  }
});
278
+
184
279
  // src/llm.ts
185
280
  import Anthropic2 from "@anthropic-ai/sdk";
186
281
  import OpenAI2 from "openai";
@@ -242,12 +337,22 @@ async function callLLM(config, userMessage, options) {
242
337
  system: cacheControl ? [{ type: "text", text: systemPrompt, cache_control: { type: "ephemeral" } }] : systemPrompt,
243
338
  messages
244
339
  });
340
+ if (response.stop_reason === "max_tokens") {
341
+ const agentLabel = options.agentName ?? "unknown";
342
+ throw new TruncationError(
343
+ `Response truncated at ${maxTokens} tokens. Agent: ${agentLabel}`,
344
+ { agentName: agentLabel, tokensUsed: maxTokens }
345
+ );
346
+ }
245
347
  const textBlock = response.content.find((block) => block.type === "text");
246
348
  if (!textBlock || textBlock.type !== "text") {
247
349
  throw new Error("No text response from compiler LLM");
248
350
  }
249
351
  return textBlock.text;
250
352
  } catch (err) {
353
+ if (err instanceof TruncationError) {
354
+ throw err;
355
+ }
251
356
  throw new Error(classifyError(err, providerName));
252
357
  }
253
358
  }
@@ -265,12 +370,22 @@ async function callLLM(config, userMessage, options) {
265
370
  ],
266
371
  ...jsonMode ? { response_format: { type: "json_object" } } : {}
267
372
  });
373
+ if (response.choices[0]?.finish_reason === "length") {
374
+ const agentLabel = options.agentName ?? "unknown";
375
+ throw new TruncationError(
376
+ `Response truncated at ${maxTokens} tokens. Agent: ${agentLabel}`,
377
+ { agentName: agentLabel, tokensUsed: maxTokens }
378
+ );
379
+ }
268
380
  const text = response.choices[0]?.message?.content;
269
381
  if (!text) {
270
382
  throw new Error("No text response from compiler LLM");
271
383
  }
272
384
  return text;
273
385
  } catch (err) {
386
+ if (err instanceof TruncationError) {
387
+ throw err;
388
+ }
274
389
  throw new Error(classifyError(err, providerName));
275
390
  }
276
391
  }
@@ -279,142 +394,409 @@ var init_llm = __esm({
279
394
  "use strict";
280
395
  init_providers();
281
396
  init_keychain();
397
+ init_types();
282
398
  }
283
399
  });
284
400
 
285
- // src/evolve/baseline.ts
286
- import fs16 from "fs/promises";
287
- import path16 from "path";
288
- async function snapshotBaseline(projectRoot, workspacePath) {
289
- const claudeDir = path16.join(projectRoot, ".claude");
290
- const baselineDir = path16.join(workspacePath, "baseline");
291
- const iter0Dir = path16.join(workspacePath, "iterations", "0", "harness");
292
- try {
293
- await fs16.access(claudeDir);
294
- } catch {
295
- throw new Error(`.claude/ directory not found in ${projectRoot}`);
401
+ // src/ir/types.ts
402
/**
 * Builds a blank harness IR: empty metadata strings, autonomy level 2, empty
 * collections for every node kind, and settings from createEmptySettings().
 *
 * @returns {object} A freshly allocated IR object; nothing is shared between calls.
 */
function createEmptyIR() {
  const meta = {
    name: "",
    purpose: "",
    techStack: { language: "" },
    autonomyLevel: 2
  };
  const ir = { meta };
  // Node collections that precede `settings` in the object's key order.
  for (const key of ["sections", "commands", "rules", "agents", "skills", "docs", "hooks"]) {
    ir[key] = [];
  }
  ir.settings = createEmptySettings();
  ir.mcpServers = [];
  ir.intents = [];
  return ir;
}
422
/**
 * Builds a blank settings node with no hooks and no raw settings.
 *
 * @returns {{hooks: object, raw: object}} A freshly allocated settings object.
 */
function createEmptySettings() {
  const hooks = {};
  const raw = {};
  return { hooks, raw };
}
425
/**
 * Packs the four arguments into a section node.
 *
 * @param {*} id - Stored as `id`.
 * @param {*} heading - Stored as `heading`.
 * @param {*} content - Stored as `content`.
 * @param {*} order - Stored as `order`.
 * @returns {{id: *, heading: *, content: *, order: *}} The new section node.
 */
function createSection(id, heading, content, order) {
  const section = {
    id: id,
    heading: heading,
    content: content,
    order: order
  };
  return section;
}
428
/**
 * Builds a command node.
 *
 * @param {*} name - Stored as `name`.
 * @param {*} content - Stored as `content`.
 * @param {*} [description] - Stored as `description`; null or undefined becomes "".
 * @returns {{name: *, description: *, content: *}} The new command node.
 */
function createCommandNode(name, content, description) {
  const resolvedDescription = description ?? "";
  const node = { name, description: resolvedDescription, content };
  return node;
}
431
+ function createRuleNode(name, content, paths) {
432
+ const node = { name, content };
433
+ if (paths !== void 0) {
434
+ node.paths = paths;
296
435
  }
297
- await copyDir(claudeDir, baselineDir);
298
- await copyDir(claudeDir, iter0Dir);
299
- const mcpJsonPath = path16.join(projectRoot, ".mcp.json");
300
- try {
301
- await fs16.access(mcpJsonPath);
302
- await fs16.copyFile(mcpJsonPath, path16.join(baselineDir, ".mcp.json"));
303
- await fs16.copyFile(mcpJsonPath, path16.join(iter0Dir, ".mcp.json"));
304
- } catch {
436
+ return node;
437
+ }
438
+ function createAgentNode(name, content, model) {
439
+ const node = { name, content };
440
+ if (model !== void 0) {
441
+ node.model = model;
305
442
  }
443
+ return node;
306
444
  }
307
- async function copyDir(src, dest) {
308
- await fs16.mkdir(dest, { recursive: true });
309
- const entries = await fs16.readdir(src, { withFileTypes: true });
310
- for (const entry of entries) {
311
- const srcPath = path16.join(src, entry.name);
312
- const destPath = path16.join(dest, entry.name);
313
- if (entry.isDirectory()) {
314
- await copyDir(srcPath, destPath);
315
- } else {
316
- await fs16.copyFile(srcPath, destPath);
445
+ function createEmptyDiff() {
446
+ return {
447
+ sections: {
448
+ added: [],
449
+ removed: [],
450
+ modified: [],
451
+ reordered: []
452
+ },
453
+ commands: {
454
+ added: [],
455
+ removed: [],
456
+ modified: []
457
+ },
458
+ rules: {
459
+ added: [],
460
+ removed: [],
461
+ modified: []
462
+ },
463
+ agents: {
464
+ added: [],
465
+ removed: [],
466
+ modified: []
467
+ },
468
+ mcpServers: {
469
+ added: [],
470
+ removed: []
471
+ },
472
+ settings: {
473
+ changes: []
317
474
  }
318
- }
475
+ };
319
476
  }
320
- var init_baseline = __esm({
321
- "src/evolve/baseline.ts"() {
477
+ var init_types2 = __esm({
478
+ "src/ir/types.ts"() {
322
479
  "use strict";
323
480
  }
324
481
  });
325
482
 
326
- // src/evolve/trace.ts
327
- import fs17 from "fs/promises";
328
- import path17 from "path";
329
- async function loadTrace(traceDir) {
330
- const stdout = await fs17.readFile(path17.join(traceDir, "stdout.log"), "utf-8").catch(() => "");
331
- const stderr = await fs17.readFile(path17.join(traceDir, "stderr.log"), "utf-8").catch(() => "");
332
- const filesChangedStr = await fs17.readFile(
333
- path17.join(traceDir, "files_changed.json"),
334
- "utf-8"
335
- ).catch(() => "{}");
336
- const timingStr = await fs17.readFile(
337
- path17.join(traceDir, "timing.json"),
338
- "utf-8"
339
- ).catch(() => "{}");
340
- const scoreStr = await fs17.readFile(
341
- path17.join(traceDir, "score.json"),
342
- "utf-8"
343
- ).catch(() => '{"pass": false}');
344
- const toolCallsStr = await fs17.readFile(
345
- path17.join(traceDir, "tool_calls.jsonl"),
346
- "utf-8"
347
- ).catch(() => "");
348
- const toolCalls = toolCallsStr.split("\n").filter((line) => line.trim()).map((line) => JSON.parse(line));
349
- const parentDir = path17.basename(path17.dirname(traceDir));
350
- const iteration = parseInt(parentDir, 10) || 0;
351
- return {
352
- taskId: path17.basename(traceDir),
353
- iteration,
354
- stdout,
355
- stderr,
356
- toolCalls,
357
- filesChanged: JSON.parse(filesChangedStr),
358
- score: JSON.parse(scoreStr),
359
- timing: JSON.parse(timingStr)
360
- };
361
- }
362
- async function loadIterationTraces(workspacePath, iteration) {
363
- const tracesDir = path17.join(workspacePath, "traces", iteration.toString());
364
- const traces = [];
365
- try {
366
- const taskDirs = await fs17.readdir(tracesDir);
367
- for (const taskId of taskDirs) {
368
- const trace = await loadTrace(path17.join(tracesDir, taskId));
369
- traces.push(trace);
483
+ // src/ir/renderer.ts
484
+ function renderClaudeMd(_meta, sections) {
485
+ const sorted = [...sections].sort((a, b) => a.order - b.order);
486
+ const blocks = [];
487
+ for (const section of sorted) {
488
+ if (section.heading && section.content) {
489
+ blocks.push(`${section.heading}
490
+
491
+ ${section.content}`);
492
+ } else if (section.heading) {
493
+ blocks.push(section.heading);
494
+ } else if (section.content) {
495
+ blocks.push(section.content);
370
496
  }
371
- } catch {
372
497
  }
373
- return traces;
374
- }
375
- async function writeTrace(traceDir, trace) {
376
- await fs17.mkdir(traceDir, { recursive: true });
377
- await fs17.writeFile(path17.join(traceDir, "stdout.log"), trace.stdout, "utf-8");
378
- await fs17.writeFile(path17.join(traceDir, "stderr.log"), trace.stderr, "utf-8");
379
- const toolCallsLines = trace.toolCalls.map((tc) => JSON.stringify(tc)).join("\n");
380
- await fs17.writeFile(path17.join(traceDir, "tool_calls.jsonl"), toolCallsLines, "utf-8");
381
- await fs17.writeFile(
382
- path17.join(traceDir, "files_changed.json"),
383
- JSON.stringify(trace.filesChanged, null, 2),
384
- "utf-8"
385
- );
386
- await fs17.writeFile(
387
- path17.join(traceDir, "timing.json"),
388
- JSON.stringify(trace.timing, null, 2),
389
- "utf-8"
390
- );
391
- await fs17.writeFile(
392
- path17.join(traceDir, "score.json"),
393
- JSON.stringify(trace.score, null, 2),
394
- "utf-8"
395
- );
396
- }
397
- async function writeScore(traceDir, score) {
398
- await fs17.writeFile(
399
- path17.join(traceDir, "score.json"),
400
- JSON.stringify(score, null, 2),
401
- "utf-8"
402
- );
498
+ if (blocks.length === 0) {
499
+ return "\n";
500
+ }
501
+ return blocks.join("\n\n") + "\n";
403
502
  }
404
- async function writeIterationLog(workspacePath, log) {
405
- const iterDir = path17.join(workspacePath, "iterations", log.iteration.toString());
406
- await fs17.mkdir(iterDir, { recursive: true });
407
- await fs17.writeFile(
408
- path17.join(iterDir, "scores.json"),
409
- JSON.stringify({ score: log.score, taskResults: log.taskResults }, null, 2),
410
- "utf-8"
411
- );
412
- await fs17.writeFile(
413
- path17.join(iterDir, "proposer_reasoning.md"),
414
- log.proposal?.reasoning ?? "Baseline evaluation (no proposal)",
415
- "utf-8"
503
+ function renderSettings(settings) {
504
+ const result = JSON.parse(
505
+ JSON.stringify(settings.raw)
416
506
  );
417
- await fs17.writeFile(
507
+ if (settings.denyPatterns && settings.denyPatterns.length > 0) {
508
+ const permissions = result["permissions"] ?? {};
509
+ permissions["deny"] = settings.denyPatterns;
510
+ result["permissions"] = permissions;
511
+ }
512
+ if (settings.statusLine) {
513
+ result["statusLine"] = settings.statusLine;
514
+ }
515
+ const hookEvents = [
516
+ "PreToolUse",
517
+ "PostToolUse",
518
+ "UserPromptSubmit",
519
+ "SessionStart",
520
+ "PostCompact"
521
+ ];
522
+ const hooksObj = {};
523
+ let hasHooks = false;
524
+ for (const event of hookEvents) {
525
+ const entries = settings.hooks[event];
526
+ if (entries && entries.length > 0) {
527
+ hooksObj[event] = entries;
528
+ hasHooks = true;
529
+ }
530
+ }
531
+ if (hasHooks) {
532
+ result["hooks"] = hooksObj;
533
+ }
534
+ return JSON.stringify(result, null, 2) + "\n";
535
+ }
536
/**
 * Serializes MCP server definitions as pretty-printed JSON of the form
 * `{ "mcpServers": { <id>: { command, args, env? } } }` plus a trailing newline.
 *
 * @param {Array<{id: string, command: *, args: *, env?: object}>} servers -
 *   Servers to render; `env` is emitted only when it has at least one key.
 * @returns {string} The JSON text, or "" when `servers` is empty.
 */
function renderMcpConfig(servers) {
  if (servers.length === 0) {
    return "";
  }
  // Object.fromEntries defines own properties, so an id such as "__proto__" is
  // kept as a regular key instead of replacing the object's prototype (which
  // would silently drop that server from the output).
  const mcpServers = Object.fromEntries(
    servers.map((server) => {
      const entry = {
        command: server.command,
        args: server.args
      };
      if (server.env && Object.keys(server.env).length > 0) {
        entry.env = server.env;
      }
      return [server.id, entry];
    })
  );
  return `${JSON.stringify({ mcpServers }, null, 2)}\n`;
}
553
/**
 * Renders a rule file, prefixing a YAML frontmatter block that lists
 * `rule.paths` when the rule has any.
 *
 * @param {{content: string, paths?: string[]}} rule - Rule node to render.
 * @returns {string} `rule.content` alone when there are no paths; otherwise
 *   the frontmatter, a blank line, then `rule.content`.
 */
function renderRuleWithFrontmatter(rule) {
  if (!rule.paths || rule.paths.length === 0) {
    return rule.content;
  }
  const yamlLines = ["---", "paths:"];
  for (const p of rule.paths) {
    yamlLines.push(` - ${formatYamlPathScalar(p)}`);
  }
  yamlLines.push("---");
  return `${yamlLines.join("\n")}\n\n${rule.content}`;
}

/**
 * Formats one path for use as a YAML sequence entry.
 *
 * Glob patterns often begin with characters YAML reserves as indicators
 * (`*` alias, `!` tag, `{`/`[` flow collections, ...), which makes the
 * frontmatter unparsable when written as a plain scalar. Such values are
 * emitted as double-quoted scalars; everything else is returned untouched so
 * existing output does not change.
 *
 * @param {*} value - Path entry; coerced with String().
 * @returns {string} The scalar text to place after "- ".
 */
function formatYamlPathScalar(value) {
  const text = String(value);
  // Leave values that already carry matching quotes alone to avoid double quoting.
  if (text.length >= 2 && /^(["']).*\1$/s.test(text)) {
    return text;
  }
  const needsQuoting =
    text === "" ||
    text !== text.trim() ||
    /^[,[\]{}#&*!|>'"%@`]/.test(text) ||
    /^[-?:](?:\s|$)/.test(text) ||
    /:(?:\s|$)/.test(text) ||
    /\s#/.test(text);
  // A JSON string literal is used as the YAML double-quoted form.
  return needsQuoting ? JSON.stringify(text) : text;
}
564
/**
 * Renders an agent file: an optional frontmatter block followed by a blank
 * line and the agent body.
 *
 * Frontmatter is emitted when at least one of these holds: `model` is defined,
 * `disallowedTools` is non-empty, `modelRouting` is defined, or
 * `extraFrontmatter` has keys. Keys appear in that order. Extra keys render
 * arrays as list items, objects as one level of nested `key: value` lines,
 * and anything else as `key: value` via String().
 *
 * @param {object} agent - Agent node; reads `content`, `model`,
 *   `disallowedTools`, `modelRouting` and `extraFrontmatter`.
 * @returns {string} `agent.content` alone when no frontmatter applies,
 *   otherwise frontmatter + "\n\n" + content.
 */
function renderAgentWithFrontmatter(agent) {
  const { model, disallowedTools, modelRouting, extraFrontmatter, content } = agent;
  const emitModel = model !== undefined;
  const emitDisallowed = disallowedTools !== undefined && disallowedTools.length > 0;
  const emitRouting = modelRouting !== undefined;
  const emitExtra = extraFrontmatter !== undefined && Object.keys(extraFrontmatter).length > 0;
  if (!(emitModel || emitDisallowed || emitRouting || emitExtra)) {
    return content;
  }

  const frontmatter = ["---"];
  if (emitModel) {
    frontmatter.push(`model: ${model}`);
  }
  if (emitDisallowed) {
    frontmatter.push("disallowedTools:", ...Array.from(disallowedTools, (tool) => ` - ${tool}`));
  }
  if (emitRouting) {
    frontmatter.push("modelRouting:", ` default: ${modelRouting.default}`);
    // The escalation keys are optional and skipped when falsy.
    if (modelRouting.escalateTo) {
      frontmatter.push(` escalateTo: ${modelRouting.escalateTo}`);
    }
    if (modelRouting.escalateWhen) {
      frontmatter.push(` escalateWhen: ${modelRouting.escalateWhen}`);
    }
  }
  if (emitExtra) {
    for (const [key, value] of Object.entries(extraFrontmatter)) {
      if (Array.isArray(value)) {
        frontmatter.push(`${key}:`, ...Array.from(value, (item) => ` - ${String(item)}`));
        continue;
      }
      if (value !== null && typeof value === "object") {
        const nested = Object.entries(value).map(([subKey, subVal]) => ` ${subKey}: ${String(subVal)}`);
        frontmatter.push(`${key}:`, ...nested);
        continue;
      }
      frontmatter.push(`${key}: ${String(value)}`);
    }
  }
  frontmatter.push("---");
  return `${frontmatter.join("\n")}\n\n${content}`;
}
612
/**
 * Reports whether a settings node holds anything to render: a truthy
 * `statusLine`, a non-empty `denyPatterns`, any key in `raw`, or at least one
 * entry under one of the five hook events listed below.
 *
 * @param {object} settings - Settings node; reads `statusLine`,
 *   `denyPatterns`, `raw` and `hooks`.
 * @returns {boolean} True when any of those sources has content.
 */
function settingsHasContent(settings) {
  const { statusLine, denyPatterns, raw, hooks } = settings;
  if (statusLine) {
    return true;
  }
  if (denyPatterns && denyPatterns.length > 0) {
    return true;
  }
  if (Object.keys(raw).length > 0) {
    return true;
  }
  // Only these events are consulted; other keys under `hooks` are ignored.
  const trackedEvents = ["PreToolUse", "PostToolUse", "UserPromptSubmit", "SessionStart", "PostCompact"];
  return trackedEvents.some((eventName) => {
    const registered = hooks[eventName];
    return Boolean(registered && registered.length > 0);
  });
}
629
/**
 * Renders a harness IR into a map of relative file path -> file text.
 *
 * Entries are inserted in this order: CLAUDE.md (when there are sections or a
 * meta name), settings.json (when settingsHasContent() says so), then
 * commands/, rules/, agents/, skills/, docs/ as `.md` files, hooks/ as `.mjs`
 * files, and finally .mcp.json when renderMcpConfig() returns non-empty text.
 *
 * @param {object} ir - Harness IR to render.
 * @returns {Map<string, string>} Rendered files keyed by relative path.
 */
function renderHarness(ir) {
  const files = new Map();
  const wantsClaudeMd = ir.sections.length > 0 || ir.meta.name;
  if (wantsClaudeMd) {
    files.set("CLAUDE.md", renderClaudeMd(ir.meta, ir.sections));
  }
  if (settingsHasContent(ir.settings)) {
    files.set("settings.json", renderSettings(ir.settings));
  }

  // One row per node collection: [nodes, directory, extension, text renderer].
  const verbatim = (node) => node.content;
  const groups = [
    [ir.commands, "commands", "md", verbatim],
    [ir.rules, "rules", "md", renderRuleWithFrontmatter],
    [ir.agents, "agents", "md", renderAgentWithFrontmatter],
    [ir.skills, "skills", "md", verbatim],
    [ir.docs, "docs", "md", verbatim],
    [ir.hooks, "hooks", "mjs", verbatim]
  ];
  for (const [nodes, directory, extension, toText] of groups) {
    for (const node of nodes) {
      files.set(`${directory}/${node.name}.${extension}`, toText(node));
    }
  }

  const mcpText = renderMcpConfig(ir.mcpServers);
  if (mcpText) {
    files.set(".mcp.json", mcpText);
  }
  return files;
}
661
// Initializer for src/ir/renderer.ts. The module body is empty: it assigns no
// module-level bindings and runs no side effects.
var init_renderer = __esm({
  "src/ir/renderer.ts": function() {
    "use strict";
  }
});
666
+
667
+ // src/evolve/baseline.ts
668
+ import fs16 from "fs/promises";
669
+ import path16 from "path";
670
/**
 * Snapshots a project's `.claude/` directory into an evolve workspace.
 *
 * The directory is copied to `<workspace>/baseline` and to
 * `<workspace>/iterations/0/harness`. When the project also has a `.mcp.json`
 * at its root, that file is copied into both destinations as well.
 *
 * @param {string} projectRoot - Project directory that contains `.claude/`.
 * @param {string} workspacePath - Workspace directory that receives the copies.
 * @returns {Promise<void>}
 * @throws {Error} When `.claude/` is not accessible, or when a copy fails.
 */
async function snapshotBaseline(projectRoot, workspacePath) {
  const claudeDir = path16.join(projectRoot, ".claude");
  const baselineDir = path16.join(workspacePath, "baseline");
  const iter0Dir = path16.join(workspacePath, "iterations", "0", "harness");
  try {
    await fs16.access(claudeDir);
  } catch {
    throw new Error(`.claude/ directory not found in ${projectRoot}`);
  }
  await copyDir(claudeDir, baselineDir);
  await copyDir(claudeDir, iter0Dir);

  // .mcp.json is optional, so only its absence is tolerated. Failures while
  // copying a file that does exist are surfaced: swallowing them would leave
  // the baseline silently missing the MCP configuration.
  const mcpJsonPath = path16.join(projectRoot, ".mcp.json");
  let hasMcpJson = true;
  try {
    await fs16.access(mcpJsonPath);
  } catch {
    hasMcpJson = false;
  }
  if (hasMcpJson) {
    await fs16.copyFile(mcpJsonPath, path16.join(baselineDir, ".mcp.json"));
    await fs16.copyFile(mcpJsonPath, path16.join(iter0Dir, ".mcp.json"));
  }
}
689
/**
 * Recursively copies the contents of `src` into `dest`, creating `dest` (and
 * any missing parents) first. Entries are processed one at a time:
 * directories recurse, everything else goes through `copyFile`.
 *
 * @param {string} src - Directory to read.
 * @param {string} dest - Directory to create and fill.
 * @returns {Promise<void>}
 */
async function copyDir(src, dest) {
  await fs16.mkdir(dest, { recursive: true });
  const dirents = await fs16.readdir(src, { withFileTypes: true });
  for (const dirent of dirents) {
    const from = path16.join(src, dirent.name);
    const to = path16.join(dest, dirent.name);
    await (dirent.isDirectory() ? copyDir(from, to) : fs16.copyFile(from, to));
  }
}
702
// Initializer for src/evolve/baseline.ts. The module body is empty: it assigns
// no module-level bindings and runs no side effects.
var init_baseline = __esm({
  "src/evolve/baseline.ts": function() {
    "use strict";
  }
});
707
+
708
+ // src/evolve/trace.ts
709
+ import fs17 from "fs/promises";
710
+ import path17 from "path";
711
/**
 * Loads one task trace from disk.
 *
 * Reads `stdout.log`, `stderr.log`, `files_changed.json`, `timing.json`,
 * `score.json` and `tool_calls.jsonl` from `traceDir`. A file that cannot be
 * read falls back to a default ("" for logs and tool calls, `{}` for the
 * changed-files and timing JSON, `{"pass": false}` for the score).
 *
 * @param {string} traceDir - Trace directory. Its basename becomes `taskId`
 *   and its parent's basename is parsed as the iteration (0 when not numeric).
 * @returns {Promise<object>} `{ taskId, iteration, stdout, stderr, toolCalls,
 *   filesChanged, score, timing }`.
 * @throws {SyntaxError} When a file that exists contains malformed JSON.
 */
async function loadTrace(traceDir) {
  // Missing or unreadable files are expected, so each read has a fallback.
  const readOr = (fileName, fallback) =>
    fs17.readFile(path17.join(traceDir, fileName), "utf-8").catch(() => fallback);

  // The six reads are independent, so issue them together rather than serially.
  const [stdout, stderr, filesChangedStr, timingStr, scoreStr, toolCallsStr] = await Promise.all([
    readOr("stdout.log", ""),
    readOr("stderr.log", ""),
    readOr("files_changed.json", "{}"),
    readOr("timing.json", "{}"),
    readOr("score.json", '{"pass": false}'),
    readOr("tool_calls.jsonl", "")
  ]);

  // JSON Lines: one JSON document per non-blank line.
  const toolCalls = toolCallsStr
    .split("\n")
    .filter((line) => line.trim())
    .map((line) => JSON.parse(line));
  const parentDir = path17.basename(path17.dirname(traceDir));
  const iteration = Number.parseInt(parentDir, 10) || 0;
  return {
    taskId: path17.basename(traceDir),
    iteration,
    stdout,
    stderr,
    toolCalls,
    filesChanged: JSON.parse(filesChangedStr),
    score: JSON.parse(scoreStr),
    timing: JSON.parse(timingStr)
  };
}
744
/**
 * Loads every task trace stored under `<workspace>/traces/<iteration>/`.
 *
 * Loading is best-effort: a missing or unreadable iteration directory yields
 * an empty list, and a trace that fails to load is skipped without affecting
 * the others.
 *
 * @param {string} workspacePath - Evolve workspace directory.
 * @param {number} iteration - Iteration whose traces should be loaded.
 * @returns {Promise<object[]>} Traces that loaded successfully, in directory
 *   listing order.
 */
async function loadIterationTraces(workspacePath, iteration) {
  const tracesDir = path17.join(workspacePath, "traces", iteration.toString());
  let entries;
  try {
    entries = await fs17.readdir(tracesDir, { withFileTypes: true });
  } catch {
    // No directory for this iteration means there are no traces yet.
    return [];
  }
  const traces = [];
  for (const entry of entries) {
    // Stray files (editor droppings, .DS_Store, ...) are not task traces;
    // loading them would produce a phantom all-defaults trace that counts as a
    // failed task. Symlinks are kept in case task directories are linked in.
    if (!entry.isDirectory() && !entry.isSymbolicLink()) {
      continue;
    }
    try {
      traces.push(await loadTrace(path17.join(tracesDir, entry.name)));
    } catch {
      // Skip just this trace. Previously one bad trace aborted the whole loop
      // and silently dropped every trace listed after it.
    }
  }
  return traces;
}
757
/**
 * Writes a task trace to `traceDir`, creating the directory if needed.
 *
 * Produces `stdout.log`, `stderr.log`, `tool_calls.jsonl` (one JSON document
 * per line, no trailing newline) and pretty-printed `files_changed.json`,
 * `timing.json` and `score.json`.
 *
 * @param {string} traceDir - Destination directory.
 * @param {object} trace - Trace to persist; reads `stdout`, `stderr`,
 *   `toolCalls`, `filesChanged`, `timing` and `score`.
 * @returns {Promise<void>}
 */
async function writeTrace(traceDir, trace) {
  await fs17.mkdir(traceDir, { recursive: true });
  const pretty = (value) => JSON.stringify(value, null, 2);
  const files = [
    ["stdout.log", trace.stdout],
    ["stderr.log", trace.stderr],
    ["tool_calls.jsonl", trace.toolCalls.map((tc) => JSON.stringify(tc)).join("\n")],
    ["files_changed.json", pretty(trace.filesChanged)],
    ["timing.json", pretty(trace.timing)],
    ["score.json", pretty(trace.score)]
  ];
  // The files are independent of one another, so write them concurrently.
  await Promise.all(
    files.map(([fileName, data]) => fs17.writeFile(path17.join(traceDir, fileName), data, "utf-8"))
  );
}
779
/**
 * Writes `score` as pretty-printed JSON to `<traceDir>/score.json`,
 * replacing any existing file. The directory is not created here.
 *
 * @param {string} traceDir - Existing trace directory.
 * @param {*} score - JSON-serializable score value.
 * @returns {Promise<void>}
 */
async function writeScore(traceDir, score) {
  const target = path17.join(traceDir, "score.json");
  const serialized = JSON.stringify(score, null, 2);
  await fs17.writeFile(target, serialized, "utf-8");
}
786
+ async function writeIterationLog(workspacePath, log) {
787
+ const iterDir = path17.join(workspacePath, "iterations", log.iteration.toString());
788
+ await fs17.mkdir(iterDir, { recursive: true });
789
+ await fs17.writeFile(
790
+ path17.join(iterDir, "scores.json"),
791
+ JSON.stringify({ score: log.score, taskResults: log.taskResults }, null, 2),
792
+ "utf-8"
793
+ );
794
+ await fs17.writeFile(
795
+ path17.join(iterDir, "proposer_reasoning.md"),
796
+ log.proposal?.reasoning ?? "Baseline evaluation (no proposal)",
797
+ "utf-8"
798
+ );
799
+ await fs17.writeFile(
418
800
  path17.join(iterDir, "mutation_diff.patch"),
419
801
  log.diffPatch ?? "",
420
802
  "utf-8"
@@ -1011,7 +1393,7 @@ function parseToolCalls(stdout) {
1011
1393
  return [];
1012
1394
  }
1013
1395
  }
1014
- async function runWithConcurrency(tasks, limit) {
1396
+ async function runWithConcurrency2(tasks, limit) {
1015
1397
  const results = new Array(tasks.length);
1016
1398
  const executing = /* @__PURE__ */ new Set();
1017
1399
  const errors = [];
@@ -1113,7 +1495,7 @@ async function evaluateAll(tasks, harnessPath, workspacePath, iteration, config,
1113
1495
  });
1114
1496
  return { id: task.id, score: finalScore };
1115
1497
  };
1116
- const taskResults = await runWithConcurrency(
1498
+ const taskResults = await runWithConcurrency2(
1117
1499
  tasks.map((task) => () => evaluateTask(task)),
1118
1500
  concurrency
1119
1501
  );
@@ -1480,147 +1862,65 @@ agent environment (.claude/ directory) based on execution traces from real tasks
1480
1862
  2. Execution traces: Full stdout/stderr, tool call sequences, file changes, and scores
1481
1863
  3. History: Previous iterations' proposals, diffs, and resulting score changes
1482
1864
 
1483
- ## Your Task
1484
- Analyze the traces to identify WHY tasks fail or underperform. Then propose specific,
1485
- minimal changes to the harness files that will fix those failures.
1486
-
1487
- ## Diagnosis Process
1488
- 1. For each failed/low-scoring task:
1489
- a. Read the full trace (stdout, tool calls, file changes)
1490
- b. Identify the ROOT CAUSE: bad instruction? Missing tool? Wrong rule?
1491
- c. Trace the failure back to a specific harness decision
1492
- d. Propose a fix
1493
-
1494
- 2. For each successful task:
1495
- a. Note what worked well
1496
- b. Ensure proposed changes don't break what's working
1497
-
1498
- 3. Check history for counterfactual evidence
1499
-
1500
- ## Available Mutation Actions
1501
- 1. **replace** \u2014 Replace old_text with new_text in a file: { "file": "...", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." }
1502
- 2. **add_section** \u2014 Append new content to a file (or create it): { "file": "...", "action": "add_section", "new_text": "...", "rationale": "..." }
1503
- 3. **create_file** \u2014 Create a new file: { "file": "...", "action": "create_file", "new_text": "...", "rationale": "..." }
1504
- 4. **delete_section** \u2014 Remove specific text from a file: { "file": "...", "action": "delete_section", "old_text": "...", "rationale": "..." }
1505
- 5. **delete_file** \u2014 Delete an entire file: { "file": "...", "action": "delete_file", "rationale": "..." }
1506
-
1507
- ## Output Format
1508
- Return a JSON object:
1509
- {
1510
- "reasoning": "Your full causal analysis...",
1511
- "mutations": [
1512
- { "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
1513
- { "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." },
1514
- { "file": "rules/obsolete.md", "action": "delete_file", "rationale": "..." }
1515
- ],
1516
- "expected_impact": { "task-id": "+15% \u2014 explanation" }
1517
- }
1518
-
1519
- ## MCP Configuration
1520
- You can also mutate .mcp.json to add, remove, or reconfigure MCP servers.
1521
- Treat .mcp.json like any other harness file \u2014 propose changes when traces show
1522
- the agent lacks a tool it needs, or has tools that add noise without benefit.
1523
-
1524
- ## Rules
1525
- - Propose AT MOST 3 mutations per iteration. Fewer, targeted mutations are more stable than many broad ones.
1526
- - Each mutation must have a clear rationale tied to a specific trace observation.
1527
- - Never remove something that's working for another task.
1528
- - If a previous iteration's change caused a regression, REVERT it.
1529
- - Consider both additions AND removals. Remove sections that add noise without improving task performance.
1530
- - Bloated harnesses hurt performance \u2014 trim what isn't earning its keep.
1531
-
1532
- ## Anti-Gaming (CRITICAL)
1533
- - Mutations must improve GENERAL-PURPOSE development quality, not target specific eval criteria.
1534
- - You do NOT have access to scoring rubrics or expected outcomes. Diagnose problems from traces only.
1535
- - Do NOT add over-specified rules that restate existing conventions with stronger emphasis (e.g., changing "use chalk.green for success" to "MUST use chalk.green, no exceptions"). If a convention already exists, trust it.
1536
- - Do NOT add rules that only apply to a narrow eval scenario (e.g., write permissions for a specific directory just because one task needed it).
1537
- - Ask: "Would this mutation help a developer working on ANY task in this project?" If not, don't propose it.
1538
-
1539
- Return ONLY valid JSON.`;
1540
- STDOUT_TRUNCATION_LIMIT = 1e3;
1541
- MAX_CONTEXT_CHARS = 1e5;
1542
- }
1543
- });
1544
-
1545
- // src/ir/types.ts
1546
- function createEmptyIR() {
1547
- return {
1548
- meta: {
1549
- name: "",
1550
- purpose: "",
1551
- techStack: { language: "" },
1552
- autonomyLevel: 2
1553
- },
1554
- sections: [],
1555
- commands: [],
1556
- rules: [],
1557
- agents: [],
1558
- skills: [],
1559
- docs: [],
1560
- hooks: [],
1561
- settings: createEmptySettings(),
1562
- mcpServers: [],
1563
- intents: []
1564
- };
1565
- }
1566
- function createEmptySettings() {
1567
- return { hooks: {}, raw: {} };
1568
- }
1569
- function createSection(id, heading, content, order) {
1570
- return { id, heading, content, order };
1571
- }
1572
- function createCommandNode(name, content, description) {
1573
- return { name, description: description ?? "", content };
1574
- }
1575
- function createRuleNode(name, content, paths) {
1576
- const node = { name, content };
1577
- if (paths !== void 0) {
1578
- node.paths = paths;
1579
- }
1580
- return node;
1581
- }
1582
- function createAgentNode(name, content, model) {
1583
- const node = { name, content };
1584
- if (model !== void 0) {
1585
- node.model = model;
1586
- }
1587
- return node;
1588
- }
1589
- function createEmptyDiff() {
1590
- return {
1591
- sections: {
1592
- added: [],
1593
- removed: [],
1594
- modified: [],
1595
- reordered: []
1596
- },
1597
- commands: {
1598
- added: [],
1599
- removed: [],
1600
- modified: []
1601
- },
1602
- rules: {
1603
- added: [],
1604
- removed: [],
1605
- modified: []
1606
- },
1607
- agents: {
1608
- added: [],
1609
- removed: [],
1610
- modified: []
1611
- },
1612
- mcpServers: {
1613
- added: [],
1614
- removed: []
1615
- },
1616
- settings: {
1617
- changes: []
1618
- }
1619
- };
1865
+ ## Your Task
1866
+ Analyze the traces to identify WHY tasks fail or underperform. Then propose specific,
1867
+ minimal changes to the harness files that will fix those failures.
1868
+
1869
+ ## Diagnosis Process
1870
+ 1. For each failed/low-scoring task:
1871
+ a. Read the full trace (stdout, tool calls, file changes)
1872
+ b. Identify the ROOT CAUSE: bad instruction? Missing tool? Wrong rule?
1873
+ c. Trace the failure back to a specific harness decision
1874
+ d. Propose a fix
1875
+
1876
+ 2. For each successful task:
1877
+ a. Note what worked well
1878
+ b. Ensure proposed changes don't break what's working
1879
+
1880
+ 3. Check history for counterfactual evidence
1881
+
1882
+ ## Available Mutation Actions
1883
+ 1. **replace** \u2014 Replace old_text with new_text in a file: { "file": "...", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." }
1884
+ 2. **add_section** \u2014 Append new content to a file (or create it): { "file": "...", "action": "add_section", "new_text": "...", "rationale": "..." }
1885
+ 3. **create_file** \u2014 Create a new file: { "file": "...", "action": "create_file", "new_text": "...", "rationale": "..." }
1886
+ 4. **delete_section** \u2014 Remove specific text from a file: { "file": "...", "action": "delete_section", "old_text": "...", "rationale": "..." }
1887
+ 5. **delete_file** \u2014 Delete an entire file: { "file": "...", "action": "delete_file", "rationale": "..." }
1888
+
1889
+ ## Output Format
1890
+ Return a JSON object:
1891
+ {
1892
+ "reasoning": "Your full causal analysis...",
1893
+ "mutations": [
1894
+ { "file": "CLAUDE.md", "action": "replace", "old_text": "...", "new_text": "...", "rationale": "..." },
1895
+ { "file": "commands/develop.md", "action": "add_section", "new_text": "...", "rationale": "..." },
1896
+ { "file": "rules/obsolete.md", "action": "delete_file", "rationale": "..." }
1897
+ ],
1898
+ "expected_impact": { "task-id": "+15% \u2014 explanation" }
1620
1899
  }
1621
- var init_types = __esm({
1622
- "src/ir/types.ts"() {
1623
- "use strict";
1900
+
1901
+ ## MCP Configuration
1902
+ You can also mutate .mcp.json to add, remove, or reconfigure MCP servers.
1903
+ Treat .mcp.json like any other harness file \u2014 propose changes when traces show
1904
+ the agent lacks a tool it needs, or has tools that add noise without benefit.
1905
+
1906
+ ## Rules
1907
+ - Propose AT MOST 3 mutations per iteration. Fewer, targeted mutations are more stable than many broad ones.
1908
+ - Each mutation must have a clear rationale tied to a specific trace observation.
1909
+ - Never remove something that's working for another task.
1910
+ - If a previous iteration's change caused a regression, REVERT it.
1911
+ - Consider both additions AND removals. Remove sections that add noise without improving task performance.
1912
+ - Bloated harnesses hurt performance \u2014 trim what isn't earning its keep.
1913
+
1914
+ ## Anti-Gaming (CRITICAL)
1915
+ - Mutations must improve GENERAL-PURPOSE development quality, not target specific eval criteria.
1916
+ - You do NOT have access to scoring rubrics or expected outcomes. Diagnose problems from traces only.
1917
+ - Do NOT add over-specified rules that restate existing conventions with stronger emphasis (e.g., changing "use chalk.green for success" to "MUST use chalk.green, no exceptions"). If a convention already exists, trust it.
1918
+ - Do NOT add rules that only apply to a narrow eval scenario (e.g., write permissions for a specific directory just because one task needed it).
1919
+ - Ask: "Would this mutation help a developer working on ANY task in this project?" If not, don't propose it.
1920
+
1921
+ Return ONLY valid JSON.`;
1922
+ STDOUT_TRUNCATION_LIMIT = 1e3;
1923
+ MAX_CONTEXT_CHARS = 1e5;
1624
1924
  }
1625
1925
  });
1626
1926
 
@@ -1718,10 +2018,10 @@ function parseClaudeMd(content) {
1718
2018
  order: 0
1719
2019
  });
1720
2020
  for (let i = 1; i < chunks.length; i++) {
1721
- const chunk = chunks[i];
1722
- const newlineIdx = chunk.indexOf("\n");
1723
- const heading = newlineIdx >= 0 ? chunk.slice(0, newlineIdx).trim() : chunk.trim();
1724
- const sectionContent = newlineIdx >= 0 ? chunk.slice(newlineIdx + 1).trim() : "";
2021
+ const chunk3 = chunks[i];
2022
+ const newlineIdx = chunk3.indexOf("\n");
2023
+ const heading = newlineIdx >= 0 ? chunk3.slice(0, newlineIdx).trim() : chunk3.trim();
2024
+ const sectionContent = newlineIdx >= 0 ? chunk3.slice(newlineIdx + 1).trim() : "";
1725
2025
  const sectionId = resolveSectionId(heading);
1726
2026
  sections.push({
1727
2027
  id: sectionId,
@@ -2106,7 +2406,7 @@ var SECTION_ID_MAP;
2106
2406
  var init_parser = __esm({
2107
2407
  "src/ir/parser.ts"() {
2108
2408
  "use strict";
2109
- init_types();
2409
+ init_types2();
2110
2410
  SECTION_ID_MAP = [
2111
2411
  { pattern: /^(purpose|about|what)\b/i, id: "purpose" },
2112
2412
  { pattern: /^(tech\s*stack|technology|stack)\b/i, id: "tech-stack" },
@@ -2292,7 +2592,7 @@ var COMMANDS_PATH_RE, RULES_PATH_RE, AGENTS_PATH_RE;
2292
2592
  var init_translate = __esm({
2293
2593
  "src/ir/translate.ts"() {
2294
2594
  "use strict";
2295
- init_types();
2595
+ init_types2();
2296
2596
  init_parser();
2297
2597
  COMMANDS_PATH_RE = /^commands\/([^/]+?)(?:\.md)?$/;
2298
2598
  RULES_PATH_RE = /^rules\/([^/]+?)(?:\.md)?$/;
@@ -2503,205 +2803,21 @@ function applyIRMutation(ir, mutation) {
2503
2803
  ...ir,
2504
2804
  settings: applySettingsUpdate(ir.settings, mutation.path, mutation.value)
2505
2805
  };
2506
- }
2507
- // -- Raw text (legacy fallback) ----------------------------------------
2508
- case "raw_text": {
2509
- console.warn(
2510
- "raw_text mutation is a legacy fallback \u2014 the text operation will be applied during rendering"
2511
- );
2512
- return { ...ir };
2513
- }
2514
- }
2515
- }
2516
- var STRUCTURED_SETTINGS_KEYS;
2517
- var init_mutations = __esm({
2518
- "src/ir/mutations.ts"() {
2519
- "use strict";
2520
- STRUCTURED_SETTINGS_KEYS = /* @__PURE__ */ new Set(["statusLine", "hooks", "denyPatterns"]);
2521
- }
2522
- });
2523
-
2524
- // src/ir/renderer.ts
2525
- function renderClaudeMd(_meta, sections) {
2526
- const sorted = [...sections].sort((a, b) => a.order - b.order);
2527
- const blocks = [];
2528
- for (const section of sorted) {
2529
- if (section.heading && section.content) {
2530
- blocks.push(`${section.heading}
2531
-
2532
- ${section.content}`);
2533
- } else if (section.heading) {
2534
- blocks.push(section.heading);
2535
- } else if (section.content) {
2536
- blocks.push(section.content);
2537
- }
2538
- }
2539
- if (blocks.length === 0) {
2540
- return "\n";
2541
- }
2542
- return blocks.join("\n\n") + "\n";
2543
- }
2544
- function renderSettings(settings) {
2545
- const result = JSON.parse(
2546
- JSON.stringify(settings.raw)
2547
- );
2548
- if (settings.denyPatterns && settings.denyPatterns.length > 0) {
2549
- const permissions = result["permissions"] ?? {};
2550
- permissions["deny"] = settings.denyPatterns;
2551
- result["permissions"] = permissions;
2552
- }
2553
- if (settings.statusLine) {
2554
- result["statusLine"] = settings.statusLine;
2555
- }
2556
- const hookEvents = [
2557
- "PreToolUse",
2558
- "PostToolUse",
2559
- "UserPromptSubmit",
2560
- "SessionStart",
2561
- "PostCompact"
2562
- ];
2563
- const hooksObj = {};
2564
- let hasHooks = false;
2565
- for (const event of hookEvents) {
2566
- const entries = settings.hooks[event];
2567
- if (entries && entries.length > 0) {
2568
- hooksObj[event] = entries;
2569
- hasHooks = true;
2570
- }
2571
- }
2572
- if (hasHooks) {
2573
- result["hooks"] = hooksObj;
2574
- }
2575
- return JSON.stringify(result, null, 2) + "\n";
2576
- }
2577
- function renderMcpConfig(servers) {
2578
- if (servers.length === 0) {
2579
- return "";
2580
- }
2581
- const mcpServers = {};
2582
- for (const server of servers) {
2583
- const entry = {
2584
- command: server.command,
2585
- args: server.args
2586
- };
2587
- if (server.env && Object.keys(server.env).length > 0) {
2588
- entry["env"] = server.env;
2589
- }
2590
- mcpServers[server.id] = entry;
2591
- }
2592
- return JSON.stringify({ mcpServers }, null, 2) + "\n";
2593
- }
2594
- function renderRuleWithFrontmatter(rule) {
2595
- if (!rule.paths || rule.paths.length === 0) {
2596
- return rule.content;
2597
- }
2598
- const yamlLines = ["---", "paths:"];
2599
- for (const p of rule.paths) {
2600
- yamlLines.push(` - ${p}`);
2601
- }
2602
- yamlLines.push("---");
2603
- return yamlLines.join("\n") + "\n\n" + rule.content;
2604
- }
2605
- function renderAgentWithFrontmatter(agent) {
2606
- const hasModel = agent.model !== void 0;
2607
- const hasDisallowed = agent.disallowedTools !== void 0 && agent.disallowedTools.length > 0;
2608
- const hasRouting = agent.modelRouting !== void 0;
2609
- const hasExtra = agent.extraFrontmatter !== void 0 && Object.keys(agent.extraFrontmatter).length > 0;
2610
- if (!hasModel && !hasDisallowed && !hasRouting && !hasExtra) {
2611
- return agent.content;
2612
- }
2613
- const yamlLines = ["---"];
2614
- if (hasModel) {
2615
- yamlLines.push(`model: ${agent.model}`);
2616
- }
2617
- if (hasDisallowed) {
2618
- yamlLines.push("disallowedTools:");
2619
- for (const tool of agent.disallowedTools) {
2620
- yamlLines.push(` - ${tool}`);
2621
- }
2622
- }
2623
- if (hasRouting) {
2624
- yamlLines.push("modelRouting:");
2625
- yamlLines.push(` default: ${agent.modelRouting.default}`);
2626
- if (agent.modelRouting.escalateTo) {
2627
- yamlLines.push(` escalateTo: ${agent.modelRouting.escalateTo}`);
2628
- }
2629
- if (agent.modelRouting.escalateWhen) {
2630
- yamlLines.push(` escalateWhen: ${agent.modelRouting.escalateWhen}`);
2631
- }
2632
- }
2633
- if (hasExtra) {
2634
- for (const [key, value] of Object.entries(agent.extraFrontmatter)) {
2635
- if (Array.isArray(value)) {
2636
- yamlLines.push(`${key}:`);
2637
- for (const item of value) {
2638
- yamlLines.push(` - ${String(item)}`);
2639
- }
2640
- } else if (typeof value === "object" && value !== null) {
2641
- yamlLines.push(`${key}:`);
2642
- for (const [subKey, subVal] of Object.entries(value)) {
2643
- yamlLines.push(` ${subKey}: ${String(subVal)}`);
2644
- }
2645
- } else {
2646
- yamlLines.push(`${key}: ${String(value)}`);
2647
- }
2648
- }
2649
- }
2650
- yamlLines.push("---");
2651
- return yamlLines.join("\n") + "\n\n" + agent.content;
2652
- }
2653
- function settingsHasContent(settings) {
2654
- if (settings.statusLine) return true;
2655
- if (settings.denyPatterns && settings.denyPatterns.length > 0) return true;
2656
- if (Object.keys(settings.raw).length > 0) return true;
2657
- const hookEvents = [
2658
- "PreToolUse",
2659
- "PostToolUse",
2660
- "UserPromptSubmit",
2661
- "SessionStart",
2662
- "PostCompact"
2663
- ];
2664
- for (const event of hookEvents) {
2665
- const entries = settings.hooks[event];
2666
- if (entries && entries.length > 0) return true;
2667
- }
2668
- return false;
2669
- }
2670
- function renderHarness(ir) {
2671
- const files = /* @__PURE__ */ new Map();
2672
- if (ir.sections.length > 0 || ir.meta.name) {
2673
- files.set("CLAUDE.md", renderClaudeMd(ir.meta, ir.sections));
2674
- }
2675
- if (settingsHasContent(ir.settings)) {
2676
- files.set("settings.json", renderSettings(ir.settings));
2677
- }
2678
- for (const cmd of ir.commands) {
2679
- files.set(`commands/${cmd.name}.md`, cmd.content);
2680
- }
2681
- for (const rule of ir.rules) {
2682
- files.set(`rules/${rule.name}.md`, renderRuleWithFrontmatter(rule));
2683
- }
2684
- for (const agent of ir.agents) {
2685
- files.set(`agents/${agent.name}.md`, renderAgentWithFrontmatter(agent));
2686
- }
2687
- for (const skill of ir.skills) {
2688
- files.set(`skills/${skill.name}.md`, skill.content);
2689
- }
2690
- for (const doc of ir.docs) {
2691
- files.set(`docs/${doc.name}.md`, doc.content);
2692
- }
2693
- for (const hook of ir.hooks) {
2694
- files.set(`hooks/${hook.name}.mjs`, hook.content);
2695
- }
2696
- const mcpContent = renderMcpConfig(ir.mcpServers);
2697
- if (mcpContent) {
2698
- files.set(".mcp.json", mcpContent);
2806
+ }
2807
+ // -- Raw text (legacy fallback) ----------------------------------------
2808
+ case "raw_text": {
2809
+ console.warn(
2810
+ "raw_text mutation is a legacy fallback \u2014 the text operation will be applied during rendering"
2811
+ );
2812
+ return { ...ir };
2813
+ }
2699
2814
  }
2700
- return files;
2701
2815
  }
2702
- var init_renderer = __esm({
2703
- "src/ir/renderer.ts"() {
2816
+ var STRUCTURED_SETTINGS_KEYS;
2817
+ var init_mutations = __esm({
2818
+ "src/ir/mutations.ts"() {
2704
2819
  "use strict";
2820
+ STRUCTURED_SETTINGS_KEYS = /* @__PURE__ */ new Set(["statusLine", "hooks", "denyPatterns"]);
2705
2821
  }
2706
2822
  });
2707
2823
 
@@ -2964,7 +3080,7 @@ function deepEqual(a, b) {
2964
3080
  var init_diff = __esm({
2965
3081
  "src/ir/diff.ts"() {
2966
3082
  "use strict";
2967
- init_types();
3083
+ init_types2();
2968
3084
  }
2969
3085
  });
2970
3086
 
@@ -3590,7 +3706,8 @@ var init_targeting = __esm({
3590
3706
  "refactor": ["architecture", "conventions"],
3591
3707
  "test-writing": ["verification", "commands"],
3592
3708
  "config-change": ["settings", "mcp"],
3593
- "documentation": ["general"]
3709
+ "documentation": ["general"],
3710
+ "persistence-completion": ["commands", "verification"]
3594
3711
  };
3595
3712
  }
3596
3713
  });
@@ -4585,7 +4702,7 @@ function estimateTime(model, intent) {
4585
4702
  "qwen": 10
4586
4703
  };
4587
4704
  const basePerPass = Object.entries(perPass).find(([k]) => model.toLowerCase().includes(k))?.[1] ?? 20;
4588
- const totalBase = basePerPass * 2;
4705
+ const totalBase = basePerPass * 3;
4589
4706
  if (isComplex) {
4590
4707
  const low = Math.floor(totalBase * 1.5);
4591
4708
  const high = Math.floor(totalBase * 4);
@@ -4635,13 +4752,7 @@ function createProgressRenderer() {
4635
4752
  }
4636
4753
  currentPhase = "";
4637
4754
  } else if (progress.status === "warning") {
4638
- const lastIdx = lines.length - 1;
4639
- if (lastIdx >= 0) {
4640
- lines[lastIdx] = ` ${chalk.yellow("\u26A0")} ${progress.message}`;
4641
- }
4642
- currentPhase = progress.phase;
4643
- phaseStart = Date.now();
4644
- lines.push(` ${warmStone("\u25D0")} Retrying in concise mode... ${chalk.dim("[0s]")}`);
4755
+ lines.push(` ${chalk.yellow("\u26A0")} ${progress.message}`);
4645
4756
  }
4646
4757
  render();
4647
4758
  },
@@ -4909,7 +5020,7 @@ You must output a JSON object matching the SkeletonSpec schema.
4909
5020
  - MCP servers: maximum 6. Prefer fewer.
4910
5021
  - Skills: maximum 3. Only include directly relevant ones.
4911
5022
  - Agents: maximum 5. Orchestration pipeline (/develop) agents.
4912
- - Hooks: maximum 4 (auto-format, block-destructive, PostCompact, plus one contextual).
5023
+ - Hooks: maximum 5 (auto-format, block-destructive, PostCompact, memory-persistence, plus one contextual).
4913
5024
 
4914
5025
  If the workflow doesn't clearly need a tool, DO NOT include it.
4915
5026
  Each MCP server costs 500-2000 tokens of context window.
@@ -4937,16 +5048,17 @@ Return ONLY valid JSON matching this structure:
4937
5048
  \`\`\`
4938
5049
 
4939
5050
  Return ONLY valid JSON. No markdown fences. No text outside the JSON.`;
4940
- var HARNESS_PROMPT = `You are the Kairn harness compiler. Your job is to generate the full environment content from a project skeleton.
5051
+ var SYSTEM_PROMPT = `You are the Kairn environment compiler. Your job is to generate a minimal, optimal Claude Code agent environment from a user's natural language description of what they want their agent to do.
4941
5052
 
4942
5053
  You will receive:
4943
- 1. The skeleton (tool selections + project outline)
4944
- 2. The user's original intent
5054
+ 1. The user's intent (what they want to build/do)
5055
+ 2. A tool registry (available MCP servers, plugins, and hooks)
4945
5056
 
4946
- You must generate all harness content: CLAUDE.md, commands, rules, agents, skills, and docs.
5057
+ You must output a JSON object matching the EnvironmentSpec schema.
4947
5058
 
4948
5059
  ## Core Principles
4949
5060
 
5061
+ - **Minimalism over completeness.** Fewer, well-chosen tools beat many generic ones. Each MCP server costs 500-2000 context tokens.
4950
5062
  - **Workflow-specific, not generic.** Every instruction, command, and rule must relate to the user's actual workflow.
4951
5063
  - **Concise CLAUDE.md.** Under 150 lines. No generic text like "be helpful." Include build/test commands, reference docs/ and skills/.
4952
5064
  - **Security by default.** Always include deny rules for destructive commands and secret file access.
@@ -5069,494 +5181,1262 @@ Do not add generic filler. Every line must be specific to the user's workflow.
5069
5181
  17. A "Completion Standards" section in CLAUDE.md (never mark done without verifying: requirements met, tests passing, no debug artifacts, reviewed from 3 perspectives)
5070
5182
  18. A "Sprint Contract" section in CLAUDE.md (confirm acceptance criteria exist before implementing, verify each criterion after)
5071
5183
 
5072
- ## Shell-Integrated Commands
5184
+ ## Tool Selection Rules
5185
+
5186
+ - Only select tools directly relevant to the described workflow
5187
+ - Prefer free tools (auth: "none") when quality is comparable
5188
+ - Tier 1 tools (Context7, Sequential Thinking, security-guidance) should be included in most environments
5189
+ - For tools requiring API keys (auth: "api_key"), use \${ENV_VAR} syntax \u2014 never hardcode keys
5190
+ - Maximum 6-8 MCP servers to avoid context bloat
5191
+ - Include a \`reason\` for each selected tool explaining why it fits this workflow
5073
5192
 
5074
- Commands that reference live project state should use Claude Code's \`!\` prefix for shell output:
5193
+ ## Context Budget (STRICT)
5075
5194
 
5076
- \`\`\`markdown
5077
- # Example: .claude/commands/review.md
5078
- Review the staged changes for quality and security:
5195
+ - MCP servers: maximum 6. Prefer fewer.
5196
+ - CLAUDE.md: maximum 150 lines.
5197
+ - Rules: maximum 5 files, each under 20 lines.
5198
+ - Skills: maximum 3. Only include directly relevant ones.
5199
+ - Agents: maximum 5. Orchestration pipeline (/develop) agents.
5200
+ - Commands: no limit (loaded on demand, zero context cost).
5201
+ - Hooks: maximum 5 (auto-format, block-destructive, PostCompact, memory-persistence, plus one contextual).
5079
5202
 
5080
- !git diff --staged
5203
+ If the workflow doesn't clearly need a tool, DO NOT include it.
5204
+ Each MCP server costs 500-2000 tokens of context window.
5081
5205
 
5082
- Run tests and check for failures:
5206
+ ## Output Schema
5083
5207
 
5084
- !npm test 2>&1 | tail -20
5208
+ Return ONLY valid JSON matching this structure:
5085
5209
 
5086
- Focus on: security, error handling, test coverage.
5210
+ \`\`\`json
5211
+ {
5212
+ "name": "short-kebab-case-name",
5213
+ "description": "One-line description of the environment",
5214
+ "tools": [
5215
+ { "tool_id": "id-from-registry", "reason": "why this tool fits" }
5216
+ ],
5217
+ "harness": {
5218
+ "claude_md": "The full CLAUDE.md content (under 150 lines)",
5219
+ "settings": {
5220
+ "permissions": {
5221
+ "allow": ["Bash(npm run *)", "Read", "Write", "Edit"],
5222
+ "deny": ["Bash(rm -rf *)", "Bash(curl * | sh)", "Read(./.env)", "Read(./secrets/**)"]
5223
+ }
5224
+ },
5225
+ "mcp_config": {
5226
+ "server-name": { "command": "npx", "args": ["..."], "env": {} }
5227
+ },
5228
+ "commands": {
5229
+ "help": "markdown content for /project:help",
5230
+ "develop": "markdown content for /project:develop",
5231
+ "persist": "markdown content for /project:persist"
5232
+ },
5233
+ "rules": {
5234
+ "continuity": "markdown content for continuity rule",
5235
+ "security": "markdown content for security rule"
5236
+ },
5237
+ "skills": {
5238
+ "skill-name/SKILL": "markdown content with YAML frontmatter"
5239
+ },
5240
+ "agents": {
5241
+ "architect": "agent markdown with YAML frontmatter",
5242
+ "planner": "agent markdown with YAML frontmatter",
5243
+ "implementer": "agent markdown with YAML frontmatter",
5244
+ "fixer": "agent markdown with YAML frontmatter",
5245
+ "doc-updater": "agent markdown with YAML frontmatter"
5246
+ },
5247
+ "docs": {
5248
+ "DECISIONS": "# Decisions\\n\\nArchitectural decisions.",
5249
+ "LEARNINGS": "# Learnings\\n\\nNon-obvious discoveries.",
5250
+ "SPRINT": "# Sprint\\n\\nLiving spec and plan."
5251
+ }
5252
+ }
5253
+ }
5087
5254
  \`\`\`
5088
5255
 
5089
- Use \`!\` when a command needs: git status, test results, build output, or file listings.
5256
+ Do not include any text outside the JSON object. Do not wrap in markdown code fences.`;
5257
+ var CLARIFICATION_PROMPT = `You are helping a user define their project for environment compilation.
5090
5258
 
5091
- ## Path-Scoped Rules
5259
+ Given their initial description, generate 3-5 clarifying questions to understand:
5260
+ 1. Language and framework
5261
+ 2. What the project specifically does (be precise)
5262
+ 3. Primary workflow (build, research, write, analyze?)
5263
+ 4. Key dependencies or integrations
5264
+ 5. Target audience
5092
5265
 
5093
- For code projects with multiple domains (API, frontend, tests), generate path-scoped rules using YAML frontmatter:
5266
+ For each question, provide a reasonable suggestion based on the description.
5094
5267
 
5095
- \`\`\`markdown
5096
- # Example: rules/api.md
5097
- ---
5098
- paths:
5099
- - "src/api/**"
5100
- - "src/routes/**"
5101
- ---
5102
- - All handlers return { data, error } shape
5103
- - Use Zod for request validation
5104
- - Log errors with request ID context
5105
- \`\`\`
5268
+ Output ONLY a JSON array:
5269
+ [
5270
+ { "question": "Language/framework?", "suggestion": "TypeScript + Node.js" },
5271
+ ...
5272
+ ]
5106
5273
 
5107
- \`\`\`markdown
5108
- # Example: rules/testing.md
5109
- ---
5110
- paths:
5111
- - "tests/**"
5112
- - "**/*.test.*"
5113
- - "**/*.spec.*"
5114
- ---
5115
- - Use AAA pattern: Arrange-Act-Assert
5116
- - One assertion per test when possible
5117
- - Mock external dependencies, never real APIs
5118
- \`\`\`
5274
+ Rules:
5275
+ - Suggestions should be reasonable guesses, clearly marked as suggestions
5276
+ - Keep questions short (under 10 words)
5277
+ - Maximum 5 questions
5278
+ - If the description is already very detailed, ask fewer questions`;
5119
5279
 
5120
- Keep \`security.md\` and \`continuity.md\` as unconditional (no paths frontmatter).
5121
- Only generate scoped rules when the workflow involves multiple code domains.
5280
+ // src/registry/loader.ts
5281
+ import fs3 from "fs/promises";
5282
+ import path3 from "path";
5283
+ import { fileURLToPath as fileURLToPath2 } from "url";
5284
+ var __filename2 = fileURLToPath2(import.meta.url);
5285
+ var __dirname2 = path3.dirname(__filename2);
5286
/**
 * Load the tool registry that ships with the package.
 *
 * Several candidate locations are probed in order, all resolved relative to
 * this module's directory; the first file that can be read AND parsed as JSON
 * wins.
 *
 * @returns {Promise<unknown>} The parsed contents of the first usable tools.json.
 * @throws {Error} When no candidate could be read and parsed. The failure of
 *   the last candidate is attached as `cause` so that a malformed file is not
 *   indistinguishable from a missing one.
 */
async function loadBundledRegistry() {
  const candidates = [
    path3.resolve(__dirname2, "../registry/tools.json"),
    path3.resolve(__dirname2, "../src/registry/tools.json"),
    path3.resolve(__dirname2, "../../src/registry/tools.json")
  ];
  let lastError;
  for (const candidate of candidates) {
    try {
      const data = await fs3.readFile(candidate, "utf-8");
      return JSON.parse(data);
    } catch (err) {
      // Best effort: remember why this candidate failed and try the next one.
      lastError = err;
    }
  }
  throw new Error("Could not find tools.json registry", { cause: lastError });
}
5302
/**
 * Read the user-level registry file and parse it as JSON.
 *
 * Any failure (resolving the path, reading the file, parsing the JSON) is
 * treated as "no user registry" and yields an empty array.
 *
 * @returns {Promise<unknown>} Parsed registry contents, or `[]` on any failure.
 */
async function loadUserRegistry() {
  const readAndParse = async () => {
    const registryPath = getUserRegistryPath();
    const raw = await fs3.readFile(registryPath, "utf-8");
    return JSON.parse(raw);
  };
  return readAndParse().catch(() => []);
}
5310
/**
 * Persist the given tool list to the user-level registry file as
 * 2-space-indented JSON.
 *
 * @param {unknown} tools - Value to serialise and write.
 * @returns {Promise<void>}
 */
async function saveUserRegistry(tools) {
  const target = getUserRegistryPath();
  const serialized = JSON.stringify(tools, null, 2);
  await fs3.writeFile(target, serialized, "utf-8");
}
5313
/**
 * Combine the bundled registry with the user registry.
 *
 * Entries are keyed by `id`; a user entry replaces the bundled entry with the
 * same id (keeping the bundled entry's position), and user entries with new
 * ids are appended. When the user registry is empty the bundled registry is
 * returned as-is.
 *
 * @returns {Promise<Array<object>>} The merged tool list.
 */
async function loadRegistry() {
  const bundledTools = await loadBundledRegistry();
  const userTools = await loadUserRegistry();
  if (userTools.length === 0) {
    return bundledTools;
  }
  // Later entries win, so listing user tools last lets them override bundled ones.
  const byId = new Map(
    [...bundledTools, ...userTools].map((tool) => [tool.id, tool])
  );
  return [...byId.values()];
}
5122
5326
 
5123
- ## Hooks
5327
+ // src/compiler/compile.ts
5328
+ init_providers();
5329
+ init_llm();
5124
5330
 
5125
- Generate hooks in settings.json based on project type:
5331
+ // src/compiler/plan.ts
5332
+ init_llm();
5333
+ init_types();
5334
+ var ORCHESTRATOR_PROMPT = `You are the Kairn compilation planner. Given a project skeleton and user intent, produce a CompilationPlan JSON that determines what to generate and in what order.
5335
+
5336
+ ## Agent Types
5337
+ - sections-writer: generates CLAUDE.md sections (Purpose, Tech Stack, Commands, Architecture, Conventions, Key Commands, Output, Verification, Known Gotchas, Debugging, Git Workflow, Engineering Standards)
5338
+ - rule-writer: generates .claude/rules/ files (security, continuity, plus project-specific)
5339
+ - doc-writer: generates .claude/docs/ files (DECISIONS, LEARNINGS, SPRINT)
5340
+ - command-writer: generates .claude/commands/ files (help, build, test, status, fix, develop, sprint, spec, prove, grill, persist, etc.)
5341
+ - agent-writer: generates .claude/agents/ files (architect, planner, implementer, fixer, doc-updater, qa-orchestrator, linter, e2e-tester)
5342
+ - skill-writer: generates .claude/skills/ files (tdd, etc.)
5343
+
5344
+ ## Phase Rules
5345
+ - Phase A (no dependencies): sections-writer, rule-writer, doc-writer
5346
+ - Phase B (depends on Phase A): command-writer, agent-writer, skill-writer (optional)
5347
+ - Phase C (depends on Phase B): reserved for linker (NOT included in plan \u2014 it runs separately)
5348
+
5349
+ ## Token Budgets
5350
+ - sections-writer: 4096, command-writer: 4096, agent-writer: 4096
5351
+ - rule-writer: 2048, doc-writer: 2048, skill-writer: 2048
5126
5352
 
5127
- **All code projects** \u2014 block destructive commands, credential leaks, injection, and network exfiltration:
5128
- \`\`\`json
5353
+ ## Output Format
5354
+ Return ONLY valid JSON:
5129
5355
  {
5130
- "hooks": {
5131
- "PreToolUse": [
5132
- {
5133
- "matcher": "Bash",
5134
- "hooks": [{
5135
- "type": "command",
5136
- "command": "CMD=$(cat | jq -r '.tool_input.command // empty') && echo \\"$CMD\\" | grep -qiE 'rm\\\\s+-rf\\\\s+/|DROP\\\\s+(TABLE|DATABASE)|curl.*\\\\|\\\\s*sh|:(){ :|:& };:|git\\\\s+push.*--force(?!-with-lease)|ch(mod|own).*-R\\\\s+/|npm\\\\s+publish(?!.*--dry-run)|(api[_-]?key|secret|token|password)\\\\s*[:=]|AKIA[0-9A-Z]{16}|BEGIN.*PRIVATE\\\\s+KEY|;\\\\s*(DROP|DELETE|ALTER|TRUNCATE)\\\\s+|\\\\.\\\\./\\\\.\\\\./\\\\.\\\\./|nc\\\\s+.*-e|/dev/tcp/|bash\\\\s+-i|curl.*-d.*@|wget.*--post-file' && echo 'Blocked dangerous command' >&2 && exit 2 || true"
5137
- }]
5356
+ "project_context": "2-3 sentence project summary",
5357
+ "phases": [
5358
+ {
5359
+ "id": "phase-a",
5360
+ "agents": [
5361
+ { "agent": "sections-writer", "items": ["purpose", "tech-stack", "commands", ...], "max_tokens": 4096 },
5362
+ { "agent": "rule-writer", "items": ["security", "continuity", ...], "max_tokens": 2048 },
5363
+ { "agent": "doc-writer", "items": ["DECISIONS", "LEARNINGS", "SPRINT"], "max_tokens": 2048 }
5364
+ ],
5365
+ "dependsOn": []
5366
+ },
5367
+ {
5368
+ "id": "phase-b",
5369
+ "agents": [...],
5370
+ "dependsOn": ["phase-a"]
5371
+ }
5372
+ ]
5373
+ }`;
5374
+ var STANDARD_SECTION_ITEMS = [
5375
+ "purpose",
5376
+ "tech-stack",
5377
+ "commands",
5378
+ "architecture",
5379
+ "conventions",
5380
+ "key-commands",
5381
+ "output",
5382
+ "verification",
5383
+ "gotchas",
5384
+ "debugging",
5385
+ "git-workflow"
5386
+ ];
5387
+ var STANDARD_DOC_ITEMS = ["DECISIONS", "LEARNINGS", "SPRINT"];
5388
+ var TOKEN_BUDGETS = {
5389
+ "sections-writer": 4096,
5390
+ "command-writer": 4096,
5391
+ "agent-writer": 4096,
5392
+ "rule-writer": 2048,
5393
+ "doc-writer": 2048,
5394
+ "skill-writer": 2048
5395
+ };
5396
/**
 * Ask the LLM for a compilation plan and validate it.
 *
 * If anything in that path fails (building the message, the LLM call, parsing
 * or validating the response), the deterministic default plan derived from
 * the skeleton is returned instead.
 *
 * @param {string} intent - The user's intent text.
 * @param {object} skeleton - Project skeleton passed to the planner.
 * @param {object} config - Configuration forwarded to `callLLM`.
 * @returns {Promise<object>} A validated plan, or the default plan on failure.
 */
async function generatePlan(intent, skeleton, config) {
  const requestPlan = async () => {
    const raw = await callLLM(config, buildPlanMessage(intent, skeleton), {
      systemPrompt: ORCHESTRATOR_PROMPT,
      maxTokens: 2048,
      cacheControl: true
    });
    return validatePlan(parsePlanResponse(raw));
  };
  try {
    return await requestPlan();
  } catch {
    // Deliberate fallback: planning must never block compilation.
    return generateDefaultPlan(skeleton);
  }
}
5410
/**
 * Build the fallback two-phase compilation plan directly from the skeleton.
 *
 * Phase A always contains the sections, rule and doc writers. Phase B always
 * contains the command writer, and adds the agent / skill writers only when
 * the skeleton outline lists custom agents / skills.
 *
 * @param {object} skeleton - Must expose `name`, `description` and an `outline`
 *   with `custom_rules`, `key_commands`, `custom_agents` and `custom_skills`.
 * @returns {{project_context: string, phases: Array<object>}} The default plan.
 */
function generateDefaultPlan(skeleton) {
  // Every task takes its token budget from the shared per-agent table.
  const makeTask = (agent, items) => ({
    agent,
    items,
    max_tokens: TOKEN_BUDGETS[agent]
  });

  const project_context = `${skeleton.name}: ${skeleton.description}`;

  const phaseA = {
    id: "phase-a",
    agents: [
      makeTask("sections-writer", [...STANDARD_SECTION_ITEMS]),
      makeTask("rule-writer", ["security", "continuity", ...skeleton.outline.custom_rules]),
      makeTask("doc-writer", [...STANDARD_DOC_ITEMS])
    ],
    dependsOn: []
  };

  const laterAgents = [
    makeTask("command-writer", ["help", ...skeleton.outline.key_commands])
  ];
  if (skeleton.outline.custom_agents.length > 0) {
    laterAgents.push(makeTask("agent-writer", skeleton.outline.custom_agents));
  }
  if (skeleton.outline.custom_skills.length > 0) {
    laterAgents.push(makeTask("skill-writer", skeleton.outline.custom_skills));
  }
  const phaseB = {
    id: "phase-b",
    agents: laterAgents,
    dependsOn: ["phase-a"]
  };

  return { project_context, phases: [phaseA, phaseB] };
}
5452
/**
 * Assemble the user message sent to the planning model: the intent, the
 * skeleton pretty-printed as JSON, and a closing instruction, separated by
 * blank lines.
 *
 * @param {string} intent - The user's intent text.
 * @param {unknown} skeleton - Value serialised with 2-space indentation.
 * @returns {string} The newline-joined message.
 */
function buildPlanMessage(intent, skeleton) {
  const parts = [];
  parts.push("## Intent", intent, "");
  parts.push("## Skeleton", JSON.stringify(skeleton, null, 2), "");
  parts.push("Generate the CompilationPlan JSON now.");
  return parts.join("\n");
}
5463
/**
 * Extract and parse the JSON object contained in a planner response.
 *
 * A leading/trailing markdown code fence is stripped when the trimmed text
 * starts with one; the candidate JSON is then everything from the first `{`
 * to the last `}`.
 *
 * @param {string} text - Raw model output.
 * @returns {unknown} The parsed JSON value.
 * @throws {Error} When no `{ ... }` span is present.
 * @throws {SyntaxError} When the extracted span is not valid JSON.
 */
function parsePlanResponse(text) {
  const trimmed = text.trim();
  const unfenced = trimmed.startsWith("```")
    ? trimmed.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "")
    : trimmed;

  const open = unfenced.indexOf("{");
  const close = unfenced.lastIndexOf("}");
  // `close < open` also covers the case where there is no closing brace at all.
  if (open === -1 || close < open) {
    throw new Error("Orchestrator did not return valid JSON");
  }
  return JSON.parse(unfenced.slice(open, close + 1));
}
5474
+
5475
+ // src/compiler/batch.ts
5476
+ init_types();
5477
+ init_types2();
5478
/**
 * Append one agent's output to the matching collection of the IR, in place.
 *
 * The writer type in `result.agent` selects which same-named array on
 * `result` is appended to the same-named array on `ir`. Results from an
 * unrecognised agent are ignored.
 *
 * @param {object} ir - IR object whose arrays are mutated.
 * @param {object} result - Agent result carrying `agent` and its output array.
 * @returns {void}
 */
function mergeIntoIR(ir, result) {
  const fieldByAgent = new Map([
    ["sections-writer", "sections"],
    ["command-writer", "commands"],
    ["agent-writer", "agents"],
    ["rule-writer", "rules"],
    ["doc-writer", "docs"],
    ["skill-writer", "skills"]
  ]);
  const field = fieldByAgent.get(result.agent);
  if (field === undefined) {
    return;
  }
  ir[field].push(...result[field]);
}
5500
/**
 * Run task thunks with at most `limit` of them in flight at once.
 *
 * Results are returned in the order of `tasks`, regardless of completion
 * order. On failure the promise rejects with the first error observed; tasks
 * that were already started are awaited, but no further tasks are started.
 *
 * @param {Array<() => unknown>} tasks - Functions returning a value or promise.
 * @param {number} limit - Desired parallelism. Values that are not a number
 *   >= 1 (0, negatives, NaN, undefined) fall back to 1 so that every task
 *   still runs instead of the call resolving with an unfilled array.
 * @returns {Promise<Array<unknown>>} Task results, index-aligned with `tasks`.
 * @throws The first error thrown/rejected by any task.
 */
async function runWithConcurrency(tasks, limit) {
  if (tasks.length === 0) return [];
  const results = new Array(tasks.length);
  let nextIndex = 0;
  let firstError;
  let hasError = false;

  // Each worker pulls the next unclaimed index until the queue is drained
  // or some worker has recorded a failure.
  async function runNext() {
    while (!hasError && nextIndex < tasks.length) {
      const index = nextIndex++;
      try {
        results[index] = await tasks[index]();
      } catch (err) {
        if (!hasError) {
          hasError = true;
          firstError = err;
        }
        return;
      }
    }
  }

  // Clamp to [1, tasks.length]; a zero worker count would silently skip all work.
  const requested = Math.floor(limit) || 1;
  const workerCount = Math.min(Math.max(requested, 1), tasks.length);
  const workers = Array.from({ length: workerCount }, () => runNext());
  await Promise.all(workers);

  if (hasError) {
    throw firstError;
  }
  return results;
}
5531
/**
 * Execute a compilation plan phase by phase and collect the results in an IR.
 *
 * Before anything runs, every `dependsOn` entry is checked to (1) name a phase
 * in the plan and (2) refer to a phase positioned earlier in `plan.phases`.
 * Phases then run in list order; the agents of one phase run through
 * `runWithConcurrency`. An agent that fails with a `TruncationError` is
 * retried once with its `max_tokens` doubled; any other error propagates.
 *
 * @param {object} plan - Plan with a `phases` array (`id`, `agents`, `dependsOn`).
 * @param {(task: object) => Promise<object>} executeAgent - Runs one agent task.
 * @param {number} concurrency - Parallelism within a phase.
 * @param {(event: object) => void} [onProgress] - Called at phase start and completion.
 * @returns {Promise<object>} The IR with all agent results merged in.
 * @throws {Error} On unknown, mis-ordered or incomplete dependencies, or when
 *   an agent fails.
 */
async function executePlan(plan, executeAgent, concurrency, onProgress) {
  const { phases } = plan;
  if (phases.length === 0) {
    return createEmptyIR();
  }

  // Pass 1: every dependency must name a phase that exists in the plan.
  const knownIds = new Set(phases.map((p) => p.id));
  for (const phase of phases) {
    for (const dep of phase.dependsOn) {
      if (knownIds.has(dep)) continue;
      throw new Error(
        `Phase "${phase.id}" depends on unknown phase "${dep}"`
      );
    }
  }

  // Pass 2: a dependency must sit strictly before its dependent in the list.
  const positionById = new Map(phases.map((p, i) => [p.id, i]));
  for (const phase of phases) {
    const ownPosition = positionById.get(phase.id);
    for (const dep of phase.dependsOn) {
      const depPosition = positionById.get(dep);
      if (depPosition !== undefined && depPosition >= ownPosition) {
        throw new Error(
          `Phase "${phase.id}" has a dependency ordering violation: depends on "${dep}" which is not in an earlier position`
        );
      }
    }
  }

  // Wraps a task so that a truncated response gets one retry with twice the budget.
  const withTruncationRetry = (task) => async () => {
    try {
      return await executeAgent(task);
    } catch (err) {
      if (!(err instanceof TruncationError)) {
        throw err;
      }
      return await executeAgent({ ...task, max_tokens: task.max_tokens * 2 });
    }
  };

  const ir = createEmptyIR();
  const finished = new Set();
  for (const phase of phases) {
    for (const dep of phase.dependsOn) {
      if (finished.has(dep)) continue;
      throw new Error(
        `Phase "${phase.id}" depends on incomplete phase "${dep}"`
      );
    }

    onProgress?.({
      phaseId: phase.id,
      status: "start",
      agentCount: phase.agents.length
    });

    const thunks = phase.agents.map((task) => withTruncationRetry(task));
    const results = await runWithConcurrency(thunks, concurrency);
    for (const result of results) {
      mergeIntoIR(ir, result);
    }
    finished.add(phase.id);

    onProgress?.({
      phaseId: phase.id,
      status: "complete",
      agentCount: phase.agents.length,
      completedCount: phase.agents.length
    });
  }
  return ir;
}
5603
+
5604
+ // src/compiler/linker.ts
5605
+ init_types2();
5606
+ var DEFAULT_HELP_CONTENT = "Show available commands and their descriptions.\n\nList all /project: commands with brief descriptions.";
5607
+ var DEFAULT_HELP_DESCRIPTION = "Show available commands";
5608
+ var DEFAULT_SECURITY_CONTENT = [
5609
+ "# Security Rules",
5610
+ "",
5611
+ "- NEVER log or echo API keys, tokens, or secrets",
5612
+ "- NEVER write secrets to files",
5613
+ "- NEVER execute user-provided strings as shell commands",
5614
+ "- Validate all inputs before use"
5615
+ ].join("\n");
5616
+ var DEFAULT_CONTINUITY_CONTENT = [
5617
+ "# Continuity",
5618
+ "",
5619
+ "After every significant decision or discovery:",
5620
+ "",
5621
+ "1. Update docs/DECISIONS.md",
5622
+ "2. Update docs/LEARNINGS.md",
5623
+ "3. Update docs/TODO.md task status"
5624
+ ].join("\n");
5625
+ var AGENT_REF_PATTERN = /@([\w-]+)/g;
5626
+ var COMMAND_REF_PATTERN = /\/project:([\w-]+)/g;
5627
/**
 * Check every `@name` mention in command content against the known agents
 * and neutralise mentions of agents that do not exist.
 *
 * For each unknown name found in a command, one warning and one auto-fix
 * entry are recorded, and every `@name` mention in that command is rewritten
 * to the bare `name`.
 *
 * @param {object} patched - Object whose `commands` entries (`name`, `content`)
 *   are inspected; `content` is rewritten in place.
 * @param {Set<string>} agentNames - Names of agents that exist.
 * @param {object} report - Receives messages in `warnings` and `autoFixes`.
 * @returns {void}
 */
function validateAgentReferences(patched, agentNames, report) {
  for (const cmd of patched.commands) {
    // Collect each unknown name once so repeated mentions are reported once.
    const unknownNames = new Set();
    for (const match of cmd.content.matchAll(AGENT_REF_PATTERN)) {
      const name = match[1];
      if (!agentNames.has(name)) {
        unknownNames.add(name);
      }
    }
    for (const name of unknownNames) {
      report.warnings.push(
        `Command "${cmd.name}" references non-existent agent "${name}"`
      );
      // The lookahead (rather than \b) keeps "@foo" from matching inside
      // "@foo-bar" and still matches names that end in "-".
      cmd.content = cmd.content.replace(
        new RegExp(`@${escapeRegExp(name)}(?![\\w-])`, "g"),
        name
      );
      report.autoFixes.push(
        `Removed @${name} mention from command "${cmd.name}"`
      );
    }
  }
}
5142
- \`\`\`
5143
-
5144
- **Projects with Prettier/ESLint/Black** \u2014 auto-format on write:
5145
- \`\`\`json
5146
- {
5147
- "hooks": {
5148
- "PostToolUse": [{
5149
- "matcher": "Edit|Write",
5150
- "hooks": [{
5151
- "type": "command",
5152
- "command": "FILE=$(cat | jq -r '.tool_input.file_path // empty') && [ -n \\"$FILE\\" ] && npx prettier --write \\"$FILE\\" 2>/dev/null || true"
5153
- }]
5154
- }]
5647
/**
 * Scans agent bodies for `/project:name` mentions and records a warning for
 * every mention whose command name is not in `commandNames`.
 * Agent content is not modified.
 *
 * @param {{agents: Array<{name: string, content: string}>}} patched
 * @param {Set<string>} commandNames Names of the commands that exist.
 * @param {{warnings: string[]}} report Collector that receives the warnings.
 */
function validateCommandReferences(patched, commandNames, report) {
  patched.agents.forEach((agent) => {
    const mentioned = Array.from(
      agent.content.matchAll(COMMAND_REF_PATTERN),
      (hit) => hit[1]
    );
    mentioned
      .filter((commandName) => !commandNames.has(commandName))
      .forEach((commandName) => {
        report.warnings.push(
          `Agent "${agent.name}" references non-existent command "${commandName}"`
        );
      });
  });
}
5157
- \`\`\`
5158
-
5159
- Merge hooks into the \`settings\` object alongside permissions. Choose the formatter hook based on detected dependencies (Prettier \u2192 prettier, ESLint \u2192 eslint, Black \u2192 black).
5160
-
5161
- ## PostCompact Hook
5162
-
5163
- All projects should include a PostCompact hook to restore context after compaction:
5164
-
5165
- \`\`\`json
5166
- {
5167
- "hooks": {
5168
- "PostCompact": [{
5169
- "matcher": "",
5170
- "hooks": [{
5171
- "type": "prompt",
5172
- "prompt": "Re-read CLAUDE.md and docs/SPRINT.md (if it exists) to restore project context after compaction."
5173
- }]
5174
- }]
5660
/**
 * Appends the default `help` command when no command with that name exists,
 * and notes the injection in the report's auto-fixes.
 *
 * @param {{commands: Array<{name: string}>}} patched Harness mutated in place.
 * @param {{autoFixes: string[]}} report
 */
function injectHelpCommand(patched, report) {
  const hasHelp = patched.commands.some((command) => command.name === "help");
  if (hasHelp) {
    return;
  }
  const helpNode = createCommandNode(
    "help",
    DEFAULT_HELP_CONTENT,
    DEFAULT_HELP_DESCRIPTION
  );
  patched.commands.push(helpNode);
  report.autoFixes.push("Injected default /project:help command");
}
5177
- \`\`\`
5178
-
5179
- Merge this into the settings hooks alongside the PreToolUse and PostToolUse hooks.
5180
-
5181
- For long-running sessions (>2 hours or >3 compactions), prefer "Full Reset" over re-inject:
5182
- replace the prompt-type PostCompact hook with a command-type hook that pipes CLAUDE.md + SPRINT.md + DECISIONS.md content directly into additionalContext.
5183
-
5184
- ## Memory Persistence Hooks
5185
-
5186
- For projects with multi-session workflows, include SessionStart/End hooks that persist context to \`.claude/memory.json\`:
5187
- - **SessionEnd:** Save recent decisions, sprint status, and known gotchas to \`.claude/memory.json\`
5188
- - **SessionStart:** Load \`.claude/memory.json\` and inject as additionalContext
5189
-
5190
- This ensures accumulated project knowledge survives session boundaries.
5191
-
5192
- ## For Code Projects, Additionally Include
5193
-
5194
- - \`/project:plan\` command (plan before coding)
5195
- - \`/project:review\` command (review changes)
5196
- - \`/project:test\` command (run and fix tests)
5197
- - \`/project:commit\` command (conventional commits)
5198
- - \`/project:status\` command (live git status, recent commits, SPRINT.md overview using ! prefix)
5199
- - \`/project:fix\` command (takes $ARGUMENTS as issue number, plans fix, implements, tests, commits)
5200
- - \`/project:sprint\` command (define acceptance criteria before coding, writes to docs/SPRINT.md)
5201
- - \`/project:develop\` command (full development pipeline \u2014 orchestrates @architect \u2192 @planner \u2192 @implementer \u2192 @verifier \u2192 @fixer \u2192 @grill \u2192 @doc-updater through spec, plan, TDD implement, review, and doc update phases). Phase 4 (Verify) MUST validate EACH acceptance criterion from docs/SPRINT.md individually, reporting PASS/FAIL per item as a contract scorecard. MUST include a Phase 7 "Completion Gate" that runs a Completion Verification checklist before marking the feature done: re-read original requirements, confirm each is met with evidence, run test suite + lint/typecheck, review git diff for unexpected changes or debug artifacts, answer 3 perspective questions (test engineer, code reviewer, requesting user). If ANY check fails, loop back to fix before completing.
5202
- - A TDD skill using the 3-phase isolation pattern (RED \u2192 GREEN \u2192 REFACTOR):
5203
- - RED: Write failing test only. Verify it FAILS.
5204
- - GREEN: Write MINIMUM code to pass. Nothing extra.
5205
- - REFACTOR: Improve while keeping tests green.
5206
- Rules: never write tests and implementation in same step, AAA pattern, one assertion per test.
5207
- - A multi-agent QA pipeline:
5208
- - \`@qa-orchestrator\` (sonnet) \u2014 delegates to linter and e2e-tester, compiles QA report
5209
- - \`@linter\` (haiku) \u2014 runs formatters, linters, security scanners
5210
- - \`@e2e-tester\` (sonnet, only when Playwright is in tools) \u2014 browser-based QA via Playwright
5211
- - A "Model Selection" section in generated agents:
5212
- \`\`\`
5213
- ## Model Selection (all agents)
5214
- - Haiku: simple file edits, linting, formatting, doc updates (<50 lines changed)
5215
- - Sonnet: implementation, testing, debugging, code review (50-500 lines)
5216
- - Opus: architecture decisions, spec writing, complex refactors (>500 lines or cross-cutting)
5217
- Default: Sonnet. Only escalate to Opus when the task involves multi-file architecture or ambiguous requirements.
5218
- \`\`\`
5219
- - Development pipeline agents (used by /project:develop). Each agent should include a modelRouting field in its YAML frontmatter:
5220
- - \`@architect\` (default: opus) \u2014 conducts spec interview with user, writes confirmed spec to docs/SPRINT.md with numbered acceptance criteria. Your spec is a CONTRACT \u2014 the verifier will check every criterion. Vague criteria = guaranteed rework.
5221
- - \`@planner\` (default: sonnet, escalate to opus for cross-cutting changes) \u2014 reads spec and codebase, creates step-by-step implementation plan in docs/PLAN.md
5222
- - \`@implementer\` (default: sonnet, escalate to opus for cross-cutting changes) \u2014 TDD-focused implementation, writes failing tests then minimum code to pass
5223
- - \`@fixer\` (default: sonnet, use haiku for single-file fixes) \u2014 targeted bug fixing from verifier/review feedback
5224
- - \`@doc-updater\` (default: haiku) \u2014 extracts decisions and learnings from completed work, updates docs/DECISIONS.md and docs/LEARNINGS.md
5225
- - \`/project:spec\` command (interview-based spec creation \u2014 asks 5-8 questions one at a time, writes structured spec to docs/SPRINT.md with ## Acceptance Criteria containing 3-8 numbered, testable conditions. Each criterion must be independently verifiable. Does NOT start coding until confirmed)
5226
- - \`/project:prove\` command (runs tests, shows git diff vs main, rates confidence HIGH/MEDIUM/LOW with evidence)
5227
- - \`/project:grill\` command (adversarial code review \u2014 challenges each change with "why this approach?", "what if X input?", rates BLOCKER/SHOULD-FIX/NITPICK, blocks until BLOCKERs resolved)
5228
- - \`/project:reset\` command (reads DECISIONS.md and LEARNINGS.md, proposes clean restart, stashes current work, implements elegant solution)
5229
-
5230
- ## For Research Projects, Additionally Include
5231
-
5232
- - \`/project:research\` command (deep research on a topic)
5233
- - \`/project:summarize\` command (summarize findings)
5234
- - A research-synthesis skill
5235
- - A researcher agent
5236
- - Note: the Verification section in CLAUDE.md should adapt for research \u2014 e.g. "Verify all sources are cited" instead of build/test commands
5237
-
5238
- ## For Content/Writing Projects, Additionally Include
5239
-
5240
- - \`/project:draft\` command (write first draft)
5241
- - \`/project:edit\` command (review and improve writing)
5242
- - A writing-workflow skill
5243
-
5244
- ## Hermes Runtime
5245
-
5246
- When generating for Hermes runtime, the same EnvironmentSpec JSON is produced. The adapter layer handles conversion:
5247
- - MCP config entries \u2192 Hermes config.yaml mcp_servers
5248
- - Commands and skills \u2192 ~/.hermes/skills/ markdown files
5249
- - Rules \u2192 ~/.hermes/skills/rule-*.md files
5250
-
5251
- The LLM output format does not change. Adapter-level conversion happens post-compilation.
5252
-
5253
- ## Autonomy Levels
5254
-
5255
- The user may specify an autonomy level (1-4). This affects CLAUDE.md content:
5256
-
5257
- - **Level 1 (Guided):** Add a "Workflow" section showing recommended command flow (e.g., spec \u2192 sprint \u2192 plan \u2192 code \u2192 prove \u2192 grill \u2192 commit) and a "When to Use What" reference table.
5258
- - **Level 2 (Assisted):** Level 1 content + mention /project:loop in the workflow section and @pm in the agents section of CLAUDE.md.
5259
- - **Level 3 (Autonomous):** Level 2 content + mention /project:auto and worktree-based PR delivery workflow.
5260
- - **Level 4 (Full Auto):** Level 3 content + add a prominent warning section about autonomous operation.
5261
-
5262
- The autonomy-specific commands, agents, and hooks are injected post-compilation. Focus on tailoring the CLAUDE.md content and workflow guidance for the selected level.
5263
-
5264
- If no autonomy level is specified, assume Level 1 (Guided).
5265
-
5266
- ## Output Schema
5267
-
5268
- Return ONLY valid JSON matching this structure:
5269
-
5270
- \`\`\`json
5271
- {
5272
- "claude_md": "Full CLAUDE.md content (under 150 lines)",
5273
- "commands": { "help": "...", "develop": "...", "status": "...", "fix": "...", "sprint": "...", "spec": "...", "prove": "...", "grill": "...", "reset": "..." },
5274
- "rules": { "continuity": "...", "security": "..." },
5275
- "agents": { "architect": "...", "planner": "...", "implementer": "...", "fixer": "...", "doc-updater": "...", "qa-orchestrator": "...", "linter": "...", "e2e-tester": "..." },
5276
- "skills": { "skill-name/SKILL": "..." },
5277
- "docs": { "DECISIONS": "...", "LEARNINGS": "...", "SPRINT": "..." }
5669
/**
 * Appends the default `security` rule when no rule with that name exists,
 * and notes the injection in the report's auto-fixes.
 *
 * @param {{rules: Array<{name: string}>}} patched Harness mutated in place.
 * @param {{autoFixes: string[]}} report
 */
function injectSecurityRule(patched, report) {
  const hasSecurity = patched.rules.some((rule) => rule.name === "security");
  if (hasSecurity) {
    return;
  }
  patched.rules.push(createRuleNode("security", DEFAULT_SECURITY_CONTENT));
  report.autoFixes.push("Injected default security rule");
}
5676
/**
 * Appends the default `continuity` rule when no rule with that name exists,
 * and notes the injection in the report's auto-fixes.
 *
 * @param {{rules: Array<{name: string}>}} patched Harness mutated in place.
 * @param {{autoFixes: string[]}} report
 */
function injectContinuityRule(patched, report) {
  const hasContinuity = patched.rules.some((rule) => rule.name === "continuity");
  if (hasContinuity) {
    return;
  }
  patched.rules.push(createRuleNode("continuity", DEFAULT_CONTINUITY_CONTENT));
  report.autoFixes.push("Injected default continuity rule");
}
5683
/**
 * Backslash-escapes the regular-expression metacharacters in `s` so the
 * result can be embedded in a `RegExp` source and match `s` literally.
 *
 * @param {string} s
 * @returns {string}
 */
function escapeRegExp(s) {
  return s.replace(/[.*+?^${}()|[\]\\]/g, (metaChar) => `\\${metaChar}`);
}
5686
/**
 * Cross-checks a compiled harness and patches in missing defaults.
 *
 * Works on a JSON round-trip copy of `ir`, so the caller's object is left
 * untouched. Returns the patched copy together with a report of warnings and
 * auto-fixes gathered along the way.
 *
 * @param {object} ir Harness with `agents`, `commands` and `rules` arrays.
 * @returns {{ir: object, report: {warnings: string[], autoFixes: string[]}}}
 */
function linkHarness(ir) {
  const patched = JSON.parse(JSON.stringify(ir));
  const report = { warnings: [], autoFixes: [] };
  const agentNames = new Set(patched.agents.map((a) => a.name));
  validateAgentReferences(patched, agentNames, report);
  // Inject the help command BEFORE snapshotting command names. Otherwise an
  // agent that mentions /project:help is warned about a command that this
  // very function guarantees will exist. Running it here, after the agent
  // check, keeps the order of report.autoFixes entries unchanged.
  injectHelpCommand(patched, report);
  const commandNames = new Set(patched.commands.map((c) => c.name));
  validateCommandReferences(patched, commandNames, report);
  injectSecurityRule(patched, report);
  injectContinuityRule(patched, report);
  return { ir: patched, report };
}
5279
- \`\`\`
5280
5698
 
5281
- Return ONLY valid JSON. No markdown fences. No text outside the JSON.`;
5282
- var SYSTEM_PROMPT = `You are the Kairn environment compiler. Your job is to generate a minimal, optimal Claude Code agent environment from a user's natural language description of what they want their agent to do.
5699
+ // src/compiler/agents/sections-writer.ts
5700
+ init_llm();
5701
+ init_types2();
5702
+ var SECTIONS_SYSTEM_PROMPT = `You are the Kairn sections writer. Generate CLAUDE.md sections for a development environment.
5703
+
5704
+ You will receive a project description and a list of section IDs to generate. Each section should be well-structured markdown.
5705
+
5706
+ ## Standard Sections (generate those requested)
5707
+ - purpose: Project purpose and goals (## Purpose heading, but use project-specific title like "# ProjectName Development")
5708
+ - tech-stack: Languages, frameworks, tools (## Tech Stack)
5709
+ - commands: Build/dev/test commands (## Commands, use code blocks)
5710
+ - architecture: Project structure (## Architecture, use code blocks for tree)
5711
+ - conventions: Coding conventions (## Conventions, bullet points)
5712
+ - key-commands: Slash commands reference (## Key Commands, bullet list)
5713
+ - output: Build output paths (## Output)
5714
+ - verification: Post-edit verification steps (## Verification)
5715
+ - gotchas: Known issues and footguns (## Known Gotchas)
5716
+ - debugging: Debugging tips (## Debugging)
5717
+ - git-workflow: Git conventions (## Git Workflow)
5718
+ - engineering-standards: Code quality standards (## Engineering Standards)
5283
5719
 
5284
- You will receive:
5285
- 1. The user's intent (what they want to build/do)
5286
- 2. A tool registry (available MCP servers, plugins, and hooks)
5720
+ ## Rules
5721
+ - Each section: 5-20 lines of content
5722
+ - Use project-specific details, not generic advice
5723
+ - Markdown formatting: headers, bullets, code blocks
5724
+ - Be concise but informative
5287
5725
 
5288
- You must output a JSON object matching the EnvironmentSpec schema.
5726
+ ## Output Format
5727
+ Return a JSON array:
5728
+ [
5729
+ { "id": "purpose", "heading": "# ProjectName Development", "content": "..." },
5730
+ { "id": "tech-stack", "heading": "## Tech Stack", "content": "..." }
5731
+ ]`;
5732
/**
 * Asks the LLM for the sections listed in `task.items` and returns them
 * tagged with the "sections-writer" agent name. With an empty item list no
 * LLM call is made and an empty section list is returned.
 *
 * @param {string} intent
 * @param {object} skeleton Forwarded to `buildUserMessage`.
 * @param {{items: string[], max_tokens: number}} task
 * @param {object} config Forwarded to `callLLM`.
 * @returns {Promise<{agent: string, sections: Array}>}
 */
async function generateSections(intent, skeleton, task, config) {
  const nothingRequested = task.items.length === 0;
  if (nothingRequested) {
    return { agent: "sections-writer", sections: [] };
  }
  const prompt = buildUserMessage(intent, skeleton, task);
  const llmOptions = {
    systemPrompt: SECTIONS_SYSTEM_PROMPT,
    maxTokens: task.max_tokens,
    agentName: "sections-writer",
    cacheControl: true
  };
  const reply = await callLLM(config, prompt, llmOptions);
  return { agent: "sections-writer", sections: parseSectionsResponse(reply) };
}
5746
/**
 * Builds the user prompt for the sections writer: project intent, tech stack,
 * workflow type and requested section IDs as markdown blocks, an optional
 * "Additional Context" block, and a closing instruction. Blocks are separated
 * by blank lines.
 *
 * @param {string} intent
 * @param {{outline: {tech_stack: string[], workflow_type: string}}} skeleton
 * @param {{items: string[], context_hint?: string}} task
 * @returns {string}
 */
function buildUserMessage(intent, skeleton, task) {
  const block = (heading, body) => `${heading}\n${body}`;
  const { outline } = skeleton;
  const blocks = [
    block("## Project", intent),
    block("## Tech Stack", outline.tech_stack.join(", ")),
    block("## Workflow", outline.workflow_type),
    block("## Sections to Generate", task.items.join(", "))
  ];
  // The hint block is only emitted for a truthy hint.
  if (task.context_hint) {
    blocks.push(block("## Additional Context", task.context_hint));
  }
  blocks.push("Generate the sections JSON array now.");
  return blocks.join("\n\n");
}
5764
/**
 * Extracts the JSON array of sections from an LLM reply and converts each
 * entry into a section via `createSection`.
 *
 * A leading/trailing markdown code fence is stripped, then the span from the
 * first `[` to the last `]` is parsed as JSON. Missing fields fall back to
 * defaults (`section-<index>` for the id, an empty string for heading and
 * content). Entries that are `null` get the same defaults instead of crashing
 * with a TypeError.
 *
 * @param {string} text Raw LLM reply.
 * @returns {Array} One section per array entry, in reply order.
 * @throws {Error} When the reply contains no JSON array.
 * @throws {SyntaxError} When the extracted span is not valid JSON.
 */
function parseSectionsResponse(text) {
  let cleaned = text.trim();
  if (cleaned.startsWith("```")) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }
  const jsonMatch = cleaned.match(/\[[\s\S]*\]/);
  if (!jsonMatch) {
    throw new Error(
      "sections-writer: response did not contain a JSON array"
    );
  }
  const parsed = JSON.parse(jsonMatch[0]);
  if (!Array.isArray(parsed)) {
    throw new Error("sections-writer: expected JSON array");
  }
  return parsed.map((item, index) => {
    // JSON arrays may legally contain null; treat it like an empty object so
    // the per-field defaults below apply.
    const obj = item ?? {};
    return createSection(
      String(obj.id ?? `section-${index}`),
      String(obj.heading ?? ""),
      String(obj.content ?? ""),
      index
    );
  });
}
5289
5789
 
5290
- ## Core Principles
5790
+ // src/compiler/agents/rule-writer.ts
5791
+ init_llm();
5792
+ init_types2();
5793
+ var SYSTEM_PROMPT2 = `You are the @rule-writer specialist inside the Kairn compilation pipeline.
5291
5794
 
5292
- - **Minimalism over completeness.** Fewer, well-chosen tools beat many generic ones. Each MCP server costs 500-2000 context tokens.
5293
- - **Workflow-specific, not generic.** Every instruction, command, and rule must relate to the user's actual workflow.
5294
- - **Concise CLAUDE.md.** Under 150 lines. No generic text like "be helpful." Include build/test commands, reference docs/ and skills/.
5295
- - **Security by default.** Always include deny rules for destructive commands and secret file access.
5795
+ Your job is to generate Claude Code rule files (.claude/rules/*.md) for a project.
5296
5796
 
5297
- ## CLAUDE.md Template (mandatory structure)
5797
+ Each rule file may be **global** (applies everywhere) or **path-scoped** (applies only
5798
+ when the user edits files matching certain globs).
5298
5799
 
5299
- The \`claude_md\` field MUST follow this exact structure (max 150 lines):
5800
+ ## Output format
5300
5801
 
5301
- \`\`\`
5302
- # {Project Name}
5802
+ Return a JSON array. Each element:
5303
5803
 
5304
- ## Purpose
5305
- {one-line description}
5804
+ {
5805
+ "name": "rule-slug",
5806
+ "content": "Markdown content of the rule file.",
5807
+ "paths": null
5808
+ }
5809
+
5810
+ - **name**: kebab-case slug (e.g. "security", "api-conventions", "testing").
5811
+ - **content**: The full Markdown body of the rule. Be specific, actionable, and concise.
5812
+ Write imperative statements ("Do X", "Never Y"). Avoid vague advice.
5813
+ - **paths**: Either null (global rule) or a string array of glob patterns
5814
+ (e.g. ["src/api/**", "src/routes/**"]).
5815
+
5816
+ ## Required rules
5817
+
5818
+ Every project MUST include:
5819
+ 1. **security** -- baseline security constraints (no secrets in code, input validation,
5820
+ safe file I/O, no dynamic code execution, deny dangerous shell patterns).
5821
+ 2. **continuity** -- project memory rules (update decision logs, learning docs, track
5822
+ TODO progress, document gotchas).
5823
+
5824
+ If the user's rule list doesn't mention these, generate them anyway.
5825
+
5826
+ ## Guidelines
5827
+
5828
+ - Rules should be 5-20 lines of Markdown each.
5829
+ - Use bullet points for lists of constraints.
5830
+ - Path-scoped rules are for conventions that only matter in specific directories
5831
+ (e.g. API conventions for src/api/**, test rules for **/*.test.ts).
5832
+ - Global rules apply to the whole project (security, continuity, git workflow).
5833
+ - Do NOT include YAML frontmatter in the content -- the paths field handles scoping.
5834
+ - Return ONLY the JSON array. No explanation, no wrapping text.`;
5835
// Content used for the `security` rule when the rule-writer LLM reply does
// not include one.
var DEFAULT_SECURITY_CONTENT2 = `# Security Rules

- NEVER log or echo API keys, tokens, or secrets
- NEVER write secrets to files outside designated config locations
- NEVER execute user-provided strings as shell commands
- NEVER use dynamic code execution with untrusted input
- Validate all external input before processing
- Sanitize all file paths -- prevent path traversal (../)
- Deny dangerous shell patterns: rm -rf /, curl|sh, wget|sh`;

// Content used for the `continuity` rule when the rule-writer LLM reply does
// not include one.
var DEFAULT_CONTINUITY_CONTENT2 = `# Continuity

After every significant decision or discovery:

1. Update decision logs with what was decided and why
2. Document non-obvious behavior, gotchas, and footguns
3. Update task status as work progresses
4. If a mistake is corrected, add it to the known gotchas section

These files are the project memory. Keep them current.`;
5858
/**
 * Parses the rule-writer LLM reply into an array.
 *
 * When the trimmed reply opens with a markdown code fence, the opening fence
 * (optionally tagged `json`) and a trailing fence are stripped before parsing.
 *
 * @param {string} raw Raw LLM reply.
 * @returns {Array} The parsed JSON array, unvalidated.
 * @throws {SyntaxError} When the text is not valid JSON.
 * @throws {Error} When the JSON value is not an array.
 */
function parseRulesJSON(raw) {
  const trimmed = raw.trim();
  const payload = trimmed.startsWith("```")
    ? trimmed.replace(/^```(?:json)?\s*\n?/, "").replace(/\n?```\s*$/, "")
    : trimmed;
  const value = JSON.parse(payload);
  if (Array.isArray(value)) {
    return value;
  }
  throw new Error("Expected JSON array from rule-writer LLM response");
}
5871
/**
 * Builds the user prompt for the rule writer: the project intent, a bullet
 * list of the requested rules, and the skeleton outline pretty-printed as
 * JSON, joined with newlines.
 *
 * @param {string} intent
 * @param {{outline: object}} skeleton
 * @param {{items: string[]}} task
 * @returns {string}
 */
function buildUserMessage2(intent, skeleton, task) {
  const requestedRules = task.items.map((item) => `- ${item}`);
  const outlineJson = JSON.stringify(skeleton.outline, null, 2);
  const segments = ["## Project intent", intent, ""];
  segments.push("## Rules to generate", ...requestedRules, "");
  segments.push("## Project context", outlineJson);
  return segments.join("\n");
}
5884
/**
 * Asks the LLM for the rules listed in `task.items`, converts each returned
 * entry into a rule node, and makes sure `security` and `continuity` rules
 * are present. With an empty item list no LLM call is made and an empty rule
 * list is returned.
 *
 * @param {string} intent
 * @param {object} skeleton Forwarded to `buildUserMessage2`.
 * @param {{items: string[], max_tokens: number}} task
 * @param {object} config Forwarded to `callLLM`.
 * @returns {Promise<{agent: string, rules: Array}>}
 */
async function generateRules(intent, skeleton, task, config) {
  if (task.items.length === 0) {
    return { agent: "rule-writer", rules: [] };
  }
  const prompt = buildUserMessage2(intent, skeleton, task);
  const llmOptions = {
    systemPrompt: SYSTEM_PROMPT2,
    cacheControl: true,
    maxTokens: task.max_tokens
  };
  const reply = await callLLM(config, prompt, llmOptions);
  const rules = [];
  for (const entry of parseRulesJSON(reply)) {
    // A null `paths` is passed on as undefined.
    rules.push(createRuleNode(entry.name, entry.content, entry.paths ?? undefined));
  }
  ensureRequiredRule(rules, "security", DEFAULT_SECURITY_CONTENT2);
  ensureRequiredRule(rules, "continuity", DEFAULT_CONTINUITY_CONTENT2);
  return { agent: "rule-writer", rules };
}
5906
/**
 * Prepends a rule built from `defaultContent` to `rules` when no rule named
 * `name` is present. `rules` is mutated in place.
 *
 * @param {Array<{name: string}>} rules
 * @param {string} name
 * @param {string} defaultContent
 */
function ensureRequiredRule(rules, name, defaultContent) {
  const position = rules.findIndex((rule) => rule.name === name);
  if (position !== -1) {
    return;
  }
  rules.unshift(createRuleNode(name, defaultContent));
}
5306
5912
 
5307
- ## Tech Stack
5308
- {bullet list of frameworks/languages}
5913
+ // src/compiler/agents/doc-writer.ts
5914
+ init_llm();
5915
+ var DEFAULT_DECISIONS = `# Decisions
5309
5916
 
5310
- ## Commands
5311
- {concrete build/test/lint/dev commands}
5917
+ | Date | Decision | Rationale |
5918
+ |------|----------|-----------|`;
5919
+ var DEFAULT_LEARNINGS = `# Learnings
5312
5920
 
5313
- ## Architecture
5314
- {brief folder structure, max 10 lines}
5921
+ | Date | Learning | Impact |
5922
+ |------|----------|--------|`;
5923
+ var DEFAULT_SPRINT = `# Sprint
5315
5924
 
5316
- ## Conventions
5317
- {3-5 specific coding rules}
5925
+ ## Acceptance Criteria
5318
5926
 
5319
- ## Key Commands
5320
- {list /project: commands with descriptions}
5927
+ - [ ] Criterion 1
5321
5928
 
5322
- ## Output
5323
- {where results go, key files}
5929
+ ## Status
5324
5930
 
5325
- ## Verification
5326
- After implementing any change, verify it works:
5327
- - {build command} \u2014 must pass with no errors
5328
- - {test command} \u2014 all tests must pass
5329
- - {lint command} \u2014 no warnings or errors
5330
- - {type check command} \u2014 no type errors
5931
+ Not started`;
5932
+ var REQUIRED_DOCS = [
5933
+ { name: "DECISIONS", defaultContent: DEFAULT_DECISIONS },
5934
+ { name: "LEARNINGS", defaultContent: DEFAULT_LEARNINGS },
5935
+ { name: "SPRINT", defaultContent: DEFAULT_SPRINT }
5936
+ ];
5937
+ var DOC_WRITER_SYSTEM_PROMPT = `You are the doc-writer specialist agent in a multi-agent compilation pipeline.
5331
5938
 
5332
- If any verification step fails, fix the issue before moving on.
5333
- Do NOT skip verification steps.
5939
+ Your role: generate documentation files for a Claude Code agent environment's \`.claude/docs/\` directory.
5334
5940
 
5335
- ## Known Gotchas
5336
- <!-- After any correction, add it here: "Update CLAUDE.md so you don't make that mistake again." -->
5337
- <!-- Prune this section when it exceeds 10 items \u2014 keep only the recurring ones. -->
5338
- - (none yet \u2014 this section grows as you work)
5941
+ ## Output Format
5339
5942
 
5340
- ## Debugging
5341
- When debugging, paste raw error output. Don't summarize \u2014 Claude works better with raw data.
5342
- Use subagents for deep investigation to keep main context clean.
5943
+ Return a JSON array of objects, each with "name" (string) and "content" (string):
5343
5944
 
5344
- ## Git Workflow
5345
- - Prefer small, focused commits (one feature or fix per commit)
5346
- - Use conventional commits: feat:, fix:, docs:, refactor:, test:
5347
- - Target < 200 lines per PR when possible
5945
+ \`\`\`json
5946
+ [
5947
+ { "name": "DECISIONS", "content": "# Decisions\\n\\n| Date | Decision | Rationale |\\n|------|----------|-----------|" },
5948
+ { "name": "LEARNINGS", "content": "# Learnings\\n\\n| Date | Learning | Impact |\\n|------|----------|--------|" }
5949
+ ]
5950
+ \`\`\`
5348
5951
 
5349
- ## Engineering Standards
5350
- - Lead with answers over reasoning. Be concise.
5351
- - Use absolute file paths in all references.
5352
- - No filler, no inner monologue, no time estimates.
5353
- - Produce load-bearing code \u2014 every line of output should be actionable.
5952
+ ## Document Templates
5354
5953
 
5355
- ## Tool Usage Policy
5356
- - Prefer Edit tool over sed/awk for file modifications
5357
- - Prefer Grep tool over rg for searching
5358
- - Prefer Read tool over cat for file reading
5359
- - Reserve Bash for: builds, installs, git, network, processes
5360
- - Read and understand existing code before modifying
5361
- - Delete unused code completely \u2014 no compatibility shims
5954
+ Each doc should follow these structural patterns:
5362
5955
 
5363
- ## Code Philosophy
5364
- - Do not create abstractions for one-time operations
5365
- - Complete the task fully \u2014 don't gold-plate, but don't leave it half-done
5366
- - Prefer editing existing files over creating new ones
5956
+ - **DECISIONS**: Markdown table with Date, Decision, Rationale columns. Track architectural and design choices.
5957
+ - **LEARNINGS**: Markdown table with Date, Learning, Impact columns. Track non-obvious discoveries and gotchas.
5958
+ - **SPRINT**: Must include an "## Acceptance Criteria" section with checkbox items (\`- [ ] ...\`) and a "## Status" section. Track current sprint goals.
5367
5959
 
5368
- ## First Turn Protocol
5960
+ ## Guidelines
5369
5961
 
5370
- At the start of every session, before doing ANY work:
5371
- 1. Run \`pwd && ls -la && git status --short\` to orient yourself
5372
- 2. Check relevant runtimes (e.g. \`node --version\`, \`python3 --version\` \u2014 pick what fits this project)
5373
- 3. Read any task-tracking files (docs/SPRINT.md, docs/DECISIONS.md)
5374
- 4. Summarize what you see in 2-3 lines, then proceed
5962
+ - Content should be tailored to the project intent provided
5963
+ - Use Markdown formatting with clear headers
5964
+ - Acceptance Criteria in SPRINT docs must use checkbox format: \`- [ ] Criterion\`
5965
+ - Keep templates practical \u2014 they'll be filled in during development
5966
+ - Return ONLY the JSON array, no surrounding text`;
5967
/**
 * Trims `raw` and removes a leading markdown fence line (optionally tagged
 * `json`) and a trailing fence line. Each fence is removed independently and
 * only when it sits on its own line.
 *
 * @param {string} raw
 * @returns {string}
 */
function stripCodeFences(raw) {
  // A replace with no match returns the input unchanged, so no test is needed.
  return raw
    .trim()
    .replace(/^```(?:json)?\s*\n/, "")
    .replace(/\n```\s*$/, "")
    .trim();
}
5979
/**
 * Asks the LLM for the documentation files listed in `task.items`, parses the
 * reply, and passes the result through `ensureRequiredDocs`. With an empty
 * item list no LLM call is made and an empty doc list is returned.
 *
 * @param {string} intent
 * @param {object} skeleton Forwarded to `buildUserMessage3`.
 * @param {{items: string[], max_tokens: number}} task
 * @param {object} config Forwarded to `callLLM`.
 * @returns {Promise<{agent: string, docs: Array}>}
 */
async function generateDocs(intent, skeleton, task, config) {
  const nothingRequested = task.items.length === 0;
  if (nothingRequested) {
    return { agent: "doc-writer", docs: [] };
  }
  const prompt = buildUserMessage3(intent, skeleton, task);
  const llmOptions = {
    systemPrompt: DOC_WRITER_SYSTEM_PROMPT,
    cacheControl: true,
    maxTokens: task.max_tokens
  };
  const reply = await callLLM(config, prompt, llmOptions);
  return { agent: "doc-writer", docs: ensureRequiredDocs(parseDocResponse(reply)) };
}
5993
/**
 * Builds the user prompt for the doc writer: the project intent, a bullet
 * list of the requested documentation files, and the expected reply shape.
 *
 * @param {string} intent
 * @param {object} _skeleton Unused.
 * @param {{items: string[]}} task
 * @returns {string}
 */
function buildUserMessage3(intent, _skeleton, task) {
  const requestedDocs = task.items.map((item) => `- ${item}`).join("\n");
  const lines = [
    `Project intent: ${intent}`,
    "",
    "Generate the following documentation files:",
    requestedDocs,
    "",
    'Return a JSON array of { "name": string, "content": string } objects.'
  ];
  return lines.join("\n");
}
6002
/**
 * Reports whether `value` is a non-null object whose `name` and `content`
 * properties are both strings.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isDocShape(value) {
  return (
    value !== null &&
    typeof value === "object" &&
    typeof value.name === "string" &&
    typeof value.content === "string"
  );
}
6007
/**
 * Parses the doc-writer LLM reply into `{ name, content }` records.
 *
 * Code fences are stripped before parsing. A reply that parses to anything
 * other than an array yields an empty list, and array entries that fail
 * `isDocShape` are dropped. Extra properties on kept entries are discarded.
 *
 * @param {string} raw Raw LLM reply.
 * @returns {Array<{name: string, content: string}>}
 * @throws {SyntaxError} When the stripped text is not valid JSON.
 */
function parseDocResponse(raw) {
  const payload = JSON.parse(stripCodeFences(raw));
  if (!Array.isArray(payload)) {
    return [];
  }
  const docs = [];
  for (const entry of payload) {
    if (isDocShape(entry)) {
      docs.push({ name: entry.name, content: entry.content });
    }
  }
  return docs;
}
6015
/**
 * Returns a new list holding `docs` followed by a default entry for every
 * `REQUIRED_DOCS` item whose name is not already present. The input array is
 * not modified.
 *
 * @param {Array<{name: string, content: string}>} docs
 * @returns {Array<{name: string, content: string}>}
 */
function ensureRequiredDocs(docs) {
  const presentNames = new Set(docs.map((doc) => doc.name));
  const fallbacks = REQUIRED_DOCS
    .filter((required) => !presentNames.has(required.name))
    .map((required) => ({
      name: required.name,
      content: required.defaultContent
    }));
  return [...docs, ...fallbacks];
}
5379
6028
 
5380
- Before implementing, confirm acceptance criteria exist in docs/SPRINT.md.
5381
- Each criterion must be numbered, testable, and independently verifiable.
5382
- After implementing, verify EACH criterion individually. Do not mark done until all pass.
6029
+ // src/compiler/agents/command-writer.ts
6030
+ init_llm();
6031
+ init_types2();
6032
+ var BATCH_SIZE = 8;
6033
+ var BATCH_THRESHOLD = 10;
6034
+ var DEFAULT_HELP_CONTENT2 = `Show available /project: commands and their descriptions.
5383
6035
 
5384
- ## Completion Standards
6036
+ List all slash commands with a brief description of what each does.`;
6037
+ var DEFAULT_HELP_DESCRIPTION2 = "Show available commands and their descriptions";
6038
+ var SYSTEM_PROMPT3 = `You are @command-writer, a specialist agent that generates Claude Code slash commands.
5385
6039
 
5386
- Never mark a task "done" without running the Completion Verification checklist.
5387
- Tests passing is necessary but not sufficient \u2014 also verify requirements coverage,
5388
- state cleanliness, and review changes from the perspective of a test engineer,
5389
- code reviewer, and the requesting user.
6040
+ ## Output Format
6041
+ Return a JSON array of command objects. Each object has:
6042
+ - "name": the command name (no /project: prefix, just the bare name like "build", "test")
6043
+ - "description": a one-line description of what the command does
6044
+ - "content": the full command body (markdown text with optional shell integration)
6045
+
6046
+ ## Shell Integration
6047
+ Commands can execute shell commands using the ! prefix:
6048
+ - \`!npm run build\` \u2014 runs the command directly
6049
+ - \`!$ARGUMENTS\` \u2014 passes user arguments to a shell command
6050
+ - Multiple ! lines are run in sequence
6051
+
6052
+ ## Command Patterns
6053
+ - **Build/Test**: Direct shell execution with !
6054
+ - **Workflow**: Multi-step orchestration instructions in natural language
6055
+ - **Review**: Instructions for Claude to analyze code
6056
+ - **Deploy**: Safety checks + shell execution
6057
+
6058
+ ## Example Output
6059
+ \`\`\`json
6060
+ [
6061
+ {
6062
+ "name": "build",
6063
+ "description": "Build the project",
6064
+ "content": "Run the full build pipeline.\\n\\n!npm run build"
6065
+ },
6066
+ {
6067
+ "name": "test",
6068
+ "description": "Run the test suite",
6069
+ "content": "Execute all tests and report results.\\n\\n!npm test"
6070
+ },
6071
+ {
6072
+ "name": "review",
6073
+ "description": "Review staged changes",
6074
+ "content": "Review all staged git changes for:\\n- Code quality issues\\n- Security concerns\\n- Missing tests\\n\\nProvide actionable feedback."
6075
+ }
6076
+ ]
5390
6077
  \`\`\`
5391
6078
 
5392
- Do not add generic filler. Every line must be specific to the user's workflow.
5393
-
5394
- ## What You Must Always Include
5395
-
5396
- 1. A concise, workflow-specific \`claude_md\` (the CLAUDE.md content)
5397
- 2. A \`/project:help\` command that explains the environment
5398
- 3. A \`docs/DECISIONS.md\` file for architectural decisions
5399
- 4. A \`docs/LEARNINGS.md\` file for non-obvious discoveries
5400
- 5. A \`rules/continuity.md\` rule encouraging updates to DECISIONS.md and LEARNINGS.md
5401
- 6. A \`rules/security.md\` rule with essential security instructions
5402
- 7. settings.json with deny rules for \`rm -rf\`, \`curl|sh\`, reading \`.env\` and \`secrets/\`
5403
- 8. A \`/project:status\` command for code projects (uses ! for live git/SPRINT.md output)
5404
- 9. A \`/project:fix\` command for code projects (uses $ARGUMENTS for issue number)
5405
- 10. A \`docs/SPRINT.md\` file as the living spec/plan (replaces TODO.md \u2014 acceptance criteria, verification steps)
5406
- 11. A "Verification" section in CLAUDE.md with concrete verify commands for the project
5407
- 12. A "Known Gotchas" section in CLAUDE.md (starts empty, grows with corrections)
5408
- 13. A "Debugging" section in CLAUDE.md (2 lines: paste raw errors, use subagents)
5409
- 14. A "Git Workflow" section in CLAUDE.md (3 rules: small commits, conventional format, <200 lines PR)
5410
- 15. "Engineering Standards", "Tool Usage Policy", and "Code Philosophy" sections in CLAUDE.md
5411
- 16. A "First Turn Protocol" section in CLAUDE.md (orient before working: pwd, ls, git status, check relevant runtimes, read task files)
5412
- 17. A "Completion Standards" section in CLAUDE.md (never mark done without verifying: requirements met, tests passing, no debug artifacts, reviewed from 3 perspectives)
5413
- 18. A "Sprint Contract" section in CLAUDE.md (confirm acceptance criteria exist before implementing, verify each criterion after)
5414
-
5415
- ## Tool Selection Rules
6079
+ ## Rules
6080
+ - Command names are kebab-case, lowercase
6081
+ - Content should be actionable and specific to the project
6082
+ - Include shell commands (!) where appropriate for automation
6083
+ - Keep descriptions under 80 characters
6084
+ - Return ONLY the JSON array, no surrounding text`;
6085
+ function parseCommandResponse(text) {
6086
+ let cleaned = text.trim();
6087
+ if (cleaned.startsWith("```")) {
6088
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
6089
+ }
6090
+ const arrayMatch = cleaned.match(/\[[\s\S]*\]/);
6091
+ if (!arrayMatch) {
6092
+ throw new Error("@command-writer: LLM response did not contain a JSON array.");
6093
+ }
6094
+ const parsed = JSON.parse(arrayMatch[0]);
6095
+ if (!Array.isArray(parsed)) {
6096
+ throw new Error("@command-writer: parsed response is not an array.");
6097
+ }
6098
+ return parsed.map((item) => {
6099
+ const obj = item;
6100
+ if (typeof obj.name !== "string" || typeof obj.content !== "string") {
6101
+ throw new Error("@command-writer: each command must have 'name' and 'content' strings.");
6102
+ }
6103
+ return {
6104
+ name: obj.name,
6105
+ description: typeof obj.description === "string" ? obj.description : "",
6106
+ content: obj.content
6107
+ };
6108
+ });
6109
+ }
6110
+ function buildUserMessage4(intent, skeleton, batchItems, phaseAContext) {
6111
+ const lines = [];
6112
+ lines.push("## Project Context");
6113
+ lines.push(`Intent: ${intent}`);
6114
+ lines.push(`Tech stack: ${skeleton.outline.tech_stack.join(", ")}`);
6115
+ lines.push(`Workflow type: ${skeleton.outline.workflow_type}`);
6116
+ lines.push("");
6117
+ if (phaseAContext) {
6118
+ lines.push("## Reference (from Phase A)");
6119
+ lines.push(phaseAContext);
6120
+ lines.push("");
6121
+ }
6122
+ lines.push("## Commands to Generate");
6123
+ for (const item of batchItems) {
6124
+ lines.push(`- ${item}`);
6125
+ }
6126
+ lines.push("");
6127
+ lines.push("Generate the JSON array of command objects now.");
6128
+ return lines.join("\n");
6129
+ }
6130
+ function chunk(arr, size) {
6131
+ const chunks = [];
6132
+ for (let i = 0; i < arr.length; i += size) {
6133
+ chunks.push(arr.slice(i, i + size));
6134
+ }
6135
+ return chunks;
6136
+ }
6137
+ function ensureHelpCommand(commands) {
6138
+ const hasHelp = commands.some((c) => c.name === "help");
6139
+ if (hasHelp) {
6140
+ return commands;
6141
+ }
6142
+ return [
6143
+ ...commands,
6144
+ createCommandNode("help", DEFAULT_HELP_CONTENT2, DEFAULT_HELP_DESCRIPTION2)
6145
+ ];
6146
+ }
6147
+ function deduplicateCommands(commands) {
6148
+ const seen = /* @__PURE__ */ new Set();
6149
+ const result = [];
6150
+ for (const cmd of commands) {
6151
+ if (!seen.has(cmd.name)) {
6152
+ seen.add(cmd.name);
6153
+ result.push(cmd);
6154
+ }
6155
+ }
6156
+ return result;
6157
+ }
6158
+ async function generateBatch(intent, skeleton, batchItems, config, maxTokens, phaseAContext) {
6159
+ const userMessage = buildUserMessage4(intent, skeleton, batchItems, phaseAContext);
6160
+ const responseText = await callLLM(config, userMessage, {
6161
+ systemPrompt: SYSTEM_PROMPT3,
6162
+ cacheControl: true,
6163
+ maxTokens
6164
+ });
6165
+ const rawCommands = parseCommandResponse(responseText);
6166
+ return rawCommands.map(
6167
+ (c) => createCommandNode(c.name, c.content, c.description)
6168
+ );
6169
+ }
6170
+ async function generateCommands(intent, skeleton, task, config) {
6171
+ if (task.items.length === 0) {
6172
+ return { agent: "command-writer", commands: [] };
6173
+ }
6174
+ let allCommands;
6175
+ if (task.items.length > BATCH_THRESHOLD) {
6176
+ const batches = chunk(task.items, BATCH_SIZE);
6177
+ const batchResults = [];
6178
+ for (const batch of batches) {
6179
+ const nodes = await generateBatch(intent, skeleton, batch, config, task.max_tokens, task.context_hint);
6180
+ batchResults.push(nodes);
6181
+ }
6182
+ allCommands = deduplicateCommands(batchResults.flat());
6183
+ } else {
6184
+ allCommands = await generateBatch(intent, skeleton, task.items, config, task.max_tokens, task.context_hint);
6185
+ }
6186
+ allCommands = ensureHelpCommand(allCommands);
6187
+ return { agent: "command-writer", commands: allCommands };
6188
+ }
5416
6189
 
5417
- - Only select tools directly relevant to the described workflow
5418
- - Prefer free tools (auth: "none") when quality is comparable
5419
- - Tier 1 tools (Context7, Sequential Thinking, security-guidance) should be included in most environments
5420
- - For tools requiring API keys (auth: "api_key"), use \${ENV_VAR} syntax \u2014 never hardcode keys
5421
- - Maximum 6-8 MCP servers to avoid context bloat
5422
- - Include a \`reason\` for each selected tool explaining why it fits this workflow
6190
+ // src/compiler/agents/agent-writer.ts
6191
+ init_llm();
6192
+ var BATCH_THRESHOLD2 = 8;
6193
+ var BATCH_SIZE2 = 6;
6194
+ var AGENT_WRITER_SYSTEM_PROMPT = `You are an expert at designing Claude Code agent personas for the .claude/agents/ directory.
6195
+
6196
+ Each agent file uses YAML frontmatter followed by Markdown persona content.
6197
+
6198
+ ## YAML Frontmatter Conventions
6199
+ - \`model\`: optional model hint \u2014 "opus" for complex reasoning, "sonnet" for balanced, "haiku" for fast/cheap
6200
+ - \`disallowedTools\`: optional string array of tools the agent should NOT use (e.g. ["Bash", "Write"])
6201
+ - \`modelRouting\`: optional object for dynamic model selection:
6202
+ - \`default\`: base model tier ("haiku", "sonnet", or "opus")
6203
+ - \`escalateTo\`: higher tier to escalate to ("sonnet" or "opus")
6204
+ - \`escalateWhen\`: description of when to escalate
6205
+
6206
+ ## Persona Design Principles
6207
+ - Each agent has a clear, focused role (single responsibility)
6208
+ - Persona should describe expertise, approach, and boundaries
6209
+ - Include specific instructions for the agent's domain
6210
+ - Use second person ("You are...")
6211
+ - Be concrete about what the agent should and should not do
6212
+ - Include relevant workflow steps or checklists where appropriate
6213
+
6214
+ ## Model Tiering Guidelines
6215
+ - "haiku": formatting, linting, simple lookups, boilerplate generation
6216
+ - "sonnet": most development tasks, code review, testing, refactoring
6217
+ - "opus": architecture decisions, complex debugging, cross-cutting changes, security audits
5423
6218
 
5424
- ## Context Budget (STRICT)
6219
+ ## Output Format
6220
+ Return a JSON array. Each element:
6221
+ {
6222
+ "name": "agent-name-kebab-case",
6223
+ "content": "You are the ... (full persona markdown)",
6224
+ "model": "sonnet",
6225
+ "disallowedTools": ["Bash"],
6226
+ "modelRouting": { "default": "sonnet", "escalateTo": "opus", "escalateWhen": "cross-cutting changes" }
6227
+ }
5425
6228
 
5426
- - MCP servers: maximum 6. Prefer fewer.
5427
- - CLAUDE.md: maximum 150 lines.
5428
- - Rules: maximum 5 files, each under 20 lines.
5429
- - Skills: maximum 3. Only include directly relevant ones.
5430
- - Agents: maximum 5. Orchestration pipeline (/develop) agents.
5431
- - Commands: no limit (loaded on demand, zero context cost).
5432
- - Hooks: maximum 4 (auto-format, block-destructive, PostCompact, plus one contextual).
6229
+ Only include model, disallowedTools, and modelRouting when they add value. Not every agent needs all fields.
5433
6230
 
5434
- If the workflow doesn't clearly need a tool, DO NOT include it.
5435
- Each MCP server costs 500-2000 tokens of context window.
6231
+ Return ONLY the JSON array, no surrounding text or code fences.`;
6232
+ function parseAgentResponse(text) {
6233
+ let cleaned = text.trim();
6234
+ if (cleaned.startsWith("```")) {
6235
+ cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
6236
+ }
6237
+ const arrayMatch = cleaned.match(/\[[\s\S]*\]/);
6238
+ if (!arrayMatch) {
6239
+ return [];
6240
+ }
6241
+ try {
6242
+ const parsed = JSON.parse(arrayMatch[0]);
6243
+ if (!Array.isArray(parsed)) {
6244
+ return [];
6245
+ }
6246
+ return parsed;
6247
+ } catch {
6248
+ return [];
6249
+ }
6250
+ }
6251
+ function toAgentNode(raw) {
6252
+ if (typeof raw !== "object" || raw === null) {
6253
+ return null;
6254
+ }
6255
+ const obj = raw;
6256
+ if (typeof obj["name"] !== "string" || !obj["name"]) {
6257
+ return null;
6258
+ }
6259
+ if (typeof obj["content"] !== "string" || !obj["content"]) {
6260
+ return null;
6261
+ }
6262
+ const node = {
6263
+ name: obj["name"],
6264
+ content: obj["content"]
6265
+ };
6266
+ if (typeof obj["model"] === "string" && obj["model"]) {
6267
+ node.model = obj["model"];
6268
+ }
6269
+ if (Array.isArray(obj["disallowedTools"])) {
6270
+ const tools = obj["disallowedTools"].filter(
6271
+ (t) => typeof t === "string" && t.length > 0
6272
+ );
6273
+ if (tools.length > 0) {
6274
+ node.disallowedTools = tools;
6275
+ }
6276
+ }
6277
+ if (typeof obj["modelRouting"] === "object" && obj["modelRouting"] !== null) {
6278
+ const routing = obj["modelRouting"];
6279
+ const defaultModel = routing["default"];
6280
+ if (defaultModel === "haiku" || defaultModel === "sonnet" || defaultModel === "opus") {
6281
+ const modelRouting = {
6282
+ default: defaultModel
6283
+ };
6284
+ const escalateTo = routing["escalateTo"];
6285
+ if (escalateTo === "sonnet" || escalateTo === "opus") {
6286
+ modelRouting.escalateTo = escalateTo;
6287
+ }
6288
+ const escalateWhen = routing["escalateWhen"];
6289
+ if (typeof escalateWhen === "string" && escalateWhen) {
6290
+ modelRouting.escalateWhen = escalateWhen;
6291
+ }
6292
+ node.modelRouting = modelRouting;
6293
+ }
6294
+ }
6295
+ return node;
6296
+ }
6297
+ function buildUserMessage5(items, intent, phaseAContext) {
6298
+ const parts = [];
6299
+ parts.push(`## User Intent
5436
6300
 
5437
- ## Output Schema
6301
+ ${intent}`);
6302
+ if (phaseAContext) {
6303
+ parts.push(`## Project Context (from Phase A)
5438
6304
 
5439
- Return ONLY valid JSON matching this structure:
6305
+ ${phaseAContext}`);
6306
+ }
6307
+ parts.push(
6308
+ `## Agents to Generate
5440
6309
 
5441
- \`\`\`json
5442
- {
5443
- "name": "short-kebab-case-name",
5444
- "description": "One-line description of the environment",
5445
- "tools": [
5446
- { "tool_id": "id-from-registry", "reason": "why this tool fits" }
5447
- ],
5448
- "harness": {
5449
- "claude_md": "The full CLAUDE.md content (under 150 lines)",
5450
- "settings": {
5451
- "permissions": {
5452
- "allow": ["Bash(npm run *)", "Read", "Write", "Edit"],
5453
- "deny": ["Bash(rm -rf *)", "Bash(curl * | sh)", "Read(./.env)", "Read(./secrets/**)"]
6310
+ Create agent persona definitions for each of these agents:
6311
+ ${items.map((item) => `- ${item}`).join("\n")}`
6312
+ );
6313
+ parts.push(
6314
+ "Generate the JSON array now. One object per agent listed above."
6315
+ );
6316
+ return parts.join("\n\n");
6317
+ }
6318
+ function chunk2(arr, size) {
6319
+ const chunks = [];
6320
+ for (let i = 0; i < arr.length; i += size) {
6321
+ chunks.push(arr.slice(i, i + size));
6322
+ }
6323
+ return chunks;
6324
+ }
6325
+ async function generateAgents(intent, _skeleton, task, config) {
6326
+ if (task.items.length === 0) {
6327
+ return { agent: "agent-writer", agents: [] };
6328
+ }
6329
+ const needsBatching = task.items.length > BATCH_THRESHOLD2;
6330
+ const batches = needsBatching ? chunk2(task.items, BATCH_SIZE2) : [task.items];
6331
+ const allAgents = [];
6332
+ for (const batch of batches) {
6333
+ const userMessage = buildUserMessage5(batch, intent, task.context_hint);
6334
+ const response = await callLLM(config, userMessage, {
6335
+ systemPrompt: AGENT_WRITER_SYSTEM_PROMPT,
6336
+ cacheControl: true,
6337
+ maxTokens: task.max_tokens
6338
+ });
6339
+ const rawAgents = parseAgentResponse(response);
6340
+ for (const raw of rawAgents) {
6341
+ const node = toAgentNode(raw);
6342
+ if (node !== null) {
6343
+ allAgents.push(node);
5454
6344
  }
5455
- },
5456
- "mcp_config": {
5457
- "server-name": { "command": "npx", "args": ["..."], "env": {} }
5458
- },
5459
- "commands": {
5460
- "help": "markdown content for /project:help",
5461
- "develop": "markdown content for /project:develop"
5462
- },
5463
- "rules": {
5464
- "continuity": "markdown content for continuity rule",
5465
- "security": "markdown content for security rule"
5466
- },
5467
- "skills": {
5468
- "skill-name/SKILL": "markdown content with YAML frontmatter"
5469
- },
5470
- "agents": {
5471
- "architect": "agent markdown with YAML frontmatter",
5472
- "planner": "agent markdown with YAML frontmatter",
5473
- "implementer": "agent markdown with YAML frontmatter",
5474
- "fixer": "agent markdown with YAML frontmatter",
5475
- "doc-updater": "agent markdown with YAML frontmatter"
5476
- },
5477
- "docs": {
5478
- "DECISIONS": "# Decisions\\n\\nArchitectural decisions.",
5479
- "LEARNINGS": "# Learnings\\n\\nNon-obvious discoveries.",
5480
- "SPRINT": "# Sprint\\n\\nLiving spec and plan."
5481
6345
  }
5482
6346
  }
6347
+ return { agent: "agent-writer", agents: allAgents };
5483
6348
  }
5484
- \`\`\`
5485
-
5486
- Do not include any text outside the JSON object. Do not wrap in markdown code fences.`;
5487
- var CLARIFICATION_PROMPT = `You are helping a user define their project for environment compilation.
5488
6349
 
5489
- Given their initial description, generate 3-5 clarifying questions to understand:
5490
- 1. Language and framework
5491
- 2. What the project specifically does (be precise)
5492
- 3. Primary workflow (build, research, write, analyze?)
5493
- 4. Key dependencies or integrations
5494
- 5. Target audience
6350
+ // src/compiler/agents/skill-writer.ts
6351
+ init_llm();
6352
+ var SYSTEM_PROMPT4 = `You are a specialist agent that writes SKILL.md files for Claude Code environments.
5495
6353
 
5496
- For each question, provide a reasonable suggestion based on the description.
6354
+ Each skill is a structured markdown document that teaches Claude Code a repeatable workflow pattern.
5497
6355
 
5498
- Output ONLY a JSON array:
5499
- [
5500
- { "question": "Language/framework?", "suggestion": "TypeScript + Node.js" },
5501
- ...
5502
- ]
6356
+ Output format: a JSON array of objects with "name" (string) and "content" (string) fields.
5503
6357
 
5504
6358
  Rules:
5505
- - Suggestions should be reasonable guesses, clearly marked as suggestions
5506
- - Keep questions short (under 10 words)
5507
- - Maximum 5 questions
5508
- - If the description is already very detailed, ask fewer questions`;
5509
-
5510
- // src/registry/loader.ts
5511
- import fs3 from "fs/promises";
5512
- import path3 from "path";
5513
- import { fileURLToPath as fileURLToPath2 } from "url";
5514
- var __filename2 = fileURLToPath2(import.meta.url);
5515
- var __dirname2 = path3.dirname(__filename2);
5516
- async function loadBundledRegistry() {
5517
- const candidates = [
5518
- path3.resolve(__dirname2, "../registry/tools.json"),
5519
- path3.resolve(__dirname2, "../src/registry/tools.json"),
5520
- path3.resolve(__dirname2, "../../src/registry/tools.json")
5521
- ];
5522
- for (const candidate of candidates) {
5523
- try {
5524
- const data = await fs3.readFile(candidate, "utf-8");
5525
- return JSON.parse(data);
5526
- } catch {
5527
- continue;
5528
- }
6359
+ - Each skill must have a clear title heading (# Skill Name)
6360
+ - Use numbered phases (## Phase 1: NAME, ## Phase 2: NAME, etc.) for multi-step workflows
6361
+ - Content should be actionable instructions, not theory
6362
+ - Keep each skill concise: 200-400 words
6363
+ - For TDD skills, always use the 3-phase pattern: RED (write failing test), GREEN (minimal implementation), REFACTOR (clean up)
6364
+ - Output ONLY the JSON array, no surrounding text
6365
+
6366
+ Example:
6367
+ [
6368
+ {
6369
+ "name": "tdd",
6370
+ "content": "# TDD Skill\\n\\n## Phase 1: RED\\nWrite a failing test first...\\n## Phase 2: GREEN\\nWrite minimal code to make the test pass...\\n## Phase 3: REFACTOR\\nClean up duplication and improve naming..."
6371
+ }
6372
+ ]`;
6373
+ function stripCodeFences2(raw) {
6374
+ const trimmed = raw.trim();
6375
+ const fencePattern = /^```(?:json|JSON)?\s*\n?([\s\S]*?)\n?\s*```$/;
6376
+ const match = trimmed.match(fencePattern);
6377
+ if (match) {
6378
+ return match[1].trim();
5529
6379
  }
5530
- throw new Error("Could not find tools.json registry");
6380
+ return trimmed;
5531
6381
  }
5532
- async function loadUserRegistry() {
5533
- try {
5534
- const data = await fs3.readFile(getUserRegistryPath(), "utf-8");
5535
- return JSON.parse(data);
5536
- } catch {
5537
- return [];
6382
+ function parseSkillNodes(raw) {
6383
+ const cleaned = stripCodeFences2(raw);
6384
+ const parsed = JSON.parse(cleaned);
6385
+ if (!Array.isArray(parsed)) {
6386
+ throw new Error("Expected JSON array of skills from LLM response");
5538
6387
  }
6388
+ const skills = [];
6389
+ for (const item of parsed) {
6390
+ if (typeof item !== "object" || item === null || typeof item.name !== "string" || typeof item.content !== "string") {
6391
+ throw new Error(
6392
+ "Each skill must have a string 'name' and string 'content' field"
6393
+ );
6394
+ }
6395
+ skills.push({
6396
+ name: item.name,
6397
+ content: item.content
6398
+ });
6399
+ }
6400
+ return skills;
5539
6401
  }
5540
- async function saveUserRegistry(tools) {
5541
- await fs3.writeFile(getUserRegistryPath(), JSON.stringify(tools, null, 2), "utf-8");
5542
- }
5543
- async function loadRegistry() {
5544
- const bundled = await loadBundledRegistry();
5545
- const user = await loadUserRegistry();
5546
- if (user.length === 0) return bundled;
5547
- const merged = /* @__PURE__ */ new Map();
5548
- for (const tool of bundled) {
5549
- merged.set(tool.id, tool);
6402
+ async function generateSkills(_intent, _skeleton, task, config) {
6403
+ if (task.items.length === 0) {
6404
+ return { agent: "skill-writer", skills: [] };
5550
6405
  }
5551
- for (const tool of user) {
5552
- merged.set(tool.id, tool);
6406
+ const userMessage = `Generate SKILL.md content for the following skills:
6407
+
6408
+ ${task.items.map((name) => `- ${name}`).join("\n")}`;
6409
+ const raw = await callLLM(config, userMessage, {
6410
+ systemPrompt: SYSTEM_PROMPT4,
6411
+ cacheControl: true,
6412
+ maxTokens: task.max_tokens
6413
+ });
6414
+ const skills = parseSkillNodes(raw);
6415
+ return { agent: "skill-writer", skills };
6416
+ }
6417
+
6418
+ // src/compiler/agents/dispatch.ts
6419
+ async function dispatchAgent(task, config, intent, skeleton) {
6420
+ switch (task.agent) {
6421
+ case "sections-writer":
6422
+ return generateSections(intent, skeleton, task, config);
6423
+ case "rule-writer":
6424
+ return generateRules(intent, skeleton, task, config);
6425
+ case "doc-writer":
6426
+ return generateDocs(intent, skeleton, task, config);
6427
+ case "command-writer":
6428
+ return generateCommands(intent, skeleton, task, config);
6429
+ case "agent-writer":
6430
+ return generateAgents(intent, skeleton, task, config);
6431
+ case "skill-writer":
6432
+ return generateSkills(intent, skeleton, task, config);
6433
+ default:
6434
+ throw new Error(`Unknown agent: ${task.agent}`);
5553
6435
  }
5554
- return Array.from(merged.values());
5555
6436
  }
5556
6437
 
5557
6438
  // src/compiler/compile.ts
5558
- init_providers();
5559
- init_llm();
6439
+ init_renderer();
5560
6440
 
5561
6441
  // src/intent/patterns.ts
5562
6442
  var SYNONYM_MAP = {
@@ -5928,19 +6808,6 @@ ${registrySummary}
5928
6808
 
5929
6809
  Generate the skeleton JSON now.`;
5930
6810
  }
5931
- function buildHarnessMessage(intent, skeleton, concise) {
5932
- const skeletonJson = JSON.stringify(skeleton, null, 2);
5933
- const conciseNote = concise ? "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief." : "";
5934
- return `## User Intent
5935
-
5936
- ${intent}
5937
-
5938
- ## Project Skeleton
5939
-
5940
- ${skeletonJson}
5941
-
5942
- Generate the harness content JSON now.${conciseNote}`;
5943
- }
5944
6811
  function parseSkeletonResponse(text) {
5945
6812
  let cleaned = text.trim();
5946
6813
  if (cleaned.startsWith("```")) {
@@ -5962,29 +6829,8 @@ function parseSkeletonResponse(text) {
5962
6829
  );
5963
6830
  }
5964
6831
  }
5965
- function parseHarnessResponse(text) {
5966
- let cleaned = text.trim();
5967
- if (cleaned.startsWith("```")) {
5968
- cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
5969
- }
5970
- const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
5971
- if (!jsonMatch) {
5972
- throw new Error("Pass 2 (harness) did not return valid JSON.");
5973
- }
5974
- try {
5975
- const parsed = JSON.parse(jsonMatch[0]);
5976
- if (!parsed.claude_md || !parsed.commands) {
5977
- throw new Error("Harness missing required fields: claude_md, commands");
5978
- }
5979
- return parsed;
5980
- } catch (err) {
5981
- throw new Error(
5982
- `Failed to parse harness JSON: ${err instanceof Error ? err.message : String(err)}`
5983
- );
5984
- }
5985
- }
5986
6832
  function buildSettings(skeleton, registry) {
5987
- const selectedTools = skeleton.tools.map((t) => registry.find((r) => r.id === t.tool_id)).filter(Boolean);
6833
+ const _selectedTools = skeleton.tools.map((t) => registry.find((r) => r.id === t.tool_id)).filter(Boolean);
5988
6834
  const allow = ["Read", "Write", "Edit", "Bash(npm run *)", "Bash(npx *)"];
5989
6835
  const deny = [
5990
6836
  "Bash(rm -rf *)",
@@ -6083,50 +6929,60 @@ async function compile(intent, onProgress) {
6083
6929
  detail: toolNames,
6084
6930
  elapsed: (Date.now() - startTime) / 1e3
6085
6931
  });
6086
- onProgress?.({ phase: "pass2", status: "running", message: "Pass 2: Generating CLAUDE.md, commands, agents..." });
6087
- const harnessMsg = buildHarnessMessage(intent, skeleton);
6088
- let harness;
6089
- try {
6090
- const harnessText = await callLLM(config, harnessMsg, {
6091
- maxTokens: 8192,
6092
- systemPrompt: HARNESS_PROMPT
6093
- });
6094
- harness = parseHarnessResponse(harnessText);
6095
- } catch {
6096
- onProgress?.({ phase: "pass2-retry", status: "warning", message: "Pass 2: Response too large, retrying in concise mode..." });
6097
- const retryMsg = buildHarnessMessage(intent, skeleton, true);
6098
- const retryText = await callLLM(config, retryMsg, {
6099
- maxTokens: 8192,
6100
- systemPrompt: HARNESS_PROMPT
6101
- });
6102
- harness = parseHarnessResponse(retryText);
6103
- }
6104
- const cmdCount = Object.keys(harness.commands).length;
6105
- const agentCount = Object.keys(harness.agents ?? {}).length;
6106
- const ruleCount = Object.keys(harness.rules).length;
6932
+ onProgress?.({ phase: "plan", status: "running", message: "Pass 2: Planning compilation..." });
6933
+ const plan = await generatePlan(intent, skeleton, config);
6934
+ const agentCount = plan.phases.reduce((sum, p) => sum + p.agents.length, 0);
6107
6935
  onProgress?.({
6108
- phase: "pass2",
6936
+ phase: "plan",
6109
6937
  status: "success",
6110
- message: `Pass 2: Generated ${cmdCount} commands, ${agentCount} agents, ${ruleCount} rules`,
6938
+ message: `Pass 2: Compilation plan \u2014 ${agentCount} agents across ${plan.phases.length} phases`,
6111
6939
  elapsed: (Date.now() - startTime) / 1e3
6112
6940
  });
6113
- onProgress?.({ phase: "pass3", status: "running", message: "Pass 3: Configuring MCP servers & settings..." });
6941
+ const concurrency = config.auth_type === "claude-code-oauth" ? 2 : 3;
6942
+ const executeAgent = (task) => dispatchAgent(task, config, intent, skeleton);
6943
+ const batchProgress = (bp) => {
6944
+ if (bp.status === "start") {
6945
+ const phaseLabel = bp.phaseId;
6946
+ onProgress?.({ phase: phaseLabel, status: "running", message: `Pass 3 (${bp.phaseId}): Running ${bp.agentCount} agents...` });
6947
+ } else if (bp.status === "complete") {
6948
+ const phaseLabel = bp.phaseId;
6949
+ onProgress?.({ phase: phaseLabel, status: "success", message: `Pass 3 (${bp.phaseId}): Complete`, elapsed: (Date.now() - startTime) / 1e3 });
6950
+ }
6951
+ };
6952
+ const rawIR = await executePlan(plan, executeAgent, concurrency, batchProgress);
6953
+ onProgress?.({ phase: "phase-c", status: "running", message: "Pass 3c: Cross-reference validation..." });
6954
+ const { ir: linkedIR, report } = linkHarness(rawIR);
6955
+ const ir = linkedIR;
6956
+ if (report.warnings.length > 0) {
6957
+ for (const w of report.warnings) {
6958
+ onProgress?.({ phase: "phase-c", status: "warning", message: `\u26A0 ${w}` });
6959
+ }
6960
+ }
6961
+ onProgress?.({ phase: "phase-c", status: "success", message: "Pass 3c: Cross-reference validation", elapsed: (Date.now() - startTime) / 1e3 });
6962
+ onProgress?.({ phase: "assembly", status: "running", message: "Pass 4: Configuring MCP servers & settings..." });
6114
6963
  const settings = buildSettings(skeleton, registry);
6115
6964
  const mcpConfig = buildMcpConfig(skeleton, registry);
6965
+ const commandsRecord = {};
6966
+ for (const cmd of ir.commands) {
6967
+ commandsRecord[cmd.name] = cmd.content;
6968
+ }
6969
+ const agentsRecord = {};
6970
+ for (const agent of ir.agents) {
6971
+ agentsRecord[agent.name] = agent.content;
6972
+ }
6116
6973
  const projectProfile = {
6117
6974
  language: skeleton.outline.tech_stack[0] ?? "unknown",
6118
6975
  framework: skeleton.outline.tech_stack[1] ?? "none",
6119
6976
  scripts: {}
6120
- // scripts come from project scanning, not compilation
6121
6977
  };
6122
6978
  const intentPatterns = generateIntentPatterns(
6123
- harness.commands,
6124
- harness.agents ?? {},
6979
+ commandsRecord,
6980
+ agentsRecord,
6125
6981
  projectProfile
6126
6982
  );
6127
6983
  const intentPromptTemplate = compileIntentPrompt(
6128
- harness.commands,
6129
- harness.agents ?? {}
6984
+ commandsRecord,
6985
+ agentsRecord
6130
6986
  );
6131
6987
  const generationTimestamp = (/* @__PURE__ */ new Date()).toISOString();
6132
6988
  const intentHooks = {};
@@ -6134,7 +6990,27 @@ async function compile(intent, onProgress) {
6134
6990
  intentHooks["intent-router"] = renderIntentRouter(intentPatterns, generationTimestamp);
6135
6991
  intentHooks["intent-learner"] = renderIntentLearner();
6136
6992
  }
6137
- onProgress?.({ phase: "pass3", status: "success", message: "Pass 3: Configured MCP servers & settings" });
6993
+ onProgress?.({ phase: "assembly", status: "success", message: "Pass 4: Configured MCP servers & settings" });
6994
+ const commands = {};
6995
+ for (const cmd of ir.commands) {
6996
+ commands[cmd.name] = cmd.content;
6997
+ }
6998
+ const rules = {};
6999
+ for (const rule of ir.rules) {
7000
+ rules[rule.name] = rule.content;
7001
+ }
7002
+ const agents = {};
7003
+ for (const agent of ir.agents) {
7004
+ agents[agent.name] = agent.content;
7005
+ }
7006
+ const skills = {};
7007
+ for (const skill of ir.skills) {
7008
+ skills[skill.name] = skill.content;
7009
+ }
7010
+ const docs = {};
7011
+ for (const doc of ir.docs) {
7012
+ docs[doc.name] = doc.content;
7013
+ }
6138
7014
  const spec = {
6139
7015
  id: `env_${crypto.randomUUID()}`,
6140
7016
  intent,
@@ -6143,15 +7019,16 @@ async function compile(intent, onProgress) {
6143
7019
  description: skeleton.description,
6144
7020
  autonomy_level: 1,
6145
7021
  tools: skeleton.tools,
7022
+ ir,
6146
7023
  harness: {
6147
- claude_md: harness.claude_md,
7024
+ claude_md: renderClaudeMd(ir.meta, ir.sections),
6148
7025
  settings,
6149
7026
  mcp_config: mcpConfig,
6150
- commands: harness.commands,
6151
- rules: harness.rules,
6152
- skills: harness.skills ?? {},
6153
- agents: harness.agents ?? {},
6154
- docs: harness.docs,
7027
+ commands,
7028
+ rules,
7029
+ skills,
7030
+ agents,
7031
+ docs,
6155
7032
  hooks: intentHooks,
6156
7033
  intent_patterns: intentPatterns,
6157
7034
  intent_prompt_template: intentPromptTemplate
@@ -6535,6 +7412,9 @@ function applyAutonomyLevel(spec) {
6535
7412
  const agents = spec.harness.agents ?? {};
6536
7413
  const docs = spec.harness.docs ?? {};
6537
7414
  const settings = spec.harness.settings ?? {};
7415
+ if (!("persistence_routing" in settings)) {
7416
+ settings.persistence_routing = level >= 3 ? "auto" : "manual";
7417
+ }
6538
7418
  if (level >= 1) {
6539
7419
  if (!("tour" in commands)) {
6540
7420
  commands.tour = TOUR_COMMAND;
@@ -6606,6 +7486,85 @@ var ENV_LOADER_HOOK = {
6606
7486
  command: 'if [ -f .env ] && [ -n "$CLAUDE_ENV_FILE" ]; then grep -v "^#" .env | grep -v "^$" | grep "=" >> "$CLAUDE_ENV_FILE"; fi'
6607
7487
  }]
6608
7488
  };
7489
+ var PERSIST_ROUTER_TEMPLATE = `import { readFileSync } from 'fs';
7490
+
7491
+ const input = JSON.parse(readFileSync('/dev/stdin', 'utf8'));
7492
+ const prompt = (input.prompt ?? '').trim();
7493
+
7494
+ // Pass-through patterns (fast exit)
7495
+ const PASSTHROUGH = /^(what|how|why|where|when|can you|does|is |show me|find |search |list |\\/project:)/i;
7496
+ const SINGLE_FILE = /^(edit|fix the typo|update the comment|change the|rename) .{3,60}$/i;
7497
+
7498
+ if (PASSTHROUGH.test(prompt) || SINGLE_FILE.test(prompt) || prompt.length < 20) {
7499
+ process.stdout.write(JSON.stringify({ continue: true }));
7500
+ process.exit(0);
7501
+ }
7502
+
7503
+ // Check config for routing mode
7504
+ let routingMode = 'auto';
7505
+ try {
7506
+ const settings = JSON.parse(readFileSync('.claude/settings.json', 'utf8'));
7507
+ routingMode = settings.persistence_routing ?? 'auto';
7508
+ } catch { /* default to auto */ }
7509
+
7510
+ if (routingMode === 'off') {
7511
+ process.stdout.write(JSON.stringify({ continue: true }));
7512
+ process.exit(0);
7513
+ }
7514
+
7515
+ // Complexity signals
7516
+ const signals = [];
7517
+
7518
+ if (/\\b(then|after that|and also|next|finally|step \\d|first .* then)\\b/i.test(prompt)) {
7519
+ signals.push('multi-step');
7520
+ }
7521
+ if (/\\b(add|implement|build|create|integrate|set up)\\b.*\\b(feature|auth|api|endpoint|page|component|module|service|database|migration)\\b/i.test(prompt)) {
7522
+ signals.push('feature-scope');
7523
+ }
7524
+ if (/\\b(migrate|convert|replace|upgrade|refactor|rewrite|restructure)\\b/i.test(prompt)) {
7525
+ signals.push('refactor-scope');
7526
+ }
7527
+ if (/\\b(when .* happens|steps to reproduce|broken|crash|regression|fails when)\\b/i.test(prompt)) {
7528
+ signals.push('bug-with-repro');
7529
+ }
7530
+ if (/\\b(persist|keep working|don't stop|until done|until .* pass)\\b/i.test(prompt)) {
7531
+ signals.push('explicit');
7532
+ }
7533
+ if (prompt.split(/\\s+/).length > 50) {
7534
+ signals.push('long-prompt');
7535
+ }
7536
+
7537
+ const shouldRoute = routingMode === 'manual'
7538
+ ? signals.includes('explicit')
7539
+ : signals.length >= 2 || signals.includes('explicit');
7540
+
7541
+ if (shouldRoute) {
7542
+ process.stdout.write(JSON.stringify({
7543
+ continue: true,
7544
+ hookSpecificOutput: {
7545
+ hookEventName: 'UserPromptSubmit',
7546
+ additionalContext: [
7547
+ 'PERSISTENCE ROUTING: This task has complexity signals (' + signals.join(', ') + ').',
7548
+ 'Execute this using the /project:persist workflow:',
7549
+ '1. Ensure acceptance criteria exist in docs/SPRINT.md (create from this prompt if needed)',
7550
+ '2. Initialize .claude/progress.json',
7551
+ '3. Work criterion-by-criterion until all pass',
7552
+ '4. Run review gate before marking complete',
7553
+ ].join('\\n'),
7554
+ },
7555
+ }));
7556
+ } else {
7557
+ process.stdout.write(JSON.stringify({ continue: true }));
7558
+ }
7559
+ `;
7560
+ var PERSIST_ROUTER_HOOK = {
7561
+ matcher: "",
7562
+ hooks: [{
7563
+ type: "command",
7564
+ command: 'node "$CLAUDE_PROJECT_DIR/.claude/hooks/persist-router.mjs"',
7565
+ timeout: 5
7566
+ }]
7567
+ };
6609
7568
  function resolveSettings(spec, options) {
6610
7569
  const settings = spec.harness.settings;
6611
7570
  const base = settings && Object.keys(settings).length > 0 ? { ...settings } : {};
@@ -6619,6 +7578,13 @@ function resolveSettings(spec, options) {
6619
7578
  hooks.SessionStart = sessionStart;
6620
7579
  base.hooks = hooks;
6621
7580
  }
7581
+ if (isCodeProject(spec) && (spec.autonomy_level ?? 1) >= 3) {
7582
+ const hooks = base.hooks ?? {};
7583
+ const userPromptSubmit = hooks.UserPromptSubmit ?? [];
7584
+ userPromptSubmit.push(PERSIST_ROUTER_HOOK);
7585
+ hooks.UserPromptSubmit = userPromptSubmit;
7586
+ base.hooks = hooks;
7587
+ }
6622
7588
  const hasIntentHooks = spec.harness.hooks && Object.keys(spec.harness.hooks).length > 0;
6623
7589
  if (hasIntentHooks) {
6624
7590
  const hooks = base.hooks ?? {};
@@ -6710,6 +7676,9 @@ function buildFileMap(spec, options) {
6710
7676
  files.set(".claude/hooks/intent-log.jsonl", "");
6711
7677
  }
6712
7678
  }
7679
+ if (isCodeProject(spec) && (spec.autonomy_level ?? 1) >= 3) {
7680
+ files.set(".claude/hooks/persist-router.mjs", PERSIST_ROUTER_TEMPLATE);
7681
+ }
6713
7682
  return files;
6714
7683
  }
6715
7684
  async function writeEnvironment(spec, targetDir, options) {
@@ -6780,6 +7749,11 @@ async function writeEnvironment(spec, targetDir, options) {
6780
7749
  written.push(".claude/hooks/intent-log.jsonl");
6781
7750
  }
6782
7751
  }
7752
+ if (isCodeProject(spec) && (spec.autonomy_level ?? 1) >= 3) {
7753
+ const p = path5.join(claudeDir, "hooks", "persist-router.mjs");
7754
+ await writeFile(p, PERSIST_ROUTER_TEMPLATE);
7755
+ written.push(".claude/hooks/persist-router.mjs");
7756
+ }
6783
7757
  return written;
6784
7758
  }
6785
7759
  function summarizeSpec(spec, registry) {
@@ -6802,12 +7776,20 @@ function summarizeSpec(spec, registry) {
6802
7776
  }
6803
7777
  }
6804
7778
  }
6805
- return {
6806
- toolCount: spec.tools.length,
7779
+ const counts = spec.ir ? {
7780
+ commandCount: spec.ir.commands.length,
7781
+ ruleCount: spec.ir.rules.length,
7782
+ skillCount: spec.ir.skills.length,
7783
+ agentCount: spec.ir.agents.length
7784
+ } : {
6807
7785
  commandCount: Object.keys(spec.harness.commands || {}).length,
6808
7786
  ruleCount: Object.keys(spec.harness.rules || {}).length,
6809
7787
  skillCount: Object.keys(spec.harness.skills || {}).length,
6810
- agentCount: Object.keys(spec.harness.agents || {}).length,
7788
+ agentCount: Object.keys(spec.harness.agents || {}).length
7789
+ };
7790
+ return {
7791
+ toolCount: spec.tools.length,
7792
+ ...counts,
6811
7793
  pluginCommands,
6812
7794
  envSetup
6813
7795
  };
@@ -8478,14 +9460,20 @@ var EVAL_TEMPLATES = {
8478
9460
  name: "Intent Routing",
8479
9461
  description: "Test that natural language prompts route to the correct workflow command via intent hooks",
8480
9462
  bestFor: ["feature-development", "full-stack", "api-building"]
9463
+ },
9464
+ "persistence-completion": {
9465
+ id: "persistence-completion",
9466
+ name: "Persistence Completion",
9467
+ description: "Can the agent complete a multi-criterion task using the persistence loop?",
9468
+ bestFor: ["feature-development", "full-stack", "api-building", "maintenance"]
8481
9469
  }
8482
9470
  };
8483
9471
  function selectTemplatesForWorkflow(workflowType) {
8484
9472
  const mapping = {
8485
- "feature-development": ["add-feature", "test-writing", "convention-adherence", "workflow-compliance", "intent-routing"],
8486
- "api-building": ["add-feature", "fix-bug", "test-writing", "convention-adherence"],
8487
- "full-stack": ["add-feature", "fix-bug", "test-writing", "convention-adherence"],
8488
- "maintenance": ["fix-bug", "refactor", "test-writing", "rule-compliance"],
9473
+ "feature-development": ["add-feature", "test-writing", "convention-adherence", "workflow-compliance", "intent-routing", "persistence-completion"],
9474
+ "api-building": ["add-feature", "fix-bug", "test-writing", "convention-adherence", "persistence-completion"],
9475
+ "full-stack": ["add-feature", "fix-bug", "test-writing", "convention-adherence", "persistence-completion"],
9476
+ "maintenance": ["fix-bug", "refactor", "test-writing", "rule-compliance", "persistence-completion"],
8489
9477
  "debugging": ["fix-bug", "test-writing", "rule-compliance"],
8490
9478
  "qa": ["fix-bug", "test-writing", "add-feature", "workflow-compliance"],
8491
9479
  "architecture": ["refactor", "test-writing", "config-change", "convention-adherence"],
@@ -8506,6 +9494,7 @@ IMPORTANT: For harness-aware templates (convention-adherence, workflow-complianc
8506
9494
  - convention-adherence: Task must require following specific conventions from CLAUDE.md (naming, file structure, patterns). Judge by whether output matches the conventions.
8507
9495
  - workflow-compliance: Task must require using project slash commands or workflow steps defined in .claude/commands/. Judge by whether the agent followed the defined workflow.
8508
9496
  - rule-compliance: Task must create a scenario where .claude/rules/ content is relevant. Judge by whether the agent respected all rules.
9497
+ - persistence-completion: Task MUST have 3+ acceptance criteria that require sequential implementation. The task description should be a realistic feature request \u2014 the agent must parse it into criteria. Judge by: (a) all criteria met (progress.json status: complete), (b) structured tracking used (progress.json exists with 3+ criteria), (c) tests pass, (d) review gate executed (progress.json review field present).
8509
9498
 
8510
9499
  These harness-aware tasks are critical \u2014 they test whether the .claude/ environment actually improves agent behavior.
8511
9500