kairn-cli 1.10.0 → 1.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +341 -58
- package/dist/cli.js.map +1 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -460,17 +460,72 @@ import Anthropic2 from "@anthropic-ai/sdk";
|
|
|
460
460
|
import OpenAI2 from "openai";
|
|
461
461
|
|
|
462
462
|
// src/compiler/prompt.ts
|
|
463
|
-
var
|
|
463
|
+
// System prompt for the skeleton pass: the model is asked to pick tools from the
// registry and outline the project, replying with a SkeletonSpec-shaped JSON object
// (name, description, tools[{tool_id, reason}], outline{...}).
// NOTE(review): "Tool Selection Rules" allows "Maximum 6-8 MCP servers" while
// "Context Budget (STRICT)" says "MCP servers: maximum 6" — confirm which limit is intended.
// NOTE(review): the schema example is wrapped in a ```json fence although the closing
// instruction says "No markdown fences" — confirm the example does not invite fenced replies.
var SKELETON_PROMPT = `You are the Kairn skeleton compiler. Your job is to select tools and outline the project structure from a user's natural language description.
|
|
464
464
|
|
|
465
465
|
You will receive:
|
|
466
466
|
1. The user's intent (what they want to build/do)
|
|
467
467
|
2. A tool registry (available MCP servers, plugins, and hooks)
|
|
468
468
|
|
|
469
|
-
You must output a JSON object matching the
|
|
469
|
+
You must output a JSON object matching the SkeletonSpec schema.
|
|
470
470
|
|
|
471
471
|
## Core Principles
|
|
472
472
|
|
|
473
473
|
- **Minimalism over completeness.** Fewer, well-chosen tools beat many generic ones. Each MCP server costs 500-2000 context tokens.
|
|
474
|
+
- **Workflow-specific, not generic.** Select tools that directly support the user's actual workflow.
|
|
475
|
+
- **Security by default.** Essential for all projects.
|
|
476
|
+
|
|
477
|
+
## Tool Selection Rules
|
|
478
|
+
|
|
479
|
+
- Only select tools directly relevant to the described workflow
|
|
480
|
+
- Prefer free tools (auth: "none") when quality is comparable
|
|
481
|
+
- Tier 1 tools (Context7, Sequential Thinking, security-guidance) should be included in most environments
|
|
482
|
+
- For tools requiring API keys (auth: "api_key"), use \${ENV_VAR} syntax \u2014 never hardcode keys
|
|
483
|
+
- Maximum 6-8 MCP servers to avoid context bloat
|
|
484
|
+
- Include a \`reason\` for each selected tool explaining why it fits this workflow
|
|
485
|
+
|
|
486
|
+
## Context Budget (STRICT)
|
|
487
|
+
|
|
488
|
+
- MCP servers: maximum 6. Prefer fewer.
|
|
489
|
+
- Skills: maximum 3. Only include directly relevant ones.
|
|
490
|
+
- Agents: maximum 3. QA pipeline + one specialist.
|
|
491
|
+
- Hooks: maximum 4 (auto-format, block-destructive, PostCompact, plus one contextual).
|
|
492
|
+
|
|
493
|
+
If the workflow doesn't clearly need a tool, DO NOT include it.
|
|
494
|
+
Each MCP server costs 500-2000 tokens of context window.
|
|
495
|
+
|
|
496
|
+
## Output Schema
|
|
497
|
+
|
|
498
|
+
Return ONLY valid JSON matching this structure:
|
|
499
|
+
|
|
500
|
+
\`\`\`json
|
|
501
|
+
{
|
|
502
|
+
"name": "short-kebab-case-name",
|
|
503
|
+
"description": "One-line description",
|
|
504
|
+
"tools": [
|
|
505
|
+
{ "tool_id": "id-from-registry", "reason": "why this tool fits" }
|
|
506
|
+
],
|
|
507
|
+
"outline": {
|
|
508
|
+
"tech_stack": ["Python", "pandas"],
|
|
509
|
+
"workflow_type": "data-analysis",
|
|
510
|
+
"key_commands": ["ingest", "analyze", "report"],
|
|
511
|
+
"custom_rules": ["data-integrity"],
|
|
512
|
+
"custom_agents": ["data-reviewer"],
|
|
513
|
+
"custom_skills": ["ms-data-analysis"]
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
\`\`\`
|
|
517
|
+
|
|
518
|
+
Return ONLY valid JSON. No markdown fences. No text outside the JSON.`;
|
|
519
|
+
var HARNESS_PROMPT = `You are the Kairn harness compiler. Your job is to generate the full environment content from a project skeleton.
|
|
520
|
+
|
|
521
|
+
You will receive:
|
|
522
|
+
1. The skeleton (tool selections + project outline)
|
|
523
|
+
2. The user's original intent
|
|
524
|
+
|
|
525
|
+
You must generate all harness content: CLAUDE.md, commands, rules, agents, skills, and docs.
|
|
526
|
+
|
|
527
|
+
## Core Principles
|
|
528
|
+
|
|
474
529
|
- **Workflow-specific, not generic.** Every instruction, command, and rule must relate to the user's actual workflow.
|
|
475
530
|
- **Concise CLAUDE.md.** Under 120 lines. No generic text like "be helpful." Include build/test commands, reference docs/ and skills/.
|
|
476
531
|
- **Security by default.** Always include deny rules for destructive commands and secret file access.
|
|
@@ -656,28 +711,6 @@ All projects should include a PostCompact hook to restore context after compacti
|
|
|
656
711
|
|
|
657
712
|
Merge this into the settings hooks alongside the PreToolUse and PostToolUse hooks.
|
|
658
713
|
|
|
659
|
-
## Tool Selection Rules
|
|
660
|
-
|
|
661
|
-
- Only select tools directly relevant to the described workflow
|
|
662
|
-
- Prefer free tools (auth: "none") when quality is comparable
|
|
663
|
-
- Tier 1 tools (Context7, Sequential Thinking, security-guidance) should be included in most environments
|
|
664
|
-
- For tools requiring API keys (auth: "api_key"), use \${ENV_VAR} syntax \u2014 never hardcode keys
|
|
665
|
-
- Maximum 6-8 MCP servers to avoid context bloat
|
|
666
|
-
- Include a \`reason\` for each selected tool explaining why it fits this workflow
|
|
667
|
-
|
|
668
|
-
## Context Budget (STRICT)
|
|
669
|
-
|
|
670
|
-
- MCP servers: maximum 6. Prefer fewer.
|
|
671
|
-
- CLAUDE.md: maximum 120 lines.
|
|
672
|
-
- Rules: maximum 5 files, each under 20 lines.
|
|
673
|
-
- Skills: maximum 3. Only include directly relevant ones.
|
|
674
|
-
- Agents: maximum 3. QA pipeline + one specialist.
|
|
675
|
-
- Commands: no limit (loaded on demand, zero context cost).
|
|
676
|
-
- Hooks: maximum 4 (auto-format, block-destructive, PostCompact, plus one contextual).
|
|
677
|
-
|
|
678
|
-
If the workflow doesn't clearly need a tool, DO NOT include it.
|
|
679
|
-
Each MCP server costs 500-2000 tokens of context window.
|
|
680
|
-
|
|
681
714
|
## For Code Projects, Additionally Include
|
|
682
715
|
|
|
683
716
|
- \`/project:plan\` command (plan before coding)
|
|
@@ -741,6 +774,133 @@ If no autonomy level is specified, assume Level 1 (Guided).
|
|
|
741
774
|
|
|
742
775
|
Return ONLY valid JSON matching this structure:
|
|
743
776
|
|
|
777
|
+
\`\`\`json
|
|
778
|
+
{
|
|
779
|
+
"claude_md": "Full CLAUDE.md content (under 120 lines)",
|
|
780
|
+
"commands": { "help": "...", "tasks": "...", "status": "...", "fix": "...", "sprint": "...", "spec": "...", "prove": "...", "grill": "...", "reset": "..." },
|
|
781
|
+
"rules": { "continuity": "...", "security": "..." },
|
|
782
|
+
"agents": { "qa-orchestrator": "...", "linter": "...", "e2e-tester": "..." },
|
|
783
|
+
"skills": { "skill-name/SKILL": "..." },
|
|
784
|
+
"docs": { "TODO": "...", "DECISIONS": "...", "LEARNINGS": "...", "SPRINT": "..." }
|
|
785
|
+
}
|
|
786
|
+
\`\`\`
|
|
787
|
+
|
|
788
|
+
Return ONLY valid JSON. No markdown fences. No text outside the JSON.`;
|
|
789
|
+
var SYSTEM_PROMPT = `You are the Kairn environment compiler. Your job is to generate a minimal, optimal Claude Code agent environment from a user's natural language description of what they want their agent to do.
|
|
790
|
+
|
|
791
|
+
You will receive:
|
|
792
|
+
1. The user's intent (what they want to build/do)
|
|
793
|
+
2. A tool registry (available MCP servers, plugins, and hooks)
|
|
794
|
+
|
|
795
|
+
You must output a JSON object matching the EnvironmentSpec schema.
|
|
796
|
+
|
|
797
|
+
## Core Principles
|
|
798
|
+
|
|
799
|
+
- **Minimalism over completeness.** Fewer, well-chosen tools beat many generic ones. Each MCP server costs 500-2000 context tokens.
|
|
800
|
+
- **Workflow-specific, not generic.** Every instruction, command, and rule must relate to the user's actual workflow.
|
|
801
|
+
- **Concise CLAUDE.md.** Under 120 lines. No generic text like "be helpful." Include build/test commands, reference docs/ and skills/.
|
|
802
|
+
- **Security by default.** Always include deny rules for destructive commands and secret file access.
|
|
803
|
+
|
|
804
|
+
## CLAUDE.md Template (mandatory structure)
|
|
805
|
+
|
|
806
|
+
The \`claude_md\` field MUST follow this exact structure (max 120 lines):
|
|
807
|
+
|
|
808
|
+
\`\`\`
|
|
809
|
+
# {Project Name}
|
|
810
|
+
|
|
811
|
+
## Purpose
|
|
812
|
+
{one-line description}
|
|
813
|
+
|
|
814
|
+
## Tech Stack
|
|
815
|
+
{bullet list of frameworks/languages}
|
|
816
|
+
|
|
817
|
+
## Commands
|
|
818
|
+
{concrete build/test/lint/dev commands}
|
|
819
|
+
|
|
820
|
+
## Architecture
|
|
821
|
+
{brief folder structure, max 10 lines}
|
|
822
|
+
|
|
823
|
+
## Conventions
|
|
824
|
+
{3-5 specific coding rules}
|
|
825
|
+
|
|
826
|
+
## Key Commands
|
|
827
|
+
{list /project: commands with descriptions}
|
|
828
|
+
|
|
829
|
+
## Output
|
|
830
|
+
{where results go, key files}
|
|
831
|
+
|
|
832
|
+
## Verification
|
|
833
|
+
After implementing any change, verify it works:
|
|
834
|
+
- {build command} \u2014 must pass with no errors
|
|
835
|
+
- {test command} \u2014 all tests must pass
|
|
836
|
+
- {lint command} \u2014 no warnings or errors
|
|
837
|
+
- {type check command} \u2014 no type errors
|
|
838
|
+
|
|
839
|
+
If any verification step fails, fix the issue before moving on.
|
|
840
|
+
Do NOT skip verification steps.
|
|
841
|
+
|
|
842
|
+
## Known Gotchas
|
|
843
|
+
<!-- After any correction, add it here: "Update CLAUDE.md so you don't make that mistake again." -->
|
|
844
|
+
<!-- Prune this section when it exceeds 10 items \u2014 keep only the recurring ones. -->
|
|
845
|
+
- (none yet \u2014 this section grows as you work)
|
|
846
|
+
|
|
847
|
+
## Debugging
|
|
848
|
+
When debugging, paste raw error output. Don't summarize \u2014 Claude works better with raw data.
|
|
849
|
+
Use subagents for deep investigation to keep main context clean.
|
|
850
|
+
|
|
851
|
+
## Git Workflow
|
|
852
|
+
- Prefer small, focused commits (one feature or fix per commit)
|
|
853
|
+
- Use conventional commits: feat:, fix:, docs:, refactor:, test:
|
|
854
|
+
- Target < 200 lines per PR when possible
|
|
855
|
+
\`\`\`
|
|
856
|
+
|
|
857
|
+
Do not add generic filler. Every line must be specific to the user's workflow.
|
|
858
|
+
|
|
859
|
+
## What You Must Always Include
|
|
860
|
+
|
|
861
|
+
1. A concise, workflow-specific \`claude_md\` (the CLAUDE.md content)
|
|
862
|
+
2. A \`/project:help\` command that explains the environment
|
|
863
|
+
3. A \`/project:tasks\` command for task management via TODO.md
|
|
864
|
+
4. A \`docs/TODO.md\` file for continuity
|
|
865
|
+
5. A \`docs/DECISIONS.md\` file for architectural decisions
|
|
866
|
+
6. A \`docs/LEARNINGS.md\` file for non-obvious discoveries
|
|
867
|
+
7. A \`rules/continuity.md\` rule encouraging updates to DECISIONS.md and LEARNINGS.md
|
|
868
|
+
8. A \`rules/security.md\` rule with essential security instructions
|
|
869
|
+
9. settings.json with deny rules for \`rm -rf\`, \`curl|sh\`, reading \`.env\` and \`secrets/\`
|
|
870
|
+
10. A \`/project:status\` command for code projects (uses ! for live git/test output)
|
|
871
|
+
11. A \`/project:fix\` command for code projects (uses $ARGUMENTS for issue number)
|
|
872
|
+
12. A \`docs/SPRINT.md\` file for sprint contracts (acceptance criteria, verification steps)
|
|
873
|
+
13. A "Verification" section in CLAUDE.md with concrete verify commands for the project
|
|
874
|
+
14. A "Known Gotchas" section in CLAUDE.md (starts empty, grows with corrections)
|
|
875
|
+
15. A "Debugging" section in CLAUDE.md (2 lines: paste raw errors, use subagents)
|
|
876
|
+
16. A "Git Workflow" section in CLAUDE.md (3 rules: small commits, conventional format, <200 lines PR)
|
|
877
|
+
|
|
878
|
+
## Tool Selection Rules
|
|
879
|
+
|
|
880
|
+
- Only select tools directly relevant to the described workflow
|
|
881
|
+
- Prefer free tools (auth: "none") when quality is comparable
|
|
882
|
+
- Tier 1 tools (Context7, Sequential Thinking, security-guidance) should be included in most environments
|
|
883
|
+
- For tools requiring API keys (auth: "api_key"), use \${ENV_VAR} syntax \u2014 never hardcode keys
|
|
884
|
+
- Maximum 6-8 MCP servers to avoid context bloat
|
|
885
|
+
- Include a \`reason\` for each selected tool explaining why it fits this workflow
|
|
886
|
+
|
|
887
|
+
## Context Budget (STRICT)
|
|
888
|
+
|
|
889
|
+
- MCP servers: maximum 6. Prefer fewer.
|
|
890
|
+
- CLAUDE.md: maximum 120 lines.
|
|
891
|
+
- Rules: maximum 5 files, each under 20 lines.
|
|
892
|
+
- Skills: maximum 3. Only include directly relevant ones.
|
|
893
|
+
- Agents: maximum 3. QA pipeline + one specialist.
|
|
894
|
+
- Commands: no limit (loaded on demand, zero context cost).
|
|
895
|
+
- Hooks: maximum 4 (auto-format, block-destructive, PostCompact, plus one contextual).
|
|
896
|
+
|
|
897
|
+
If the workflow doesn't clearly need a tool, DO NOT include it.
|
|
898
|
+
Each MCP server costs 500-2000 tokens of context window.
|
|
899
|
+
|
|
900
|
+
## Output Schema
|
|
901
|
+
|
|
902
|
+
Return ONLY valid JSON matching this structure:
|
|
903
|
+
|
|
744
904
|
\`\`\`json
|
|
745
905
|
{
|
|
746
906
|
"name": "short-kebab-case-name",
|
|
@@ -761,14 +921,7 @@ Return ONLY valid JSON matching this structure:
|
|
|
761
921
|
},
|
|
762
922
|
"commands": {
|
|
763
923
|
"help": "markdown content for /project:help",
|
|
764
|
-
"tasks": "markdown content for /project:tasks"
|
|
765
|
-
"status": "Show project status:\\n\\n!git status --short\\n\\n!git log --oneline -5\\n\\nRead TODO.md and summarize progress.",
|
|
766
|
-
"fix": "Fix issue #$ARGUMENTS:\\n\\n1. Read the issue and understand the problem\\n2. Plan the fix\\n3. Implement the fix\\n4. Run tests:\\n\\n!npm test 2>&1 | tail -20\\n\\n5. Commit with: fix: resolve #$ARGUMENTS",
|
|
767
|
-
"sprint": "Define a sprint contract for the next feature:\\n\\n1. Read docs/TODO.md for context:\\n\\n!cat docs/TODO.md 2>/dev/null\\n\\n2. Write a CONTRACT to docs/SPRINT.md with: feature name, acceptance criteria, verification steps, files to modify, scope estimate.\\n3. Do NOT start coding until contract is confirmed.",
|
|
768
|
-
"spec": "Before building this feature, interview me to create a complete spec.\\n\\nAsk me 5-8 questions, one at a time:\\n1. What specifically should this feature do?\\n2. Who uses it and how?\\n3. What are the edge cases or error states?\\n4. How will we know it works? (acceptance criteria)\\n5. What should it explicitly NOT do? (scope boundaries)\\n6. Any dependencies, APIs, or constraints?\\n7. How does it fit with existing code?\\n8. Priority: speed, quality, or flexibility?\\n\\nAfter my answers, write a structured spec to docs/SPRINT.md:\\n- Feature name\\n- Description (from my answers, not invented)\\n- Acceptance criteria (testable)\\n- Out of scope\\n- Technical approach\\n\\nDo NOT start coding until I confirm the spec.",
|
|
769
|
-
"prove": "Prove the current implementation works.\\n\\n1. Run the full test suite:\\n\\n!npm test 2>&1\\n\\n2. Compare against main:\\n\\n!git diff main --stat 2>/dev/null\\n\\n3. Show evidence:\\n - Test results (pass/fail counts)\\n - Behavioral diff (main vs this branch)\\n - Edge cases tested\\n - Error handling verified\\n\\n4. Rate confidence:\\n - HIGH: All tests pass, edge cases covered, no regressions\\n - MEDIUM: Core works, some edges untested\\n - LOW: Needs more verification\\n\\nIf LOW or MEDIUM, explain what's missing and fix it.",
|
|
770
|
-
"grill": "Review the current changes adversarially.\\n\\n!git diff --staged 2>/dev/null || git diff HEAD 2>/dev/null\\n\\nAct as a senior engineer. For each file changed:\\n\\n1. \\"Why this approach over X?\\"\\n2. \\"What happens if Y input?\\"\\n3. \\"Performance impact of Z?\\"\\n4. \\"This could break if...\\"\\n\\nFor each concern:\\n- Severity: BLOCKER / SHOULD-FIX / NITPICK\\n- The exact scenario that could fail\\n- A suggested alternative\\n\\nDo NOT approve until all BLOCKERs are resolved.",
|
|
771
|
-
"reset": "Stop. Read docs/DECISIONS.md and docs/LEARNINGS.md.\\n\\nConsidering everything we've learned:\\n1. What was the original approach?\\n2. What went wrong or feels inelegant?\\n3. What would the clean solution look like?\\n\\nPropose the new approach. Do NOT implement yet.\\nIf I approve, stash current changes:\\n git stash -m \\"pre-reset: $(date +%Y%m%d-%H%M)\\"\\n\\nThen implement the elegant solution."
|
|
924
|
+
"tasks": "markdown content for /project:tasks"
|
|
772
925
|
},
|
|
773
926
|
"rules": {
|
|
774
927
|
"continuity": "markdown content for continuity rule",
|
|
@@ -778,15 +931,13 @@ Return ONLY valid JSON matching this structure:
|
|
|
778
931
|
"skill-name/SKILL": "markdown content with YAML frontmatter"
|
|
779
932
|
},
|
|
780
933
|
"agents": {
|
|
781
|
-
"qa-orchestrator": "
|
|
782
|
-
"linter": "---\\nname: linter\\ndescription: Fast static analysis\\nmodel: haiku\\n---\\nRun available linters (eslint, prettier, biome, ruff, mypy, semgrep). Report issues.",
|
|
783
|
-
"e2e-tester": "---\\nname: e2e-tester\\ndescription: Browser-based QA via Playwright\\nmodel: sonnet\\n---\\nTest user flows via Playwright. Verify behavior, not just DOM. Screenshot failures."
|
|
934
|
+
"qa-orchestrator": "agent markdown with YAML frontmatter"
|
|
784
935
|
},
|
|
785
936
|
"docs": {
|
|
786
|
-
"TODO": "# TODO\\n\\n- [ ] First task
|
|
787
|
-
"DECISIONS": "# Decisions\\n\\nArchitectural decisions
|
|
788
|
-
"LEARNINGS": "# Learnings\\n\\nNon-obvious discoveries
|
|
789
|
-
"SPRINT": "# Sprint Contract\\n\\nDefine acceptance criteria
|
|
937
|
+
"TODO": "# TODO\\n\\n- [ ] First task",
|
|
938
|
+
"DECISIONS": "# Decisions\\n\\nArchitectural decisions.",
|
|
939
|
+
"LEARNINGS": "# Learnings\\n\\nNon-obvious discoveries.",
|
|
940
|
+
"SPRINT": "# Sprint Contract\\n\\nDefine acceptance criteria."
|
|
790
941
|
}
|
|
791
942
|
}
|
|
792
943
|
}
|
|
@@ -864,7 +1015,7 @@ async function loadRegistry() {
|
|
|
864
1015
|
}
|
|
865
1016
|
|
|
866
1017
|
// src/compiler/compile.ts
|
|
867
|
-
function
|
|
1018
|
+
function buildSkeletonMessage(intent, registry) {
|
|
868
1019
|
const registrySummary = registry.map(
|
|
869
1020
|
(t) => `- ${t.id} (${t.type}, tier ${t.tier}, auth: ${t.auth}): ${t.description} [best_for: ${t.best_for.join(", ")}]`
|
|
870
1021
|
).join("\n");
|
|
@@ -876,25 +1027,60 @@ ${intent}
|
|
|
876
1027
|
|
|
877
1028
|
${registrySummary}
|
|
878
1029
|
|
|
879
|
-
Generate the
|
|
1030
|
+
Generate the skeleton JSON now.`;
|
|
1031
|
+
}
|
|
1032
|
+
/**
 * Compose the user message for the harness pass.
 *
 * The message has two markdown sections — the original intent and the
 * pretty-printed skeleton — followed by the generation instruction.
 *
 * @param {string} intent - The user's natural-language description.
 * @param {object} skeleton - Skeleton object; serialised with 2-space indentation.
 * @param {boolean} [concise] - When truthy, appends a brevity instruction
 *   (used by the caller's retry path).
 * @returns {string} The assembled message.
 */
function buildHarnessMessage(intent, skeleton, concise) {
  // Interpolate through template literals so undefined/null render as text,
  // exactly as they would inside a single template string.
  const sections = [
    "## User Intent",
    "",
    `${intent}`,
    "",
    "## Project Skeleton",
    "",
    `${JSON.stringify(skeleton, null, 2)}`,
    "",
    "Generate the harness content JSON now.",
  ];
  let message = sections.join("\n");
  if (concise) {
    message += "\n\nIMPORTANT: Be concise. Maximum 80 lines for claude_md. Maximum 5 commands. Keep all content brief.";
  }
  return message;
}
|
|
881
|
-
function
|
|
1045
|
+
/**
 * Extract and validate the skeleton object from the model's pass-1 reply.
 *
 * Accepts raw JSON, JSON wrapped in a ``` / ```json fence, or JSON surrounded
 * by stray text (the outermost `{ ... }` span is used).
 *
 * @param {string} text - Raw model output.
 * @returns {object} Parsed skeleton; guaranteed to have a truthy `name` and an
 *   array `tools`.
 * @throws {Error} "Pass 1 (skeleton) did not return valid JSON." when the input
 *   is not a string or contains no `{ ... }` span.
 * @throws {Error} "Failed to parse skeleton JSON: ..." when the span is not
 *   valid JSON (the underlying SyntaxError is attached as `cause`).
 * @throws {Error} "Skeleton missing required fields: name, tools" when the JSON
 *   parses but lacks the required fields.
 */
function parseSkeletonResponse(text) {
  // A non-string reply would otherwise crash on .trim() with an unrelated TypeError.
  if (typeof text !== "string") {
    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
  }
  let cleaned = text.trim();
  if (cleaned.startsWith("```")) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }
  const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    throw new Error("Pass 1 (skeleton) did not return valid JSON.");
  }

  let parsed;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (err) {
    throw new Error(
      `Failed to parse skeleton JSON: ${err instanceof Error ? err.message : String(err)}`,
      { cause: err }
    );
  }

  // Validation lives outside the try block so a schema problem is not
  // re-labelled as a JSON parse failure.
  if (!parsed.name || !Array.isArray(parsed.tools)) {
    throw new Error("Skeleton missing required fields: name, tools");
  }
  return parsed;
}
|
|
1066
|
+
/**
 * Extract and validate the harness object from the model's pass-2 reply.
 *
 * Accepts raw JSON, JSON wrapped in a ``` / ```json fence, or JSON surrounded
 * by stray text (the outermost `{ ... }` span is used).
 *
 * @param {string} text - Raw model output.
 * @returns {object} Parsed harness; guaranteed to have truthy `claude_md` and
 *   `commands`.
 * @throws {Error} "Pass 2 (harness) did not return valid JSON." when the input
 *   is not a string or contains no `{ ... }` span.
 * @throws {Error} "Failed to parse harness JSON: ..." when the span is not
 *   valid JSON (the underlying SyntaxError is attached as `cause`).
 * @throws {Error} "Harness missing required fields: claude_md, commands" when
 *   the JSON parses but lacks the required fields.
 */
function parseHarnessResponse(text) {
  // A non-string reply would otherwise crash on .trim() with an unrelated TypeError.
  if (typeof text !== "string") {
    throw new Error("Pass 2 (harness) did not return valid JSON.");
  }
  let cleaned = text.trim();
  if (cleaned.startsWith("```")) {
    cleaned = cleaned.replace(/^```(?:json)?\n?/, "").replace(/\n?```$/, "");
  }
  const jsonMatch = cleaned.match(/\{[\s\S]*\}/);
  if (!jsonMatch) {
    throw new Error("Pass 2 (harness) did not return valid JSON.");
  }

  let parsed;
  try {
    parsed = JSON.parse(jsonMatch[0]);
  } catch (err) {
    throw new Error(
      `Failed to parse harness JSON: ${err instanceof Error ? err.message : String(err)}`,
      { cause: err }
    );
  }

  // Validation lives outside the try block so a schema problem is not
  // re-labelled as a JSON parse failure.
  if (!parsed.claude_md || !parsed.commands) {
    throw new Error("Harness missing required fields: claude_md, commands");
  }
  return parsed;
}
|
|
@@ -928,15 +1114,17 @@ function classifyError(err, provider) {
|
|
|
928
1114
|
}
|
|
929
1115
|
return `${provider} API error: ${msg}`;
|
|
930
1116
|
}
|
|
931
|
-
async function callLLM(config, userMessage) {
|
|
1117
|
+
async function callLLM(config, userMessage, options) {
|
|
1118
|
+
const maxTokens = options?.maxTokens ?? 8192;
|
|
1119
|
+
const systemPrompt = options?.systemPrompt ?? SYSTEM_PROMPT;
|
|
932
1120
|
const providerName = getProviderName(config.provider);
|
|
933
1121
|
if (config.provider === "anthropic") {
|
|
934
1122
|
const client2 = new Anthropic2({ apiKey: config.api_key });
|
|
935
1123
|
try {
|
|
936
1124
|
const response = await client2.messages.create({
|
|
937
1125
|
model: config.model,
|
|
938
|
-
max_tokens:
|
|
939
|
-
system:
|
|
1126
|
+
max_tokens: maxTokens,
|
|
1127
|
+
system: systemPrompt,
|
|
940
1128
|
messages: [{ role: "user", content: userMessage }]
|
|
941
1129
|
});
|
|
942
1130
|
const textBlock = response.content.find((block) => block.type === "text");
|
|
@@ -955,9 +1143,9 @@ async function callLLM(config, userMessage) {
|
|
|
955
1143
|
try {
|
|
956
1144
|
const response = await client.chat.completions.create({
|
|
957
1145
|
model: config.model,
|
|
958
|
-
max_tokens:
|
|
1146
|
+
max_tokens: maxTokens,
|
|
959
1147
|
messages: [
|
|
960
|
-
{ role: "system", content:
|
|
1148
|
+
{ role: "system", content: systemPrompt },
|
|
961
1149
|
{ role: "user", content: userMessage }
|
|
962
1150
|
]
|
|
963
1151
|
});
|
|
@@ -970,6 +1158,66 @@ async function callLLM(config, userMessage) {
|
|
|
970
1158
|
throw new Error(classifyError(err, providerName));
|
|
971
1159
|
}
|
|
972
1160
|
}
|
|
1161
|
+
/**
 * Build the settings object (permissions + hooks) for a compiled environment.
 *
 * Always emitted:
 *  - allow/deny permission lists,
 *  - a PreToolUse hook on Bash that blocks commands matching a destructive pattern,
 *  - a PostCompact prompt hook asking to re-read CLAUDE.md and docs/SPRINT.md.
 * Emitted only when the skeleton's tech stack mentions typescript, javascript,
 * react or next (case-insensitive substring match):
 *  - a PostToolUse hook on Edit|Write that runs prettier on the edited file.
 *
 * @param {object} skeleton - Skeleton object; `outline.tech_stack` is read when
 *   present. A missing outline/tech_stack or non-string entries are treated as
 *   "no match" instead of throwing.
 * @param {Array<object>} registry - Accepted for signature compatibility; not
 *   consulted when building settings.
 * @returns {{permissions: {allow: string[], deny: string[]}, hooks: object}}
 */
function buildSettings(skeleton, registry) {
  const allow = ["Read", "Write", "Edit", "Bash(npm run *)", "Bash(npx *)"];
  const deny = [
    "Bash(rm -rf *)",
    "Bash(curl * | sh)",
    "Bash(wget * | sh)",
    "Read(./.env)",
    "Read(./secrets/**)"
  ];
  const hooks = {
    PreToolUse: [
      {
        matcher: "Bash",
        hooks: [
          {
            type: "command",
            // Reads the tool call JSON on stdin, extracts the command via jq and
            // exits 2 with a message on stderr when it matches the destructive pattern.
            command: `CMD=$(cat | jq -r '.tool_input.command // empty') && echo "$CMD" | grep -qiE 'rm\\s+-rf\\s+/|DROP\\s+TABLE|curl.*\\|\\s*sh' && echo 'Blocked destructive command' >&2 && exit 2 || true`
          }
        ]
      }
    ],
    PostCompact: [
      {
        matcher: "",
        hooks: [
          {
            type: "prompt",
            prompt: "Re-read CLAUDE.md and docs/SPRINT.md (if it exists) to restore project context after compaction."
          }
        ]
      }
    ]
  };

  // The skeleton parser only guarantees `name` and `tools`, so the outline may
  // be absent or malformed; read it defensively.
  const rawStack = skeleton?.outline?.tech_stack;
  const techStack = (Array.isArray(rawStack) ? rawStack : [])
    .filter((entry) => typeof entry === "string")
    .map((entry) => entry.toLowerCase());
  const jsMarkers = ["typescript", "javascript", "react", "next"];
  const isJsProject = techStack.some((entry) => jsMarkers.some((marker) => entry.includes(marker)));

  if (isJsProject) {
    hooks.PostToolUse = [
      {
        matcher: "Edit|Write",
        hooks: [
          {
            type: "command",
            // Best-effort formatting: failures are silenced and never block the edit.
            command: `FILE=$(cat | jq -r '.tool_input.file_path // empty') && [ -n "$FILE" ] && npx prettier --write "$FILE" 2>/dev/null || true`
          }
        ]
      }
    ];
  }
  return { permissions: { allow, deny }, hooks };
}
|
|
1211
|
+
/**
 * Collect the MCP server configuration for every selected tool.
 *
 * For each tool in `skeleton.tools`, the registry entry with the same id is
 * looked up and, when it carries `install.mcp_config`, that config is copied
 * into the result under the tool's id. Tools that are unknown to the registry,
 * or whose entry has no `install` / `mcp_config`, are skipped.
 *
 * @param {{tools: Array<{tool_id: string}>}} skeleton - Selected tools.
 * @param {Array<{id: string, install?: {mcp_config?: object}}>} registry -
 *   Registry entries.
 * @returns {Object<string, object>} Map of tool id to its MCP config.
 */
function buildMcpConfig(skeleton, registry) {
  // Index the registry once; on duplicate ids the first entry wins, matching
  // a linear first-match search.
  const registryById = new Map();
  for (const entry of registry) {
    if (!registryById.has(entry.id)) {
      registryById.set(entry.id, entry);
    }
  }

  const config = {};
  for (const tool of skeleton.tools) {
    // `install` is optional on registry entries, so chain through it safely.
    const mcpConfig = registryById.get(tool.tool_id)?.install?.mcp_config;
    if (mcpConfig) {
      config[tool.tool_id] = mcpConfig;
    }
  }
  return config;
}
|
|
973
1221
|
function validateSpec(spec, onProgress) {
|
|
974
1222
|
const warnings = [];
|
|
975
1223
|
if (spec.tools.length > 8) {
|
|
@@ -995,17 +1243,52 @@ async function compile(intent, onProgress) {
|
|
|
995
1243
|
}
|
|
996
1244
|
onProgress?.("Loading tool registry...");
|
|
997
1245
|
const registry = await loadRegistry();
|
|
998
|
-
onProgress?.(
|
|
999
|
-
const
|
|
1000
|
-
const
|
|
1001
|
-
|
|
1002
|
-
|
|
1246
|
+
onProgress?.("Analyzing workflow...");
|
|
1247
|
+
const skeletonMsg = buildSkeletonMessage(intent, registry);
|
|
1248
|
+
const skeletonText = await callLLM(config, skeletonMsg, {
|
|
1249
|
+
maxTokens: 2048,
|
|
1250
|
+
systemPrompt: SKELETON_PROMPT
|
|
1251
|
+
});
|
|
1252
|
+
const skeleton = parseSkeletonResponse(skeletonText);
|
|
1253
|
+
onProgress?.("Generating environment...");
|
|
1254
|
+
const harnessMsg = buildHarnessMessage(intent, skeleton);
|
|
1255
|
+
let harness;
|
|
1256
|
+
try {
|
|
1257
|
+
const harnessText = await callLLM(config, harnessMsg, {
|
|
1258
|
+
maxTokens: 8192,
|
|
1259
|
+
systemPrompt: HARNESS_PROMPT
|
|
1260
|
+
});
|
|
1261
|
+
harness = parseHarnessResponse(harnessText);
|
|
1262
|
+
} catch {
|
|
1263
|
+
onProgress?.("Retrying with concise mode...");
|
|
1264
|
+
const retryMsg = buildHarnessMessage(intent, skeleton, true);
|
|
1265
|
+
const retryText = await callLLM(config, retryMsg, {
|
|
1266
|
+
maxTokens: 8192,
|
|
1267
|
+
systemPrompt: HARNESS_PROMPT
|
|
1268
|
+
});
|
|
1269
|
+
harness = parseHarnessResponse(retryText);
|
|
1270
|
+
}
|
|
1271
|
+
onProgress?.("Configuring tools...");
|
|
1272
|
+
const settings = buildSettings(skeleton, registry);
|
|
1273
|
+
const mcpConfig = buildMcpConfig(skeleton, registry);
|
|
1003
1274
|
const spec = {
|
|
1004
1275
|
id: `env_${crypto.randomUUID()}`,
|
|
1005
1276
|
intent,
|
|
1006
1277
|
created_at: (/* @__PURE__ */ new Date()).toISOString(),
|
|
1007
|
-
|
|
1008
|
-
|
|
1278
|
+
name: skeleton.name,
|
|
1279
|
+
description: skeleton.description,
|
|
1280
|
+
autonomy_level: 1,
|
|
1281
|
+
tools: skeleton.tools,
|
|
1282
|
+
harness: {
|
|
1283
|
+
claude_md: harness.claude_md,
|
|
1284
|
+
settings,
|
|
1285
|
+
mcp_config: mcpConfig,
|
|
1286
|
+
commands: harness.commands,
|
|
1287
|
+
rules: harness.rules,
|
|
1288
|
+
skills: harness.skills ?? {},
|
|
1289
|
+
agents: harness.agents ?? {},
|
|
1290
|
+
docs: harness.docs
|
|
1291
|
+
}
|
|
1009
1292
|
};
|
|
1010
1293
|
validateSpec(spec, onProgress);
|
|
1011
1294
|
await ensureDirs();
|