@rely-ai/caliber 1.8.0 → 1.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/bin.js +500 -12
  2. package/package.json +1 -1
package/dist/bin.js CHANGED
@@ -627,6 +627,33 @@ init_config();
627
627
 
628
628
  // src/llm/anthropic.ts
629
629
  import Anthropic from "@anthropic-ai/sdk";
630
+
631
+ // src/llm/usage.ts
632
// Running token totals for this process, keyed by model name.
var usageByModel = /* @__PURE__ */ new Map();

/**
 * Adds the token counts of one LLM call to the running totals for `model`.
 *
 * The first call for a model creates its record with `calls: 1`; later calls
 * add to the existing record in place and increment `calls`.
 *
 * @param {string} model - Model identifier used as the aggregation key.
 * @param {{inputTokens?: number, outputTokens?: number, cacheReadTokens?: number, cacheWriteTokens?: number}} usage
 *   Token counts reported for a single call. Any missing counter counts as 0.
 * @returns {void}
 */
function trackUsage(model, usage) {
  // Default every counter, not only the cache ones: a single report without
  // input/output counts would otherwise turn the running totals into NaN.
  const inputTokens = usage.inputTokens ?? 0;
  const outputTokens = usage.outputTokens ?? 0;
  const cacheReadTokens = usage.cacheReadTokens ?? 0;
  const cacheWriteTokens = usage.cacheWriteTokens ?? 0;

  const existing = usageByModel.get(model);
  if (existing) {
    existing.inputTokens += inputTokens;
    existing.outputTokens += outputTokens;
    existing.cacheReadTokens += cacheReadTokens;
    existing.cacheWriteTokens += cacheWriteTokens;
    existing.calls += 1;
    return;
  }

  usageByModel.set(model, {
    model,
    inputTokens,
    outputTokens,
    cacheReadTokens,
    cacheWriteTokens,
    calls: 1
  });
}
652
/**
 * Lists the usage records accumulated so far, one entry per model.
 *
 * @returns {Array<object>} The records currently stored in `usageByModel`,
 *   in the map's iteration order.
 */
function getUsageSummary() {
  return [...usageByModel.values()];
}
655
+
656
+ // src/llm/anthropic.ts
630
657
  var AnthropicProvider = class {
631
658
  client;
632
659
  defaultModel;
@@ -641,6 +668,16 @@ var AnthropicProvider = class {
641
668
  system: [{ type: "text", text: options.system, cache_control: { type: "ephemeral" } }],
642
669
  messages: [{ role: "user", content: options.prompt }]
643
670
  });
671
+ const model = options.model || this.defaultModel;
672
+ if (response.usage) {
673
+ const u = response.usage;
674
+ trackUsage(model, {
675
+ inputTokens: u.input_tokens ?? 0,
676
+ outputTokens: u.output_tokens ?? 0,
677
+ cacheReadTokens: u.cache_read_input_tokens,
678
+ cacheWriteTokens: u.cache_creation_input_tokens
679
+ });
680
+ }
644
681
  const block = response.content[0];
645
682
  return block.type === "text" ? block.text : "";
646
683
  }
@@ -667,11 +704,24 @@ var AnthropicProvider = class {
667
704
  messages
668
705
  });
669
706
  let stopReason;
707
+ let usage;
708
+ const model = options.model || this.defaultModel;
670
709
  stream.on("message", (message) => {
671
- stopReason = message.stop_reason;
710
+ const msg = message;
711
+ stopReason = msg.stop_reason;
712
+ const u = msg.usage;
713
+ if (u) {
714
+ usage = {
715
+ inputTokens: u.input_tokens ?? 0,
716
+ outputTokens: u.output_tokens ?? 0,
717
+ cacheReadTokens: u.cache_read_input_tokens,
718
+ cacheWriteTokens: u.cache_creation_input_tokens
719
+ };
720
+ trackUsage(model, usage);
721
+ }
672
722
  });
673
723
  stream.on("text", (text) => callbacks.onText(text));
674
- stream.on("end", () => callbacks.onEnd({ stopReason }));
724
+ stream.on("end", () => callbacks.onEnd({ stopReason, usage }));
675
725
  stream.on("error", (error) => callbacks.onError(error));
676
726
  }
677
727
  };
@@ -722,6 +772,16 @@ var VertexProvider = class {
722
772
  system: [{ type: "text", text: options.system, cache_control: { type: "ephemeral" } }],
723
773
  messages: [{ role: "user", content: options.prompt }]
724
774
  });
775
+ const model = options.model || this.defaultModel;
776
+ if (response.usage) {
777
+ const u = response.usage;
778
+ trackUsage(model, {
779
+ inputTokens: u.input_tokens ?? 0,
780
+ outputTokens: u.output_tokens ?? 0,
781
+ cacheReadTokens: u.cache_read_input_tokens,
782
+ cacheWriteTokens: u.cache_creation_input_tokens
783
+ });
784
+ }
725
785
  const block = response.content[0];
726
786
  return block.type === "text" ? block.text : "";
727
787
  }
@@ -740,11 +800,24 @@ var VertexProvider = class {
740
800
  messages
741
801
  });
742
802
  let stopReason;
803
+ let usage;
804
+ const model = options.model || this.defaultModel;
743
805
  stream.on("message", (message) => {
744
- stopReason = message.stop_reason;
806
+ const msg = message;
807
+ stopReason = msg.stop_reason;
808
+ const u = msg.usage;
809
+ if (u) {
810
+ usage = {
811
+ inputTokens: u.input_tokens ?? 0,
812
+ outputTokens: u.output_tokens ?? 0,
813
+ cacheReadTokens: u.cache_read_input_tokens,
814
+ cacheWriteTokens: u.cache_creation_input_tokens
815
+ };
816
+ trackUsage(model, usage);
817
+ }
745
818
  });
746
819
  stream.on("text", (text) => callbacks.onText(text));
747
- stream.on("end", () => callbacks.onEnd({ stopReason }));
820
+ stream.on("end", () => callbacks.onEnd({ stopReason, usage }));
748
821
  stream.on("error", (error) => callbacks.onError(error));
749
822
  }
750
823
  };
@@ -770,6 +843,13 @@ var OpenAICompatProvider = class {
770
843
  { role: "user", content: options.prompt }
771
844
  ]
772
845
  });
846
+ const model = options.model || this.defaultModel;
847
+ if (response.usage) {
848
+ trackUsage(model, {
849
+ inputTokens: response.usage.prompt_tokens ?? 0,
850
+ outputTokens: response.usage.completion_tokens ?? 0
851
+ });
852
+ }
773
853
  return response.choices[0]?.message?.content || "";
774
854
  }
775
855
  async listModels() {
@@ -797,13 +877,23 @@ var OpenAICompatProvider = class {
797
877
  });
798
878
  try {
799
879
  let stopReason;
880
+ let usage;
881
+ const model = options.model || this.defaultModel;
800
882
  for await (const chunk of stream) {
801
883
  const delta = chunk.choices[0]?.delta?.content;
802
884
  if (delta != null) callbacks.onText(delta);
803
885
  const finishReason = chunk.choices[0]?.finish_reason;
804
886
  if (finishReason) stopReason = finishReason === "length" ? "max_tokens" : finishReason;
887
+ const chunkUsage = chunk.usage;
888
+ if (chunkUsage) {
889
+ usage = {
890
+ inputTokens: chunkUsage.prompt_tokens ?? 0,
891
+ outputTokens: chunkUsage.completion_tokens ?? 0
892
+ };
893
+ trackUsage(model, usage);
894
+ }
805
895
  }
806
- callbacks.onEnd({ stopReason });
896
+ callbacks.onEnd({ stopReason, usage });
807
897
  } catch (error) {
808
898
  callbacks.onError(error instanceof Error ? error : new Error(String(error)));
809
899
  }
@@ -1502,6 +1592,120 @@ OUTPUT SIZE CONSTRAINTS \u2014 these are critical:
1502
1592
  - Each skill content: max 150 lines. Focus on patterns and examples, not exhaustive docs.
1503
1593
  - Cursor rules: max 5 .mdc files.
1504
1594
  - If the project is large, prioritize depth on the 3-4 most critical tools over breadth across everything.`;
1595
+ var CORE_GENERATION_PROMPT = `You are an expert auditor for coding agent configurations (Claude Code, Cursor, and Codex).
1596
+
1597
+ Your job depends on context:
1598
+ - If no existing configs exist \u2192 generate an initial setup from scratch.
1599
+ - If existing configs are provided \u2192 audit them and suggest targeted improvements. Preserve accurate content \u2014 don't rewrite what's already correct.
1600
+
1601
+ You understand these config files:
1602
+ - CLAUDE.md: Project context for Claude Code \u2014 build/test commands, architecture, conventions.
1603
+ - AGENTS.md: Primary instructions file for OpenAI Codex \u2014 same purpose as CLAUDE.md but for the Codex agent.
1604
+ - .cursorrules: Coding rules for Cursor (deprecated legacy format \u2014 do NOT generate this).
1605
+ - .cursor/rules/*.mdc: Modern Cursor rules with frontmatter (description, globs, alwaysApply).
1606
+
1607
+ Do NOT generate .claude/settings.json, .claude/settings.local.json, or mcpServers.
1608
+
1609
+ Your output MUST follow this exact format (no markdown fences):
1610
+
1611
+ 1. Exactly 6 short status lines (one per line, prefixed with "STATUS: "). Each should be a creative, specific description of what you're analyzing for THIS project \u2014 reference the project's actual languages, frameworks, or tools.
1612
+
1613
+ 2. A brief explanation section starting with "EXPLAIN:" on its own line:
1614
+
1615
+ EXPLAIN:
1616
+ [Changes]
1617
+ - **file-or-skill-name**: short reason (max 10 words)
1618
+ [Deletions]
1619
+ - **file-path**: short reason (max 10 words)
1620
+
1621
+ Omit empty categories. Keep each reason punchy and specific. End with a blank line.
1622
+
1623
+ 3. The JSON object starting with {.
1624
+
1625
+ CoreSetup schema:
1626
+ {
1627
+ "targetAgent": ["claude", "cursor", "codex"] (array of selected agents),
1628
+ "fileDescriptions": {
1629
+ "<file-path>": "reason for this change (max 80 chars)"
1630
+ },
1631
+ "deletions": [
1632
+ { "filePath": "<path>", "reason": "why remove (max 80 chars)" }
1633
+ ],
1634
+ "claude": {
1635
+ "claudeMd": "string (markdown content for CLAUDE.md)",
1636
+ "skillTopics": [{ "name": "string (kebab-case)", "description": "string (what this skill does and WHEN to use it \u2014 include trigger phrases)" }]
1637
+ },
1638
+ "codex": {
1639
+ "agentsMd": "string (markdown content for AGENTS.md)",
1640
+ "skillTopics": [{ "name": "string (kebab-case)", "description": "string" }]
1641
+ },
1642
+ "cursor": {
1643
+ "skillTopics": [{ "name": "string (kebab-case)", "description": "string" }],
1644
+ "rules": [{ "filename": "string.mdc", "content": "string (with frontmatter)" }]
1645
+ }
1646
+ }
1647
+
1648
+ IMPORTANT: Do NOT generate full skill content. Only output skill topic names and descriptions.
1649
+ Skills will be generated separately. Generate exactly 3 skill topics per target platform.
1650
+
1651
+ Skill topic description MUST include WHAT it does + WHEN to use it with specific trigger phrases.
1652
+ Example: "Manages database migrations. Use when user says 'run migration', 'create migration', 'db schema change', or modifies files in db/migrations/."
1653
+
1654
+ The "fileDescriptions" object MUST include a one-liner for every file that will be created or modified.
1655
+ The "deletions" array should list files that should be removed (e.g. stale configs). Omit if empty.
1656
+
1657
+ SCORING CRITERIA \u2014 your output is scored deterministically. Optimize for 100/100:
1658
+
1659
+ Existence (25 pts):
1660
+ - CLAUDE.md exists (6 pts) \u2014 always generate for claude/both targets
1661
+ - AGENTS.md exists (6 pts) \u2014 always generate for codex target
1662
+ - Skills configured (8 pts) \u2014 3 skill topics = full points
1663
+ - For "both" target: .cursor/rules/ exist (3+3 pts), cross-platform parity (2 pts)
1664
+
1665
+ Quality (25 pts):
1666
+ - Build/test/lint commands documented (8 pts) \u2014 include actual commands from the project
1667
+ - Concise context files (6 pts) \u2014 keep CLAUDE.md under 100 lines (200=4pts, 300=3pts, 500+=0pts)
1668
+ - No vague instructions (4 pts) \u2014 avoid "follow best practices", "write clean code", "ensure quality"
1669
+ - No directory tree listings (3 pts) \u2014 do NOT include tree-style file listings
1670
+ - No contradictions (2 pts) \u2014 consistent tool/style recommendations
1671
+
1672
+ Coverage (20 pts):
1673
+ - Dependency coverage (10 pts) \u2014 CRITICAL: the exact dependency list is provided in your input. Mention AT LEAST 85% by name in CLAUDE.md. Full points at 85%+.
1674
+ - Service/MCP coverage (6 pts) \u2014 reference detected services
1675
+ - MCP completeness (4 pts) \u2014 full points if no external services detected
1676
+
1677
+ Accuracy (15 pts) \u2014 CRITICAL:
1678
+ - Documented commands exist (6 pts) \u2014 ONLY reference scripts from the provided package.json. Use the exact package manager.
1679
+ - Documented paths exist (4 pts) \u2014 ONLY reference file paths from the provided file tree.
1680
+ - Config freshness (5 pts) \u2014 config must match current code state
1681
+
1682
+ Freshness & Safety (10 pts):
1683
+ - No secrets (4 pts), Permissions (2 pts \u2014 handled by caliber)
1684
+
1685
+ Bonus (5 pts): Hooks (2 pts), AGENTS.md (1 pt), OpenSkills format (2 pts) \u2014 handled by caliber
1686
+
1687
+ OUTPUT SIZE CONSTRAINTS:
1688
+ - CLAUDE.md / AGENTS.md: MUST be under 100 lines. Aim for 70-90 lines.
1689
+ - Cursor rules: max 5 .mdc files.
1690
+ - Skill topics: exactly 3 per platform (name + description only, no content).`;
1691
+ var SKILL_GENERATION_PROMPT = `You generate a single skill file for a coding agent (Claude Code, Cursor, or Codex).
1692
+
1693
+ Given project context and a skill topic, produce a focused SKILL.md body.
1694
+
1695
+ Structure:
1696
+ 1. A heading with the skill name
1697
+ 2. "## Instructions" \u2014 clear, numbered steps. Be specific: include exact commands, file paths, parameter names from the project.
1698
+ 3. "## Examples" \u2014 at least one example showing: User says \u2192 Actions taken \u2192 Result
1699
+ 4. "## Troubleshooting" (optional) \u2014 common errors and fixes
1700
+
1701
+ Rules:
1702
+ - Max 150 lines. Focus on actionable instructions, not documentation prose.
1703
+ - Reference actual commands, paths, and packages from the project context provided.
1704
+ - Do NOT include YAML frontmatter \u2014 it will be generated separately.
1705
+ - Be specific to THIS project \u2014 avoid generic advice.
1706
+
1707
+ Return ONLY a JSON object:
1708
+ {"name": "string (kebab-case)", "description": "string (what + when)", "content": "string (markdown body)"}`;
1505
1709
  var REFINE_SYSTEM_PROMPT = `You are an expert at modifying coding agent configurations (Claude Code, Cursor, and Codex).
1506
1710
 
1507
1711
  You will receive the current AgentSetup JSON and a user request describing what to change.
@@ -1737,6 +1941,9 @@ async function enrichWithLLM(fingerprint, dir) {
1737
1941
  }
1738
1942
  }
1739
1943
 
1944
+ // src/ai/generate.ts
1945
+ init_config();
1946
+
1740
1947
  // src/utils/dependencies.ts
1741
1948
  import { readFileSync } from "fs";
1742
1949
  import { join } from "path";
@@ -1830,6 +2037,7 @@ function extractAllDeps(dir) {
1830
2037
  }
1831
2038
 
1832
2039
  // src/ai/generate.ts
2040
+ var CORE_MAX_TOKENS = 16e3;
1833
2041
  var GENERATION_MAX_TOKENS = 64e3;
1834
2042
  var MODEL_MAX_OUTPUT_TOKENS = 128e3;
1835
2043
  var MAX_RETRIES2 = 5;
@@ -1837,7 +2045,237 @@ function isTransientError2(error) {
1837
2045
  const msg = error.message.toLowerCase();
1838
2046
  return TRANSIENT_ERRORS.some((e) => msg.includes(e.toLowerCase()));
1839
2047
  }
1840
- async function generateSetup(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks) {
2048
/**
 * Generates an agent setup, choosing between a single-pass and a two-phase run.
 *
 * When there are failing checks and the current score is at least 95, the
 * work is delegated to `generateMonolithic`. Otherwise `generateCore` runs
 * first, and (unless `options.skipSkills` is set) one skill per collected
 * topic is generated concurrently and merged into the core setup in place.
 *
 * @param fingerprint - Project fingerprint forwarded to the generators.
 * @param targetAgent - Selected platforms ("claude", "codex", "cursor").
 * @param prompt - User prompt forwarded to the generators.
 * @param callbacks - Optional progress callbacks; `onStatus` is used here.
 * @param failingChecks - Optional list of failing checks.
 * @param currentScore - Optional current score.
 * @param passingChecks - Optional list of passing checks.
 * @param options - Optional flags; `skipSkills` stops after the core phase.
 * @returns The result object produced by the chosen generator.
 */
async function generateSetup(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks, options) {
  const hasFailingChecks = failingChecks && failingChecks.length > 0;
  const scoreIsHigh = currentScore !== void 0 && currentScore >= 95;
  if (hasFailingChecks && scoreIsHigh) {
    return generateMonolithic(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks);
  }

  const coreResult = await generateCore(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks);
  const setup = coreResult.setup;
  if (!setup || options?.skipSkills) {
    return coreResult;
  }

  const skillTopics = collectSkillTopics(setup, targetAgent, fingerprint);
  if (skillTopics.length === 0) {
    return coreResult;
  }
  if (callbacks) callbacks.onStatus(`Generating ${skillTopics.length} skills in parallel...`);

  const allDeps = extractAllDeps(process.cwd());
  const skillContext = buildSkillContext(fingerprint, setup, allDeps);
  const fastModel = getFastModel();
  const outcomes = await Promise.allSettled(
    skillTopics.map(async ({ platform, topic }) => {
      const skill = await generateSkill(skillContext, topic, fastModel);
      return { platform, skill };
    })
  );

  let failedCount = 0;
  for (const outcome of outcomes) {
    if (outcome.status !== "fulfilled") {
      // allSettled only yields "fulfilled" or "rejected"; rejections are counted.
      failedCount += 1;
      continue;
    }
    const { platform, skill } = outcome.value;

    // Attach the skill to its platform section, creating the section if needed.
    const platformObj = setup[platform] ?? {};
    const skills = platformObj.skills ?? [];
    skills.push(skill);
    platformObj.skills = skills;
    setup[platform] = platformObj;

    // Record a short description for the file the skill corresponds to.
    const skillDir = platform === "codex" ? ".agents" : `.${platform}`;
    const skillPath = `${skillDir}/skills/${skill.name}/SKILL.md`;
    const descriptions = setup.fileDescriptions ?? {};
    descriptions[skillPath] = skill.description.slice(0, 80);
    setup.fileDescriptions = descriptions;
  }

  if (failedCount > 0 && callbacks) {
    callbacks.onStatus(`${failedCount} skill${failedCount === 1 ? "" : "s"} failed to generate`);
  }
  return coreResult;
}
2094
/**
 * Gathers the skill topics to generate for every targeted platform.
 *
 * Platforms are visited in the fixed order claude, codex, cursor. A platform
 * uses its own non-empty `skillTopics` array when present and the defaults
 * from `getDefaultSkillTopics` otherwise. The `skillTopics` property is
 * removed from each visited platform section.
 *
 * @param setup - Core setup object; its platform sections are mutated.
 * @param targetAgent - Selected platforms; tested with `.includes`.
 * @param fingerprint - Project fingerprint used to derive default topics.
 * @returns {Array<{platform: string, topic: object}>} Platform/topic pairs.
 */
function collectSkillTopics(setup, targetAgent, fingerprint) {
  const collected = [];
  for (const platform of ["claude", "codex", "cursor"]) {
    if (!targetAgent.includes(platform)) continue;

    const section = setup[platform];
    const requested = section?.skillTopics;
    const hasRequested = Array.isArray(requested) && requested.length > 0;
    const chosen = hasRequested ? requested : getDefaultSkillTopics(fingerprint);
    collected.push(...chosen.map((topic) => ({ platform, topic })));

    if (section) {
      delete section.skillTopics;
    }
  }
  return collected;
}
2116
/**
 * Builds the three fallback skill topics used when none were supplied.
 *
 * Two topics are fixed (development workflow and testing). The third is named
 * after the first detected framework, or is a generic "code-conventions"
 * topic when no usable framework name is available.
 *
 * @param {{frameworks?: string[]}} fingerprint - Project fingerprint; only
 *   `frameworks[0]` is read.
 * @returns {Array<{name: string, description: string}>} Exactly three topics.
 */
function getDefaultSkillTopics(fingerprint) {
  const topics = [
    { name: "development-workflow", description: "Development setup and common workflows. Use when starting development, running the project, or setting up the environment." },
    { name: "testing-guide", description: "Testing patterns and commands. Use when writing tests, running test suites, or debugging test failures." }
  ];

  const framework = fingerprint.frameworks?.[0];
  // Collapse each run of non-alphanumerics into ONE hyphen and trim the ends,
  // so "C++" becomes "c" rather than "c--" and the name stays kebab-case.
  const slug = typeof framework === "string"
    ? framework.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "")
    : "";

  if (slug) {
    topics.push({
      name: `${slug}-patterns`,
      description: `${framework} conventions and patterns. Use when working with ${framework} code.`
    });
  } else {
    // No framework, or a name with no letters or digits: use the generic topic.
    topics.push({
      name: "code-conventions",
      description: "Code style, patterns, and project conventions. Use when reviewing code or making architectural decisions."
    });
  }
  return topics;
}
2134
/**
 * Assembles the plain-text project context sent with each skill request.
 *
 * Sections are included only when their data is present, in this order:
 * project name, languages, frameworks, the first 3000 characters of the
 * generated CLAUDE.md, the first 50 file-tree entries, and the dependency
 * list. Sections are joined with newlines.
 *
 * @param fingerprint - Reads `packageName`, `languages`, `frameworks`, `fileTree`.
 * @param setup - Core setup; only `setup.claude.claudeMd` is read.
 * @param {string[]} allDeps - Dependency names.
 * @returns {string} The context text.
 */
function buildSkillContext(fingerprint, setup, allDeps) {
  const { packageName, languages, frameworks, fileTree } = fingerprint;
  const claudeMd = setup.claude?.claudeMd;

  // `null` marks a section that is skipped.
  const sections = [
    packageName ? `Project: ${packageName}` : null,
    languages.length > 0 ? `Languages: ${languages.join(", ")}` : null,
    frameworks.length > 0 ? `Frameworks: ${frameworks.join(", ")}` : null,
    claudeMd ? `\nProject CLAUDE.md (for context):\n${claudeMd.slice(0, 3e3)}` : null,
    fileTree.length > 0 ? `\nKey files:\n${fileTree.slice(0, 50).join("\n")}` : null,
    allDeps.length > 0 ? `\nDependencies: ${allDeps.join(", ")}` : null
  ];
  return sections.filter((section) => section !== null).join("\n");
}
2157
/**
 * Asks the model for the body of a single skill and validates the reply.
 *
 * The reply is model output and is treated as untrusted: every field is
 * type-checked, and the name is normalized to kebab-case because the caller
 * interpolates it into a file path.
 *
 * @param {string} context - Project context text included in the request.
 * @param {{name: string, description: string}} topic - Skill to generate.
 * @param {string} [model] - Optional model override; omitted when falsy.
 * @returns {Promise<{name: string, description: string, content: string}>}
 * @throws {Error} When the reply has no usable `content`, or when no valid
 *   name can be derived from the reply or the topic.
 */
async function generateSkill(context, topic, model) {
  const prompt = [
    "PROJECT CONTEXT:",
    `${context}`,
    "",
    "SKILL TO GENERATE:",
    `Name: ${topic.name}`,
    `Description: ${topic.description}`,
    "",
    "Generate the skill content following the instructions in the system prompt."
  ].join("\n");

  const result = await llmJsonCall({
    system: SKILL_GENERATION_PROMPT,
    prompt,
    maxTokens: 4e3,
    ...model ? { model } : {}
  });

  // A missing reply or a non-string `content` is reported as the same
  // "empty content" error instead of surfacing as an opaque TypeError.
  const content = typeof result?.content === "string" ? result.content.trim() : "";
  if (!content) {
    throw new Error(`Empty skill content for ${topic.name}`);
  }

  // Use the reply's value only when it is a non-blank string.
  const textOr = (value, fallback) => typeof value === "string" && value.trim() ? value : fallback;

  // Reduce the name to [a-z0-9-] so it cannot carry path separators or "..".
  const name = String(textOr(result.name, topic.name))
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "");
  if (!name) {
    throw new Error(`Invalid skill name for ${topic.name}`);
  }

  return {
    name,
    description: String(textOr(result.description, topic.description)),
    content
  };
}
2182
/**
 * Streams the "core" generation request and parses the reply into a setup.
 *
 * The text before the JSON object is scanned for "STATUS:" lines (forwarded
 * to `callbacks.onStatus`) and an "EXPLAIN:" section. When the stream ends,
 * the JSON part is parsed. The request is retried with a larger token budget
 * when the output was cut off at the token limit, and after transient stream
 * errors, up to MAX_RETRIES2 attempts in total.
 *
 * @param fingerprint - Forwarded to `buildGeneratePrompt`.
 * @param targetAgent - Forwarded to `buildGeneratePrompt`.
 * @param prompt - Forwarded to `buildGeneratePrompt`.
 * @param callbacks - Optional; `onStatus`, `onComplete`, `onError` are used.
 * @param failingChecks - Forwarded to `buildGeneratePrompt`.
 * @param currentScore - Forwarded to `buildGeneratePrompt`.
 * @param passingChecks - Forwarded to `buildGeneratePrompt`.
 * @returns A promise that always resolves (never rejects) with
 *   `{ setup, explanation?, raw?, stopReason? }`; `setup` is null on failure.
 */
async function generateCore(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks) {
  const provider = getProvider();
  const userMessage = buildGeneratePrompt(fingerprint, targetAgent, prompt, failingChecks, currentScore, passingChecks);
  // Shared across retries so the budget grows and the retry cap is enforced.
  let attempt = 0;
  const attemptGeneration = async () => {
    attempt++;
    // `attempt` is incremented first, so the first request already gets
    // CORE_MAX_TOKENS + 8e3; the budget is capped at GENERATION_MAX_TOKENS.
    const maxTokensForAttempt = Math.min(
      CORE_MAX_TOKENS + attempt * 8e3,
      GENERATION_MAX_TOKENS
    );
    return new Promise((resolve2) => {
      let preJsonBuffer = ""; // all text received until the JSON start is found
      let jsonContent = ""; // text from the opening "{" onward
      let inJson = false;
      let sentStatuses = 0; // number of completed lines already scanned for STATUS
      let stopReason = null;
      provider.stream(
        {
          system: CORE_GENERATION_PROMPT,
          prompt: userMessage,
          maxTokens: maxTokensForAttempt
        },
        {
          onText: (text) => {
            if (!inJson) {
              preJsonBuffer += text;
              const lines = preJsonBuffer.split("\n");
              // The last element may be a partial line; only scan finished ones.
              const completedLines = lines.slice(0, -1);
              for (let i = sentStatuses; i < completedLines.length; i++) {
                const trimmed = completedLines[i].trim();
                if (trimmed.startsWith("STATUS:")) {
                  // 7 === "STATUS:".length
                  const status = trimmed.slice(7).trim();
                  if (status && callbacks) callbacks.onStatus(status);
                }
              }
              sentStatuses = completedLines.length;
              // JSON starts at a "{" that begins a line (optionally after a
              // ```json fence) and is followed by a double quote.
              const jsonStartMatch = preJsonBuffer.match(/(?:^|\n)\s*(?:```json\s*\n\s*)?\{(?=\s*")/);
              if (jsonStartMatch) {
                const matchIndex = preJsonBuffer.indexOf("{", jsonStartMatch.index);
                inJson = true;
                jsonContent = preJsonBuffer.slice(matchIndex);
              }
            } else {
              jsonContent += text;
            }
          },
          onEnd: (meta) => {
            stopReason = meta?.stopReason ?? null;
            let setup = null;
            // Drop a trailing code fence before parsing.
            let jsonToParse = (jsonContent || preJsonBuffer).replace(/```\s*$/g, "").trim();
            if (!jsonContent && preJsonBuffer) {
              // The JSON start was not detected while streaming; retry the
              // same detection on the complete buffer.
              const fallbackMatch = preJsonBuffer.match(/(?:^|\n)\s*(?:```json\s*\n\s*)?\{(?=\s*")/);
              if (fallbackMatch) {
                const matchIndex = preJsonBuffer.indexOf("{", fallbackMatch.index);
                jsonToParse = preJsonBuffer.slice(matchIndex).replace(/```\s*$/g, "").trim();
              }
            }
            try {
              setup = JSON.parse(jsonToParse);
            } catch {
              // Parse failure leaves `setup` null; handled below.
            }
            if (!setup && stopReason === "max_tokens" && attempt < MAX_RETRIES2) {
              if (callbacks) callbacks.onStatus("Output was truncated, retrying with higher token limit...");
              // The retry's result settles this attempt's promise.
              setTimeout(() => attemptGeneration().then(resolve2), 1e3);
              return;
            }
            let explanation;
            // Capture the text between "EXPLAIN:" and the next line that
            // starts with a code fence or "{".
            const explainMatch = preJsonBuffer.match(/EXPLAIN:\s*\n([\s\S]*?)(?=\n\s*(`{3}|\{))/);
            if (explainMatch) {
              explanation = explainMatch[1].trim();
            }
            if (setup) {
              if (callbacks) callbacks.onComplete(setup, explanation);
              resolve2({ setup, explanation, stopReason: stopReason ?? void 0 });
            } else {
              resolve2({ setup: null, explanation, raw: preJsonBuffer, stopReason: stopReason ?? void 0 });
            }
          },
          onError: (error) => {
            if (isTransientError2(error) && attempt < MAX_RETRIES2) {
              if (callbacks) callbacks.onStatus("Connection interrupted, retrying...");
              // NOTE(review): if a provider also calls onEnd after onError for
              // the same stream, that onEnd could resolve this promise before
              // the retry finishes — confirm provider behavior.
              setTimeout(() => attemptGeneration().then(resolve2), 2e3);
              return;
            }
            if (callbacks) callbacks.onError(error.message);
            resolve2({ setup: null, raw: error.message, stopReason: "error" });
          }
        }
      ).catch((error) => {
        // A rejection of stream() itself is reported the same way as a
        // non-retryable stream error.
        if (callbacks) callbacks.onError(error.message);
        resolve2({ setup: null, raw: error.message, stopReason: "error" });
      });
    });
  };
  return attemptGeneration();
}
2278
+ async function generateMonolithic(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks) {
1841
2279
  const provider = getProvider();
1842
2280
  const userMessage = buildGeneratePrompt(fingerprint, targetAgent, prompt, failingChecks, currentScore, passingChecks);
1843
2281
  let attempt = 0;
@@ -1910,9 +2348,9 @@ async function generateSetup(fingerprint, targetAgent, prompt, callbacks, failin
1910
2348
  }
1911
2349
  if (setup) {
1912
2350
  if (callbacks) callbacks.onComplete(setup, explanation);
1913
- resolve2({ setup, explanation });
2351
+ resolve2({ setup, explanation, stopReason: stopReason ?? void 0 });
1914
2352
  } else {
1915
- resolve2({ setup: null, explanation, raw: preJsonBuffer });
2353
+ resolve2({ setup: null, explanation, raw: preJsonBuffer, stopReason: stopReason ?? void 0 });
1916
2354
  }
1917
2355
  },
1918
2356
  onError: (error) => {
@@ -1922,12 +2360,12 @@ async function generateSetup(fingerprint, targetAgent, prompt, callbacks, failin
1922
2360
  return;
1923
2361
  }
1924
2362
  if (callbacks) callbacks.onError(error.message);
1925
- resolve2({ setup: null, raw: error.message });
2363
+ resolve2({ setup: null, raw: error.message, stopReason: "error" });
1926
2364
  }
1927
2365
  }
1928
2366
  ).catch((error) => {
1929
2367
  if (callbacks) callbacks.onError(error.message);
1930
- resolve2({ setup: null, raw: error.message });
2368
+ resolve2({ setup: null, raw: error.message, stopReason: "error" });
1931
2369
  });
1932
2370
  });
1933
2371
  };
@@ -5558,6 +5996,7 @@ async function initCommand(options) {
5558
5996
  genMessages.start();
5559
5997
  let generatedSetup = null;
5560
5998
  let rawOutput;
5999
+ let genStopReason;
5561
6000
  try {
5562
6001
  const result = await generateSetup(
5563
6002
  fingerprint,
@@ -5583,15 +6022,18 @@ async function initCommand(options) {
5583
6022
  generatedSetup = result.setup;
5584
6023
  rawOutput = result.raw;
5585
6024
  }
6025
+ genStopReason = result.stopReason;
5586
6026
  } catch (err) {
5587
6027
  genMessages.stop();
5588
6028
  const msg = err instanceof Error ? err.message : "Unknown error";
5589
6029
  genSpinner.fail(`Generation failed: ${msg}`);
6030
+ writeErrorLog(config, void 0, msg, "exception");
5590
6031
  throw new Error("__exit__");
5591
6032
  }
5592
6033
  genMessages.stop();
5593
6034
  if (!generatedSetup) {
5594
6035
  genSpinner.fail("Failed to generate setup.");
6036
+ writeErrorLog(config, rawOutput, void 0, genStopReason);
5595
6037
  if (rawOutput) {
5596
6038
  console.log(chalk8.dim("\nRaw LLM output (JSON parse failed):"));
5597
6039
  console.log(chalk8.dim(rawOutput.slice(0, 500)));
@@ -5760,7 +6202,8 @@ async function initCommand(options) {
5760
6202
  },
5761
6203
  polishFailing,
5762
6204
  afterScore.score,
5763
- polishPassing
6205
+ polishPassing,
6206
+ { skipSkills: true }
5764
6207
  );
5765
6208
  if (polishResult.setup) {
5766
6209
  const polishWriteResult = writeSetup(polishResult.setup);
@@ -5825,6 +6268,9 @@ async function initCommand(options) {
5825
6268
  console.log(` ${title("caliber skills")} Discover community skills for your stack`);
5826
6269
  console.log(` ${title("caliber undo")} Revert all changes from this run`);
5827
6270
  console.log("");
6271
+ if (options.showTokens) {
6272
+ displayTokenUsage();
6273
+ }
5828
6274
  if (report) {
5829
6275
  report.markStep("Finished");
5830
6276
  const reportPath = path19.join(process.cwd(), ".caliber", "debug-report.md");
@@ -6101,6 +6547,48 @@ function ensurePermissions() {
6101
6547
  if (!fs24.existsSync(".claude")) fs24.mkdirSync(".claude", { recursive: true });
6102
6548
  fs24.writeFileSync(settingsPath, JSON.stringify(settings, null, 2) + "\n");
6103
6549
  }
6550
/**
 * Prints the recorded token usage to the console, one line per model.
 *
 * With no recorded usage a single notice is printed instead. Cache counters
 * are shown only when at least one of them is positive, and a "Total" line
 * is added only when more than one model was used.
 *
 * @returns {void}
 */
function displayTokenUsage() {
  const perModel = getUsageSummary();
  if (perModel.length === 0) {
    console.log(chalk8.dim(" Token tracking not available for this provider.\n"));
    return;
  }

  const fmt = (count) => count.toLocaleString();
  console.log(chalk8.bold(" Token usage:\n"));

  for (const { model, inputTokens, outputTokens, cacheReadTokens, cacheWriteTokens, calls } of perModel) {
    let cacheInfo = "";
    if (cacheReadTokens > 0 || cacheWriteTokens > 0) {
      cacheInfo = chalk8.dim(` (cache: ${fmt(cacheReadTokens)} read, ${fmt(cacheWriteTokens)} write)`);
    }
    const plural = calls === 1 ? "" : "s";
    console.log(` ${chalk8.dim(model)}: ${fmt(inputTokens)} in / ${fmt(outputTokens)} out (${calls} call${plural})${cacheInfo}`);
  }

  if (perModel.length > 1) {
    const totalIn = perModel.reduce((sum, entry) => sum + entry.inputTokens, 0);
    const totalOut = perModel.reduce((sum, entry) => sum + entry.outputTokens, 0);
    console.log(` ${chalk8.dim("Total")}: ${fmt(totalIn)} in / ${fmt(totalOut)} out`);
  }
  console.log("");
}
6570
/**
 * Writes a Markdown report about a failed generation to
 * `.caliber/error-log.md` under the current working directory.
 *
 * Any failure while building or writing the report is ignored.
 *
 * @param config - Object whose `provider` and `model` are printed.
 * @param {string} [rawOutput] - Raw model output; "(empty)" when falsy.
 * @param {string} [error] - Error text; its section is omitted when falsy.
 * @param {string} [stopReason] - Stop reason; "unknown" when falsy.
 * @returns {void}
 */
function writeErrorLog(config, rawOutput, error, stopReason) {
  try {
    const logDir = path19.join(process.cwd(), ".caliber");
    const logPath = path19.join(logDir, "error-log.md");
    const timestamp = (/* @__PURE__ */ new Date()).toISOString();

    const header = [
      `# Generation Error \u2014 ${timestamp}`,
      "",
      `**Provider**: ${config.provider}`,
      `**Model**: ${config.model}`,
      `**Stop reason**: ${stopReason || "unknown"}`,
      ""
    ];
    const errorSection = error ? ["## Error", "```", error, "```", ""] : [];
    const outputSection = ["## Raw LLM Output", "```", rawOutput || "(empty)", "```"];
    const report = [...header, ...errorSection, ...outputSection].join("\n");

    fs24.mkdirSync(logDir, { recursive: true });
    fs24.writeFileSync(logPath, report);
    console.log(chalk8.dim("\nError log written to .caliber/error-log.md"));
  } catch {
    // Writing the log is best-effort; failures here are ignored.
  }
}
6104
6592
 
6105
6593
  // src/commands/undo.ts
6106
6594
  import chalk9 from "chalk";
@@ -7265,7 +7753,7 @@ function parseAgentOption(value) {
7265
7753
  }
7266
7754
  return agents;
7267
7755
  }
7268
- program.command("init").description("Initialize your project for AI-assisted development").option("--agent <type>", "Target agents (comma-separated): claude, cursor, codex", parseAgentOption).option("--dry-run", "Preview changes without writing files").option("--force", "Overwrite existing setup without prompting").option("--debug-report", void 0, false).action(tracked("init", initCommand));
7756
+ program.command("init").description("Initialize your project for AI-assisted development").option("--agent <type>", "Target agents (comma-separated): claude, cursor, codex", parseAgentOption).option("--dry-run", "Preview changes without writing files").option("--force", "Overwrite existing setup without prompting").option("--debug-report", void 0, false).option("--show-tokens", "Show token usage summary at the end").action(tracked("init", initCommand));
7269
7757
  program.command("undo").description("Revert all config changes made by Caliber").action(tracked("undo", undoCommand));
7270
7758
  program.command("status").description("Show current Caliber setup status").option("--json", "Output as JSON").action(tracked("status", statusCommand));
7271
7759
  program.command("regenerate").alias("regen").alias("re").description("Re-analyze project and regenerate setup").option("--dry-run", "Preview changes without writing files").action(tracked("regenerate", regenerateCommand));
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rely-ai/caliber",
3
- "version": "1.8.0",
3
+ "version": "1.10.0",
4
4
  "description": "Analyze your codebase and generate optimized AI agent configs (CLAUDE.md, .cursorrules, skills) — no API key needed",
5
5
  "type": "module",
6
6
  "bin": {