@rely-ai/caliber 1.9.0 → 1.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/bin.js +355 -3
  2. package/package.json +1 -1
package/dist/bin.js CHANGED
@@ -1592,6 +1592,120 @@ OUTPUT SIZE CONSTRAINTS \u2014 these are critical:
1592
1592
  - Each skill content: max 150 lines. Focus on patterns and examples, not exhaustive docs.
1593
1593
  - Cursor rules: max 5 .mdc files.
1594
1594
  - If the project is large, prioritize depth on the 3-4 most critical tools over breadth across everything.`;
1595
// System prompt for the "core" generation pass (passed as `system` to provider.stream in generateCore).
// It instructs the model to emit, in order: STATUS lines, an EXPLAIN section, and a CoreSetup JSON object
// that carries only skill topic names/descriptions (skill bodies are generated separately).
// NOTE: the text below is sent to the model verbatim; editing it changes runtime behavior.
+ var CORE_GENERATION_PROMPT = `You are an expert auditor for coding agent configurations (Claude Code, Cursor, and Codex).
1596
+
1597
+ Your job depends on context:
1598
+ - If no existing configs exist \u2192 generate an initial setup from scratch.
1599
+ - If existing configs are provided \u2192 audit them and suggest targeted improvements. Preserve accurate content \u2014 don't rewrite what's already correct.
1600
+
1601
+ You understand these config files:
1602
+ - CLAUDE.md: Project context for Claude Code \u2014 build/test commands, architecture, conventions.
1603
+ - AGENTS.md: Primary instructions file for OpenAI Codex \u2014 same purpose as CLAUDE.md but for the Codex agent.
1604
+ - .cursorrules: Coding rules for Cursor (deprecated legacy format \u2014 do NOT generate this).
1605
+ - .cursor/rules/*.mdc: Modern Cursor rules with frontmatter (description, globs, alwaysApply).
1606
+
1607
+ Do NOT generate .claude/settings.json, .claude/settings.local.json, or mcpServers.
1608
+
1609
+ Your output MUST follow this exact format (no markdown fences):
1610
+
1611
+ 1. Exactly 6 short status lines (one per line, prefixed with "STATUS: "). Each should be a creative, specific description of what you're analyzing for THIS project \u2014 reference the project's actual languages, frameworks, or tools.
1612
+
1613
+ 2. A brief explanation section starting with "EXPLAIN:" on its own line:
1614
+
1615
+ EXPLAIN:
1616
+ [Changes]
1617
+ - **file-or-skill-name**: short reason (max 10 words)
1618
+ [Deletions]
1619
+ - **file-path**: short reason (max 10 words)
1620
+
1621
+ Omit empty categories. Keep each reason punchy and specific. End with a blank line.
1622
+
1623
+ 3. The JSON object starting with {.
1624
+
1625
+ CoreSetup schema:
1626
+ {
1627
+ "targetAgent": ["claude", "cursor", "codex"] (array of selected agents),
1628
+ "fileDescriptions": {
1629
+ "<file-path>": "reason for this change (max 80 chars)"
1630
+ },
1631
+ "deletions": [
1632
+ { "filePath": "<path>", "reason": "why remove (max 80 chars)" }
1633
+ ],
1634
+ "claude": {
1635
+ "claudeMd": "string (markdown content for CLAUDE.md)",
1636
+ "skillTopics": [{ "name": "string (kebab-case)", "description": "string (what this skill does and WHEN to use it \u2014 include trigger phrases)" }]
1637
+ },
1638
+ "codex": {
1639
+ "agentsMd": "string (markdown content for AGENTS.md)",
1640
+ "skillTopics": [{ "name": "string (kebab-case)", "description": "string" }]
1641
+ },
1642
+ "cursor": {
1643
+ "skillTopics": [{ "name": "string (kebab-case)", "description": "string" }],
1644
+ "rules": [{ "filename": "string.mdc", "content": "string (with frontmatter)" }]
1645
+ }
1646
+ }
1647
+
1648
+ IMPORTANT: Do NOT generate full skill content. Only output skill topic names and descriptions.
1649
+ Skills will be generated separately. Generate exactly 3 skill topics per target platform.
1650
+
1651
+ Skill topic description MUST include WHAT it does + WHEN to use it with specific trigger phrases.
1652
+ Example: "Manages database migrations. Use when user says 'run migration', 'create migration', 'db schema change', or modifies files in db/migrations/."
1653
+
1654
+ The "fileDescriptions" object MUST include a one-liner for every file that will be created or modified.
1655
+ The "deletions" array should list files that should be removed (e.g. stale configs). Omit if empty.
1656
+
1657
+ SCORING CRITERIA \u2014 your output is scored deterministically. Optimize for 100/100:
1658
+
1659
+ Existence (25 pts):
1660
+ - CLAUDE.md exists (6 pts) \u2014 always generate for claude/both targets
1661
+ - AGENTS.md exists (6 pts) \u2014 always generate for codex target
1662
+ - Skills configured (8 pts) \u2014 3 skill topics = full points
1663
+ - For "both" target: .cursor/rules/ exist (3+3 pts), cross-platform parity (2 pts)
1664
+
1665
+ Quality (25 pts):
1666
+ - Build/test/lint commands documented (8 pts) \u2014 include actual commands from the project
1667
+ - Concise context files (6 pts) \u2014 keep CLAUDE.md under 100 lines (200=4pts, 300=3pts, 500+=0pts)
1668
+ - No vague instructions (4 pts) \u2014 avoid "follow best practices", "write clean code", "ensure quality"
1669
+ - No directory tree listings (3 pts) \u2014 do NOT include tree-style file listings
1670
+ - No contradictions (2 pts) \u2014 consistent tool/style recommendations
1671
+
1672
+ Coverage (20 pts):
1673
+ - Dependency coverage (10 pts) \u2014 CRITICAL: the exact dependency list is provided in your input. Mention AT LEAST 85% by name in CLAUDE.md. Full points at 85%+.
1674
+ - Service/MCP coverage (6 pts) \u2014 reference detected services
1675
+ - MCP completeness (4 pts) \u2014 full points if no external services detected
1676
+
1677
+ Accuracy (15 pts) \u2014 CRITICAL:
1678
+ - Documented commands exist (6 pts) \u2014 ONLY reference scripts from the provided package.json. Use the exact package manager.
1679
+ - Documented paths exist (4 pts) \u2014 ONLY reference file paths from the provided file tree.
1680
+ - Config freshness (5 pts) \u2014 config must match current code state
1681
+
1682
+ Freshness & Safety (10 pts):
1683
+ - No secrets (4 pts), Permissions (2 pts \u2014 handled by caliber)
1684
+
1685
+ Bonus (5 pts): Hooks (2 pts), AGENTS.md (1 pt), OpenSkills format (2 pts) \u2014 handled by caliber
1686
+
1687
+ OUTPUT SIZE CONSTRAINTS:
1688
+ - CLAUDE.md / AGENTS.md: MUST be under 100 lines. Aim for 70-90 lines.
1689
+ - Cursor rules: max 5 .mdc files.
1690
+ - Skill topics: exactly 3 per platform (name + description only, no content).`;
1691
// System prompt for generating ONE skill file (passed as `system` to llmJsonCall in generateSkill).
// It asks the model for a JSON object with `name`, `description` and `content` (the SKILL.md body,
// without YAML frontmatter). The text is sent verbatim; editing it changes runtime behavior.
+ var SKILL_GENERATION_PROMPT = `You generate a single skill file for a coding agent (Claude Code, Cursor, or Codex).
1692
+
1693
+ Given project context and a skill topic, produce a focused SKILL.md body.
1694
+
1695
+ Structure:
1696
+ 1. A heading with the skill name
1697
+ 2. "## Instructions" \u2014 clear, numbered steps. Be specific: include exact commands, file paths, parameter names from the project.
1698
+ 3. "## Examples" \u2014 at least one example showing: User says \u2192 Actions taken \u2192 Result
1699
+ 4. "## Troubleshooting" (optional) \u2014 common errors and fixes
1700
+
1701
+ Rules:
1702
+ - Max 150 lines. Focus on actionable instructions, not documentation prose.
1703
+ - Reference actual commands, paths, and packages from the project context provided.
1704
+ - Do NOT include YAML frontmatter \u2014 it will be generated separately.
1705
+ - Be specific to THIS project \u2014 avoid generic advice.
1706
+
1707
+ Return ONLY a JSON object:
1708
+ {"name": "string (kebab-case)", "description": "string (what + when)", "content": "string (markdown body)"}`;
1595
1709
  var REFINE_SYSTEM_PROMPT = `You are an expert at modifying coding agent configurations (Claude Code, Cursor, and Codex).
1596
1710
 
1597
1711
  You will receive the current AgentSetup JSON and a user request describing what to change.
@@ -1827,6 +1941,9 @@ async function enrichWithLLM(fingerprint, dir) {
1827
1941
  }
1828
1942
  }
1829
1943
 
1944
+ // src/ai/generate.ts
1945
+ init_config();
1946
+
1830
1947
  // src/utils/dependencies.ts
1831
1948
  import { readFileSync } from "fs";
1832
1949
  import { join } from "path";
@@ -1920,6 +2037,7 @@ function extractAllDeps(dir) {
1920
2037
  }
1921
2038
 
1922
2039
  // src/ai/generate.ts
2040
// Base output-token budget for the core generation pass. generateCore adds 8000 per attempt
// (the attempt counter starts at 1) and caps the total at GENERATION_MAX_TOKENS.
+ var CORE_MAX_TOKENS = 16e3;
1923
2041
  var GENERATION_MAX_TOKENS = 64e3;
1924
2042
  var MODEL_MAX_OUTPUT_TOKENS = 128e3;
1925
2043
  var MAX_RETRIES2 = 5;
@@ -1927,7 +2045,237 @@ function isTransientError2(error) {
1927
2045
  const msg = error.message.toLowerCase();
1928
2046
  return TRANSIENT_ERRORS.some((e) => msg.includes(e.toLowerCase()));
1929
2047
  }
1930
- async function generateSetup(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks) {
2048
/**
 * Generates an agent setup for a project.
 *
 * Two strategies are used:
 *  - Targeted fix: when there are failing checks and the current score is already >= 95,
 *    or when `options.forceTargetedFix` is set, the whole setup is produced by
 *    `generateMonolithic` in a single call.
 *  - Core + skills: otherwise `generateCore` produces the setup with skill *topics* only,
 *    then every topic is expanded into a full skill in parallel via `generateSkill`.
 *    Successful skills are appended to `setup[platform].skills` and get an entry in
 *    `setup.fileDescriptions`; failures are only counted and reported through `onStatus`.
 *
 * @param {object} fingerprint - Project fingerprint forwarded to the generators.
 * @param {string[]} targetAgent - Selected platforms ("claude", "codex", "cursor").
 * @param {string} prompt - Optional user instructions forwarded to the generators.
 * @param {object} [callbacks] - Optional `{ onStatus, onComplete, onError }` progress hooks.
 * @param {Array} [failingChecks] - Checks that currently fail.
 * @param {number} [currentScore] - Current score of the existing configuration.
 * @param {Array} [passingChecks] - Checks that currently pass.
 * @param {{ skipSkills?: boolean, forceTargetedFix?: boolean }} [options]
 * @returns {Promise<object>} The generator result (`{ setup, explanation, stopReason, raw }`).
 */
async function generateSetup(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks, options) {
  const nearPerfectWithFailures =
    failingChecks && failingChecks.length > 0 && currentScore !== void 0 && currentScore >= 95;
  if (nearPerfectWithFailures || options?.forceTargetedFix) {
    return generateMonolithic(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks);
  }

  const coreResult = await generateCore(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks);
  if (!coreResult.setup || options?.skipSkills) {
    return coreResult;
  }

  const setup = coreResult.setup;
  const skillTopics = collectSkillTopics(setup, targetAgent, fingerprint);
  if (skillTopics.length === 0) {
    return coreResult;
  }

  if (callbacks) callbacks.onStatus(`Generating ${skillTopics.length} skills in parallel...`);
  const allDeps = extractAllDeps(process.cwd());
  const skillContext = buildSkillContext(fingerprint, setup, allDeps);
  const fastModel = getFastModel();

  // allSettled: one failing skill must not discard the skills that did generate.
  const skillResults = await Promise.allSettled(
    skillTopics.map(
      ({ platform, topic }) => generateSkill(skillContext, topic, fastModel).then((skill) => ({ platform, skill }))
    )
  );

  let failedCount = 0;
  for (const result of skillResults) {
    if (result.status !== "fulfilled") {
      failedCount++;
      continue;
    }
    const { platform, skill } = result.value;

    const platformObj = setup[platform] ?? {};
    const skills = platformObj.skills ?? [];
    skills.push(skill);
    platformObj.skills = skills;
    setup[platform] = platformObj;

    const skillPath = platform === "codex" ? `.agents/skills/${skill.name}/SKILL.md` : `.${platform}/skills/${skill.name}/SKILL.md`;
    const descriptions = setup.fileDescriptions ?? {};
    // The description originates from model output and may be missing or not a string;
    // calling .slice on it directly would throw and abort the run after all skills were generated.
    const description = typeof skill.description === "string" ? skill.description : "";
    descriptions[skillPath] = description.slice(0, 80);
    setup.fileDescriptions = descriptions;
  }

  if (failedCount > 0 && callbacks) {
    callbacks.onStatus(`${failedCount} skill${failedCount === 1 ? "" : "s"} failed to generate`);
  }
  return coreResult;
}
2094
/**
 * Collects the skill topics to generate for every targeted platform and strips the
 * temporary `skillTopics` field from the setup.
 *
 * Platforms are visited in the fixed order claude, codex, cursor. For each targeted platform
 * the model-provided `setup[platform].skillTopics` are used; entries that are not objects with
 * a non-empty string `name` are dropped, because the name later becomes part of a file path.
 * When no usable topic remains, the defaults from `getDefaultSkillTopics` are used instead.
 * Platforms that are not targeted are left untouched.
 *
 * @param {object} setup - Core setup object; mutated (`skillTopics` is deleted per targeted platform).
 * @param {string[]} targetAgent - Selected platforms.
 * @param {object} fingerprint - Project fingerprint, forwarded to `getDefaultSkillTopics`.
 * @returns {Array<{ platform: string, topic: { name: string, description?: string } }>}
 */
function collectSkillTopics(setup, targetAgent, fingerprint) {
  const collected = [];
  for (const platform of ["claude", "codex", "cursor"]) {
    if (!targetAgent.includes(platform)) continue;

    const platformObj = setup[platform];
    const proposed = Array.isArray(platformObj?.skillTopics) ? platformObj.skillTopics : [];
    const usable = proposed.filter(
      (topic) => topic !== null && typeof topic === "object" && typeof topic.name === "string" && topic.name.trim() !== ""
    );
    const chosen = usable.length > 0 ? usable : getDefaultSkillTopics(fingerprint);
    for (const topic of chosen) {
      collected.push({ platform, topic });
    }

    // skillTopics is an intermediate field only; it must not reach the written setup.
    if (platformObj) {
      delete platformObj.skillTopics;
    }
  }
  return collected;
}
2116
/**
 * Returns the three fallback skill topics used when the model proposed none.
 *
 * Two topics are fixed (development workflow, testing). The third is derived from the first
 * detected framework, or is a generic "code-conventions" topic when no framework is known.
 *
 * The framework name is turned into a kebab-case slug: runs of non-alphanumeric characters
 * collapse into a single hyphen and leading/trailing hyphens are removed, so "C++" yields
 * "c-patterns" rather than "c---patterns". If nothing usable remains, the generic topic is used.
 *
 * @param {{ frameworks?: string[] }} fingerprint - Project fingerprint.
 * @returns {Array<{ name: string, description: string }>} Exactly three topics.
 */
function getDefaultSkillTopics(fingerprint) {
  const topics = [
    { name: "development-workflow", description: "Development setup and common workflows. Use when starting development, running the project, or setting up the environment." },
    { name: "testing-guide", description: "Testing patterns and commands. Use when writing tests, running test suites, or debugging test failures." }
  ];

  const framework = fingerprint.frameworks?.[0];
  const slug = typeof framework === "string"
    ? framework.toLowerCase().replace(/[^a-z0-9]+/g, "-").replace(/^-+|-+$/g, "")
    : "";

  if (slug) {
    topics.push({
      name: `${slug}-patterns`,
      description: `${framework} conventions and patterns. Use when working with ${framework} code.`
    });
  } else {
    topics.push({
      name: "code-conventions",
      description: "Code style, patterns, and project conventions. Use when reviewing code or making architectural decisions."
    });
  }
  return topics;
}
2134
/**
 * Builds the plain-text project context that is prepended to every skill-generation prompt.
 *
 * Sections (each only when data is available): project name, languages, frameworks, the
 * generated project document (first 3000 characters), the first 50 file-tree entries, and the
 * dependency list. The project document is CLAUDE.md when the setup has one; otherwise the
 * Codex AGENTS.md is used so codex-only setups still give the skill generator project context.
 *
 * @param {object} fingerprint - Project fingerprint (`packageName`, `languages`, `frameworks`, `fileTree`).
 * @param {object} setup - Core setup; `claude.claudeMd` / `codex.agentsMd` are read.
 * @param {string[]} allDeps - Dependency names.
 * @returns {string} Sections joined by newlines.
 */
function buildSkillContext(fingerprint, setup, allDeps) {
  const sections = [];
  if (fingerprint.packageName) sections.push(`Project: ${fingerprint.packageName}`);
  if (fingerprint.languages.length > 0) sections.push(`Languages: ${fingerprint.languages.join(", ")}`);
  if (fingerprint.frameworks.length > 0) sections.push(`Frameworks: ${fingerprint.frameworks.join(", ")}`);

  // Model output is not guaranteed to be a string; only sliceable text is used.
  const claudeMd = setup.claude?.claudeMd;
  const agentsMd = setup.codex?.agentsMd;
  if (typeof claudeMd === "string" && claudeMd) {
    sections.push(`\nProject CLAUDE.md (for context):\n${claudeMd.slice(0, 3e3)}`);
  } else if (typeof agentsMd === "string" && agentsMd) {
    sections.push(`\nProject AGENTS.md (for context):\n${agentsMd.slice(0, 3e3)}`);
  }

  if (fingerprint.fileTree.length > 0) {
    sections.push(`\nKey files:\n${fingerprint.fileTree.slice(0, 50).join("\n")}`);
  }
  if (allDeps.length > 0) {
    sections.push(`\nDependencies: ${allDeps.join(", ")}`);
  }
  return sections.join("\n");
}
2157
/**
 * Generates one skill (name, description, markdown body) for a topic via `llmJsonCall`,
 * using SKILL_GENERATION_PROMPT as the system prompt and a 4000-token output budget.
 *
 * The model response is validated before use:
 *  - `content` must be a non-blank string, otherwise the call fails;
 *  - `name` is accepted only when it is kebab-case (it becomes a directory name), otherwise
 *    the topic's name is used;
 *  - `description` is accepted only when it is a non-empty string, otherwise the topic's
 *    description is used.
 *
 * @param {string} context - Project context text (see buildSkillContext).
 * @param {{ name: string, description?: string }} topic - Skill topic to expand.
 * @param {string} [model] - Optional model override; omitted from the request when falsy.
 * @returns {Promise<{ name: string, description: (string|undefined), content: string }>}
 * @throws {Error} "Empty skill content for <topic name>" when no usable content was returned.
 */
async function generateSkill(context, topic, model) {
  const prompt = [
    "PROJECT CONTEXT:",
    `${context}`,
    "",
    "SKILL TO GENERATE:",
    `Name: ${topic.name}`,
    `Description: ${topic.description}`,
    "",
    "Generate the skill content following the instructions in the system prompt."
  ].join("\n");

  const result = await llmJsonCall({
    system: SKILL_GENERATION_PROMPT,
    prompt,
    maxTokens: 4e3,
    ...model ? { model } : {}
  });

  // The model may return null, or a non-string `content` (e.g. an array of lines).
  const content = typeof result?.content === "string" ? result.content.trim() : "";
  if (!content) {
    throw new Error(`Empty skill content for ${topic.name}`);
  }

  const hasKebabName = typeof result.name === "string" && /^[a-z0-9]+(?:-[a-z0-9]+)*$/.test(result.name);
  const hasDescription = typeof result.description === "string" && result.description !== "";
  return {
    name: hasKebabName ? result.name : topic.name,
    description: hasDescription ? result.description : topic.description,
    content
  };
}
2182
/**
 * Runs the "core" generation pass: streams a response for CORE_GENERATION_PROMPT and extracts
 * STATUS lines, the EXPLAIN section and the setup JSON object from it.
 *
 * While streaming, every completed line starting with "STATUS:" is forwarded to
 * `callbacks.onStatus`. Once the start of the JSON object is seen, all further text is
 * accumulated as JSON. At the end of the stream the JSON is parsed; only a plain object is
 * accepted as a setup (arrays and primitives are treated as a failed parse).
 *
 * Retries (the attempt counter is shared, at most MAX_RETRIES2 attempts):
 *  - output truncated (`stopReason === "max_tokens"`) without a valid setup: retry after 1s
 *    with a budget of CORE_MAX_TOKENS + attempt * 8000, capped at GENERATION_MAX_TOKENS;
 *  - transient stream error (per isTransientError2): retry after 2s.
 *
 * The returned promise never rejects; failures resolve with `setup: null`.
 *
 * @param {object} fingerprint - Project fingerprint, forwarded to buildGeneratePrompt.
 * @param {string[]} targetAgent - Selected platforms.
 * @param {string} prompt - Optional user instructions.
 * @param {object} [callbacks] - Optional `{ onStatus, onComplete, onError }` hooks.
 * @param {Array} [failingChecks]
 * @param {number} [currentScore]
 * @param {Array} [passingChecks]
 * @returns {Promise<{ setup: (object|null), explanation?: string, raw?: string, stopReason?: string }>}
 */
async function generateCore(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks) {
  const provider = getProvider();
  const userMessage = buildGeneratePrompt(fingerprint, targetAgent, prompt, failingChecks, currentScore, passingChecks);
  let attempt = 0;

  const attemptGeneration = async () => {
    attempt++;
    const maxTokensForAttempt = Math.min(
      CORE_MAX_TOKENS + attempt * 8e3,
      GENERATION_MAX_TOKENS
    );
    return new Promise((resolve2) => {
      let preJsonBuffer = "";
      let jsonContent = "";
      let inJson = false;
      let sentStatuses = 0;
      let stopReason = null;

      provider.stream(
        {
          system: CORE_GENERATION_PROMPT,
          prompt: userMessage,
          maxTokens: maxTokensForAttempt
        },
        {
          onText: (text) => {
            if (inJson) {
              jsonContent += text;
              return;
            }
            preJsonBuffer += text;

            // Only lines terminated by a newline are complete; the last split item may still grow.
            const completedLines = preJsonBuffer.split("\n").slice(0, -1);
            for (let i = sentStatuses; i < completedLines.length; i++) {
              const trimmed = completedLines[i].trim();
              if (trimmed.startsWith("STATUS:")) {
                const status = trimmed.slice(7).trim();
                if (status && callbacks) callbacks.onStatus(status);
              }
            }
            sentStatuses = completedLines.length;

            const jsonStart = locateSetupJsonStart(preJsonBuffer);
            if (jsonStart !== -1) {
              inJson = true;
              jsonContent = preJsonBuffer.slice(jsonStart);
            }
          },
          onEnd: (meta) => {
            stopReason = meta?.stopReason ?? null;

            let jsonToParse = (jsonContent || preJsonBuffer).replace(/```\s*$/g, "").trim();
            if (!jsonContent && preJsonBuffer) {
              const fallbackStart = locateSetupJsonStart(preJsonBuffer);
              if (fallbackStart !== -1) {
                jsonToParse = preJsonBuffer.slice(fallbackStart).replace(/```\s*$/g, "").trim();
              }
            }

            let setup = null;
            try {
              const parsed = JSON.parse(jsonToParse);
              // A setup must be a JSON object; arrays or primitives are not a usable setup.
              if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
                setup = parsed;
              }
            } catch {
              // Deliberately ignored: an unparsable response is reported as `setup: null` below.
            }

            if (!setup && stopReason === "max_tokens" && attempt < MAX_RETRIES2) {
              if (callbacks) callbacks.onStatus("Output was truncated, retrying with higher token limit...");
              setTimeout(() => attemptGeneration().then(resolve2), 1e3);
              return;
            }

            let explanation;
            const explainMatch = preJsonBuffer.match(/EXPLAIN:\s*\n([\s\S]*?)(?=\n\s*(`{3}|\{))/);
            if (explainMatch) {
              explanation = explainMatch[1].trim();
            }

            if (setup) {
              if (callbacks) callbacks.onComplete(setup, explanation);
              resolve2({ setup, explanation, stopReason: stopReason ?? void 0 });
            } else {
              resolve2({ setup: null, explanation, raw: preJsonBuffer, stopReason: stopReason ?? void 0 });
            }
          },
          onError: (error) => {
            if (isTransientError2(error) && attempt < MAX_RETRIES2) {
              if (callbacks) callbacks.onStatus("Connection interrupted, retrying...");
              setTimeout(() => attemptGeneration().then(resolve2), 2e3);
              return;
            }
            if (callbacks) callbacks.onError(error.message);
            resolve2({ setup: null, raw: error.message, stopReason: "error" });
          }
        }
      ).catch((error) => {
        if (callbacks) callbacks.onError(error.message);
        resolve2({ setup: null, raw: error.message, stopReason: "error" });
      });
    });
  };

  return attemptGeneration();
}

// Returns the index of the `{` that opens the setup JSON in `buffer`, or -1 when it has not
// appeared yet. The brace must start a line (optionally after a ```json fence) and be followed
// by a double quote, so braces inside STATUS/EXPLAIN prose are not mistaken for the JSON start.
function locateSetupJsonStart(buffer) {
  const match = buffer.match(/(?:^|\n)\s*(?:```json\s*\n\s*)?\{(?=\s*")/);
  return match ? buffer.indexOf("{", match.index) : -1;
}
2278
+ async function generateMonolithic(fingerprint, targetAgent, prompt, callbacks, failingChecks, currentScore, passingChecks) {
1931
2279
  const provider = getProvider();
1932
2280
  const userMessage = buildGeneratePrompt(fingerprint, targetAgent, prompt, failingChecks, currentScore, passingChecks);
1933
2281
  let attempt = 0;
@@ -5854,7 +6202,8 @@ async function initCommand(options) {
5854
6202
  },
5855
6203
  polishFailing,
5856
6204
  afterScore.score,
5857
- polishPassing
6205
+ polishPassing,
6206
+ { skipSkills: true, forceTargetedFix: true }
5858
6207
  );
5859
6208
  if (polishResult.setup) {
5860
6209
  const polishWriteResult = writeSetup(polishResult.setup);
@@ -6200,7 +6549,10 @@ function ensurePermissions() {
6200
6549
  }
6201
6550
  function displayTokenUsage() {
6202
6551
  const summary = getUsageSummary();
6203
- if (summary.length === 0) return;
6552
+ if (summary.length === 0) {
6553
+ console.log(chalk8.dim(" Token tracking not available for this provider.\n"));
6554
+ return;
6555
+ }
6204
6556
  console.log(chalk8.bold(" Token usage:\n"));
6205
6557
  let totalIn = 0;
6206
6558
  let totalOut = 0;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rely-ai/caliber",
3
- "version": "1.9.0",
3
+ "version": "1.10.1",
4
4
  "description": "Analyze your codebase and generate optimized AI agent configs (CLAUDE.md, .cursorrules, skills) — no API key needed",
5
5
  "type": "module",
6
6
  "bin": {