npm - @smartmemory/compose - Versions diffs - 0.2.24-beta → 0.2.25-beta - Mend

@smartmemory/compose 0.2.24-beta → 0.2.25-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/.claude/skills/context-budget/SKILL.md +19 -0
package/lib/context-budget.js +99 -24
package/package.json +1 -1

package/.claude/skills/context-budget/SKILL.md CHANGED Viewed

@@ -43,6 +43,25 @@ node <compose-root>/lib/context-budget.js <project-root> \
 - Token estimate is a dependency-free ~4-chars-per-token heuristic — **relative budgeting,
   not billing-accurate**. Use it to rank, not to bill.
+### Surface vs. live — read this before recommending cuts
+The report prints **two numbers per component**: `surface` (full file on disk) and `live`
+(what actually loads into context at session start). They differ because of **progressive
+disclosure**:
+- **Skills & agents** load only their **frontmatter (name + description)** at startup; the body
+  loads when the skill/agent is invoked. So a 5K-token skill costs ~40 live tokens until used.
+  **Deleting it reclaims its description, not its body.**
+- **Rules & the CLAUDE.md chain** are inlined into the system prompt at startup → `live == surface`.
+- **MCP tool schemas** load in full at startup when the harness loads tools eagerly, but
+  tool-deferral harnesses (e.g. ToolSearch) load them on demand — flagged `mcp-may-defer`. Treat the
+  reported live cost of an MCP server as an upper bound.
+**Always reason about cuts in `live` tokens, not `surface`.** TOP 5 RECLAIMS is ranked by live.
+The common trap: a catalog of 50 skills shows a huge `surface` total but a tiny `live` total —
+mass-deleting them reclaims almost nothing while destroying capability. The real micro-levers are
+usually **trimming verbose agent/skill descriptions**, **removing genuinely-unused entries** (their
+descriptions are pure live cost), and **disabling unused MCP servers** (the biggest live line items).
 ### Step 3 — Interpret the report
 The report prints three buckets and a TOP 5 RECLAIMS list. Walk the user through:

package/lib/context-budget.js CHANGED Viewed

@@ -52,6 +52,48 @@ function contentHash(text) {
   return createHash('sha1').update(text || '').digest('hex');
 }
+/**
+ * Extract the YAML frontmatter block (including the `---` fences) from a skill or
+ * agent file. This is what Claude Code surfaces at session start — name +
+ * description — under progressive disclosure; the body loads only on invocation.
+ *
+ * Detection is purely textual (no YAML parsing): the text must begin with `---`,
+ * and the block ends at the first later line that begins with `---`.
+ *
+ * @param {string} text - Full file contents. Empty, null, or undefined yields null.
+ * @returns {string|null} The text from the opening fence through the three dashes
+ *   of the closing fence (anything after those dashes is excluded), or null when
+ *   the text does not start with `---` or no closing fence is found.
+ */
+export function extractFrontmatter(text) {
+  if (!text || !text.startsWith('---')) return null; // the fence must be the very first characters
+  const end = text.indexOf('\n---', 3); // search begins just past the opening `---`
+  if (end === -1) return null; // unterminated frontmatter is treated as absent
+  return text.slice(0, end + 4); // '\n---' is 4 chars, so the slice ends right after the closing dashes
+}
+/**
+ * This comment covers two functions: liveTextFor(), which is defined right after
+ * the matchFrontmatterField() helper below, and that helper itself.
+ *
+ * liveTextFor(kind, text): the text that is actually loaded into context at
+ * session start for a component.
+ * - skill / agent: progressive disclosure → only the frontmatter `name` and
+ *   `description` values load until the component is invoked. Falls back to the
+ *   first non-blank line if no frontmatter is present.
+ * - rule / claude-md: inlined into the CLAUDE.md context at startup → full text
+ *   (in code, every kind other than skill/agent returns the full text).
+ * - mcp-server: handled in scanMcpServers (full schema estimate).
+ *
+ * matchFrontmatterField(fm, key): returns the trimmed remainder of the first line
+ * in `fm` that starts with `<key>:`, or null when no line matches. The key must
+ * sit at the start of a line and is matched case-insensitively.
+ *
+ * NOTE(review): only the text on the key's own line is captured, so a YAML block
+ * scalar (`description: >`) or a value wrapped onto following lines would yield
+ * just `>` or the first fragment — confirm inputs keep these fields on one line.
+ * NOTE(review): `key` is interpolated into the pattern unescaped; the calls in
+ * liveTextFor() pass the literals 'name' and 'description'.
+ */
+function matchFrontmatterField(fm, key) {
+  const re = new RegExp(`^${key}:[ \\t]*(.*)$`, 'mi'); // m: ^ and $ anchor per line; i: case-insensitive
+  const m = fm.match(re);
+  return m ? m[1].trim() : null; // '' (not null) when the key is present with an empty value
+}
+function liveTextFor(kind, text) { // returns the startup-loaded ("live") text for a component of this kind
+  if (kind === 'skill' || kind === 'agent') {
+    const fm = extractFrontmatter(text);
+    if (fm == null) return (text || '').split('\n').find((l) => l.trim()) || ''; // no frontmatter: first non-blank line, else ''
+    // Only name + description surface at startup — count those fields specifically
+    // (robust to extra frontmatter keys like allowed-tools). If neither key is present
+    // (unusual shape), fall back to the whole block as a conservative estimate.
+    const name = matchFrontmatterField(fm, 'name');
+    const desc = matchFrontmatterField(fm, 'description'); // NOTE(review): single-line capture only; a multi-line YAML description would be under-counted
+    if (name == null && desc == null) return fm;
+    return [name, desc].filter(Boolean).join(' '); // filter(Boolean) drops null and empty values before joining
+  }
+  return text || ''; // every other kind is counted in full
+}
 function makeComponent(kind, path, label, text, extraFlags = []) {
   const lines = lineCount(text);
   const flags = [...extraFlags];
@@ -62,7 +104,8 @@ function makeComponent(kind, path, label, text, extraFlags = []) {
     path,
     label,
     lines,
-    tokens: estimateTokens(text),
+    tokens: estimateTokens(text), // on-disk surface (full file)
+    liveTokens: estimateTokens(liveTextFor(kind, text)), // loaded at startup
     hash: contentHash(text),
     flags,
   };
@@ -190,12 +233,16 @@ function scanMcpServers(mcpConfigPath, toolCounts = {}) {
     } else {
       flags.push('tool-count-unknown');
     }
+    // MCP tool schemas load fully at startup in most harnesses, but tool-deferral
+    // harnesses (e.g. ToolSearch) load them on demand — so the live cost may be 0.
+    flags.push('mcp-may-defer');
     out.push({
       kind: 'mcp-server',
       path: mcpConfigPath,
       label: `mcp-server:${name}`,
       lines: 0,
       tokens,
+      liveTokens: tokens, // full schema when eagerly loaded (see mcp-may-defer)
       hash: contentHash(`mcp:${name}`),
       flags,
       toolCount: hasCount ? count : null,
@@ -261,7 +308,7 @@ export function dedupeSkills(components) {
     if (c.kind !== 'skill') return c;
     const key = `${c.label}::${c.hash}`;
     if (seen.has(key)) {
-      return { ...c, duplicateOf: seen.get(key).path, tokens: 0, flags: [...c.flags, 'duplicate'] };
+      return { ...c, duplicateOf: seen.get(key).path, tokens: 0, liveTokens: 0, flags: [...c.flags, 'duplicate'] };
     }
     seen.set(key, c);
     return c;
@@ -343,69 +390,97 @@ function formatTokens(n) {
  * components already carrying a `bucket`.
  */
 export function buildReport(components, ctx = {}) {
-  // Ensure each component is classified.
+  // Ensure each component is classified and carries a liveTokens estimate.
+  // scanSurface() always sets liveTokens. For a hand-built component that omits
+  // it, default CONSERVATIVELY to the full surface tokens — a budget tool should
+  // over-report cost, never hide it. (We can't recompute a description-only
+  // estimate here without the source text.)
   const classified = components.map((c) => {
-    if (c.bucket) return c;
-    const { bucket, reason } = classifyComponent(c, ctx);
-    return { ...c, bucket, reason };
+    const withLive = c.liveTokens == null ? { ...c, liveTokens: c.tokens } : c; // == null covers both null and undefined
+    if (withLive.bucket) return withLive; // pre-classified components keep their existing bucket
+    const { bucket, reason } = classifyComponent(withLive, ctx);
+    return { ...withLive, bucket, reason };
   });
   const buckets = { always: [], sometimes: [], rarely: [] };
   for (const c of classified) buckets[c.bucket].push(c); // NOTE(review): throws if c.bucket is not always/sometimes/rarely — confirm callers and classifyComponent only produce these
-  const totalTokens = classified.reduce((a, c) => a + c.tokens, 0);
+  const totalTokens = classified.reduce((a, c) => a + c.tokens, 0); // on-disk surface
+  const totalLiveTokens = classified.reduce((a, c) => a + c.liveTokens, 0); // loaded at startup
-  // Top reclaims: highest-token candidates among sometimes+rarely.
+  // Top reclaims: ranked by LIVE tokens — the savings you actually get back by
+  // cutting it (progressive disclosure means a big on-disk skill reclaims only
+  // its description). Among sometimes+rarely with non-zero live cost.
   const topReclaims = [...buckets.sometimes, ...buckets.rarely]
-    .filter((c) => c.tokens > 0)
-    .sort((a, b) => b.tokens - a.tokens)
+    .filter((c) => c.liveTokens > 0)
+    .sort((a, b) => b.liveTokens - a.liveTokens)
     .slice(0, 5); // keep at most five entries
-  const text = renderReport({ buckets, totalTokens, topReclaims });
-  return { totalTokens, buckets, topReclaims, classified, text };
+  const text = renderReport({ buckets, totalTokens, totalLiveTokens, topReclaims });
+  return { totalTokens, totalLiveTokens, buckets, topReclaims, classified, text };
 }
 function renderBucketLines(list) { // formats one report line per component in a bucket
   return list
     .slice() // copy first so sort() does not reorder the caller's array
-    .sort((a, b) => b.tokens - a.tokens)
+    .sort((a, b) => b.liveTokens - a.liveTokens || b.tokens - a.tokens) // live tokens descending; ties broken by surface tokens descending
     .map((c) => {
       const flagStr = c.flags && c.flags.length ? ` [${c.flags.join(', ')}]` : ''; // flags are shown only when at least one is set
-      return `  - ${c.label} (${c.lines} lines, ~${formatTokens(c.tokens)} tokens) — ${c.reason}${flagStr}`;
+      return `  - ${c.label} (${c.lines} lines, ~${formatTokens(c.tokens)} surface / ~${formatTokens(c.liveTokens)} live) — ${c.reason}${flagStr}`;
     })
     .join('\n'); // '' for an empty list
 }
-function bucketTotal(list) {
+function bucketSurface(list) {
   return list.reduce((a, c) => a + c.tokens, 0); // sum of each component's `tokens`; 0 for an empty list
 }
+function bucketLive(list) {
+  return list.reduce((a, c) => a + c.liveTokens, 0); // sum of each component's `liveTokens`; 0 for an empty list
+}
-function renderReport({ buckets, totalTokens, topReclaims }) {
+function renderReport({ buckets, totalTokens, totalLiveTokens, topReclaims }) {
   const lines = []; // report is accumulated line by line and joined with '\n' at the end
-  lines.push(`CONTEXT BUDGET — current load: ~${formatTokens(totalTokens)} tokens`);
+  lines.push(
+    `CONTEXT BUDGET — ~${formatTokens(totalTokens)} tokens on disk / ~${formatTokens(totalLiveTokens)} loaded at startup`
+  );
+  lines.push(
+    '  (skills & agents are progressive-disclosure: only their description loads until invoked,'
+  );
+  lines.push(
+    '   so "live" is the real session-start cost; MCP schemas may also defer — see mcp-may-defer)'
+  );
   lines.push('');
-  lines.push(`ALWAYS NEEDED (keep, total ~${formatTokens(bucketTotal(buckets.always))} tokens)`);
+  lines.push(
+    `ALWAYS NEEDED (keep, ~${formatTokens(bucketSurface(buckets.always))} surface / ~${formatTokens(bucketLive(buckets.always))} live)`
+  );
   lines.push(renderBucketLines(buckets.always) || '  (none)'); // an empty bucket renders as the '  (none)' placeholder
   lines.push('');
   lines.push(
-    `SOMETIMES NEEDED (consider lazy-load, total ~${formatTokens(bucketTotal(buckets.sometimes))} tokens)`
+    `SOMETIMES NEEDED (consider lazy-load, ~${formatTokens(bucketSurface(buckets.sometimes))} surface / ~${formatTokens(bucketLive(buckets.sometimes))} live)`
   );
   lines.push(renderBucketLines(buckets.sometimes) || '  (none)');
   lines.push('');
-  lines.push(`RARELY NEEDED (recommend cut, total ~${formatTokens(bucketTotal(buckets.rarely))} tokens)`);
+  lines.push(
+    `RARELY NEEDED (recommend cut, ~${formatTokens(bucketSurface(buckets.rarely))} surface / ~${formatTokens(bucketLive(buckets.rarely))} live)`
+  );
   lines.push(renderBucketLines(buckets.rarely) || '  (none)');
   lines.push('');
-  lines.push('TOP 5 RECLAIMS:');
+  lines.push('TOP 5 RECLAIMS (by live tokens — what you actually get back):');
   if (topReclaims.length === 0) {
     lines.push('  (none)');
   } else {
     topReclaims.forEach((c, i) => {
-      lines.push(`  ${i + 1}. ${c.label} (~${formatTokens(c.tokens)} tokens) — ${c.reason}`);
+      lines.push(
+        `  ${i + 1}. ${c.label} (~${formatTokens(c.liveTokens)} live / ~${formatTokens(c.tokens)} surface) — ${c.reason}`
+      );
     });
   }
-  const potential = bucketTotal(buckets.sometimes) + bucketTotal(buckets.rarely);
+  const potentialLive = bucketLive(buckets.sometimes) + bucketLive(buckets.rarely); // live total across the two non-"always" buckets
+  const potentialSurface = bucketSurface(buckets.sometimes) + bucketSurface(buckets.rarely); // surface total across the same two buckets
   lines.push('');
-  lines.push(`Potential reclaim if all sometimes+rarely cut: ~${formatTokens(potential)} tokens`);
+  lines.push(
+    `Potential reclaim if all sometimes+rarely cut: ~${formatTokens(potentialLive)} live (~${formatTokens(potentialSurface)} surface)`
+  );
   return lines.join('\n');
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@smartmemory/compose",
-  "version": "0.2.24-beta",
+  "version": "0.2.25-beta",
   "description": "Structured AI dev pipeline — goal-to-product orchestration with gates, iteration loops, and feature lifecycle management.",
   "author": "SmartMemory",
   "license": "MIT",