@smartmemory/compose 0.2.24-beta → 0.2.25-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -43,6 +43,25 @@ node <compose-root>/lib/context-budget.js <project-root> \
43
43
  - Token estimate is a dependency-free ~4-chars-per-token heuristic — **relative budgeting,
44
44
  not billing-accurate**. Use it to rank, not to bill.
45
45
 
46
+ ### Surface vs. live — read this before recommending cuts
47
+
48
+ The report prints **two numbers per component**: `surface` (full file on disk) and `live`
49
+ (what actually loads into context at session start). They differ because of **progressive
50
+ disclosure**:
51
+
52
+ - **Skills & agents** load only their **frontmatter (name + description)** at startup; the body
53
+ loads when the skill/agent is invoked. So a 5K-token skill costs ~40 live tokens until used.
54
+ **Deleting it reclaims its description, not its body.**
55
+ - **Rules & the CLAUDE.md chain** are inlined into the system prompt at startup → `live == surface`.
56
+ - **MCP tool schemas** load fully *when eagerly loaded*, but tool-deferral harnesses (e.g.
57
+ ToolSearch) load them on demand — flagged `mcp-may-defer`. Treat their live cost as an upper bound.
58
+
59
+ **Always reason about cuts in `live` tokens, not `surface`.** TOP 5 RECLAIMS is ranked by live.
60
+ The common trap: a catalog of 50 skills shows a huge `surface` total but a tiny `live` total —
61
+ mass-deleting them reclaims almost nothing while destroying capability. The real micro-levers are
62
+ usually **trimming verbose agent/skill descriptions**, **removing genuinely-unused entries** (their
63
+ descriptions are pure live cost), and **disabling unused MCP servers** (the biggest live line items).
64
+
46
65
  ### Step 3 — Interpret the report
47
66
 
48
67
  The report prints three buckets and a TOP 5 RECLAIMS list. Walk the user through:
@@ -52,6 +52,48 @@ function contentHash(text) {
52
52
  return createHash('sha1').update(text || '').digest('hex');
53
53
  }
54
54
 
55
+ /**
56
+ * Extract the YAML frontmatter block (including the `---` fences) from a skill or
57
+ * agent file. This is what Claude Code surfaces at session start — name +
58
+ * description — under progressive disclosure; the body loads only on invocation.
59
+ * Returns null if there is no leading frontmatter or its closing `---` fence is missing.
60
+ */
61
+ export function extractFrontmatter(text) {
62
+ if (!text || !text.startsWith('---')) return null;
63
+ const end = text.indexOf('\n---', 3);
64
+ if (end === -1) return null;
65
+ return text.slice(0, end + 4);
66
+ }
67
+
68
+ /**
69
+ * liveTextFor(): the text that is actually loaded into context at session start for a component.
70
+ * - skill / agent: progressive disclosure → only the frontmatter (name+description)
71
+ * loads until the component is invoked. Falls back to the first non-blank line if no
72
+ * frontmatter is present.
73
+ * - rule / claude-md: inlined into the CLAUDE.md context at startup → full text.
74
+ * - mcp-server: handled in scanMcpServers (full schema estimate).
75
+ */
76
+ function matchFrontmatterField(fm, key) {
77
+ const re = new RegExp(`^${key}:[ \\t]*(.*)$`, 'mi');
78
+ const m = fm.match(re);
79
+ return m ? m[1].trim() : null;
80
+ }
81
+
82
+ function liveTextFor(kind, text) {
83
+ if (kind === 'skill' || kind === 'agent') {
84
+ const fm = extractFrontmatter(text);
85
+ if (fm == null) return (text || '').split('\n').find((l) => l.trim()) || '';
86
+ // Only name + description surface at startup — count those fields specifically
87
+ // (robust to extra frontmatter keys like allowed-tools). If neither is present
88
+ // (unusual shape), fall back to the whole block as a conservative estimate.
89
+ const name = matchFrontmatterField(fm, 'name');
90
+ const desc = matchFrontmatterField(fm, 'description');
91
+ if (name == null && desc == null) return fm;
92
+ return [name, desc].filter(Boolean).join(' ');
93
+ }
94
+ return text || '';
95
+ }
96
+
55
97
  function makeComponent(kind, path, label, text, extraFlags = []) {
56
98
  const lines = lineCount(text);
57
99
  const flags = [...extraFlags];
@@ -62,7 +104,8 @@ function makeComponent(kind, path, label, text, extraFlags = []) {
62
104
  path,
63
105
  label,
64
106
  lines,
65
- tokens: estimateTokens(text),
107
+ tokens: estimateTokens(text), // on-disk surface (full file)
108
+ liveTokens: estimateTokens(liveTextFor(kind, text)), // loaded at startup
66
109
  hash: contentHash(text),
67
110
  flags,
68
111
  };
@@ -190,12 +233,16 @@ function scanMcpServers(mcpConfigPath, toolCounts = {}) {
190
233
  } else {
191
234
  flags.push('tool-count-unknown');
192
235
  }
236
+ // MCP tool schemas load fully at startup in most harnesses, but tool-deferral
237
+ // harnesses (e.g. ToolSearch) load them on demand — so the live cost may be 0.
238
+ flags.push('mcp-may-defer');
193
239
  out.push({
194
240
  kind: 'mcp-server',
195
241
  path: mcpConfigPath,
196
242
  label: `mcp-server:${name}`,
197
243
  lines: 0,
198
244
  tokens,
245
+ liveTokens: tokens, // full schema when eagerly loaded (see mcp-may-defer)
199
246
  hash: contentHash(`mcp:${name}`),
200
247
  flags,
201
248
  toolCount: hasCount ? count : null,
@@ -261,7 +308,7 @@ export function dedupeSkills(components) {
261
308
  if (c.kind !== 'skill') return c;
262
309
  const key = `${c.label}::${c.hash}`;
263
310
  if (seen.has(key)) {
264
- return { ...c, duplicateOf: seen.get(key).path, tokens: 0, flags: [...c.flags, 'duplicate'] };
311
+ return { ...c, duplicateOf: seen.get(key).path, tokens: 0, liveTokens: 0, flags: [...c.flags, 'duplicate'] };
265
312
  }
266
313
  seen.set(key, c);
267
314
  return c;
@@ -343,69 +390,97 @@ function formatTokens(n) {
343
390
  * components already carrying a `bucket`.
344
391
  */
345
392
  export function buildReport(components, ctx = {}) {
346
- // Ensure each component is classified.
393
+ // Ensure each component is classified and carries a liveTokens estimate.
394
+ // scanSurface() always sets liveTokens. For a hand-built component that omits
395
+ // it, default CONSERVATIVELY to the full surface tokens — a budget tool should
396
+ // over-report cost, never hide it. (We can't recompute a description-only
397
+ // estimate here without the source text.)
347
398
  const classified = components.map((c) => {
348
- if (c.bucket) return c;
349
- const { bucket, reason } = classifyComponent(c, ctx);
350
- return { ...c, bucket, reason };
399
+ const withLive = c.liveTokens == null ? { ...c, liveTokens: c.tokens } : c;
400
+ if (withLive.bucket) return withLive;
401
+ const { bucket, reason } = classifyComponent(withLive, ctx);
402
+ return { ...withLive, bucket, reason };
351
403
  });
352
404
 
353
405
  const buckets = { always: [], sometimes: [], rarely: [] };
354
406
  for (const c of classified) buckets[c.bucket].push(c);
355
407
 
356
- const totalTokens = classified.reduce((a, c) => a + c.tokens, 0);
408
+ const totalTokens = classified.reduce((a, c) => a + c.tokens, 0); // on-disk surface
409
+ const totalLiveTokens = classified.reduce((a, c) => a + c.liveTokens, 0); // loaded at startup
357
410
 
358
- // Top reclaims: highest-token candidates among sometimes+rarely.
411
+ // Top reclaims: ranked by LIVE tokens — the savings you actually get back by
412
+ // cutting it (progressive disclosure means a big on-disk skill reclaims only
413
+ // its description). Among sometimes+rarely with non-zero live cost.
359
414
  const topReclaims = [...buckets.sometimes, ...buckets.rarely]
360
- .filter((c) => c.tokens > 0)
361
- .sort((a, b) => b.tokens - a.tokens)
415
+ .filter((c) => c.liveTokens > 0)
416
+ .sort((a, b) => b.liveTokens - a.liveTokens)
362
417
  .slice(0, 5);
363
418
 
364
- const text = renderReport({ buckets, totalTokens, topReclaims });
365
- return { totalTokens, buckets, topReclaims, classified, text };
419
+ const text = renderReport({ buckets, totalTokens, totalLiveTokens, topReclaims });
420
+ return { totalTokens, totalLiveTokens, buckets, topReclaims, classified, text };
366
421
  }
367
422
 
368
423
  function renderBucketLines(list) {
369
424
  return list
370
425
  .slice()
371
- .sort((a, b) => b.tokens - a.tokens)
426
+ .sort((a, b) => b.liveTokens - a.liveTokens || b.tokens - a.tokens)
372
427
  .map((c) => {
373
428
  const flagStr = c.flags && c.flags.length ? ` [${c.flags.join(', ')}]` : '';
374
- return ` - ${c.label} (${c.lines} lines, ~${formatTokens(c.tokens)} tokens) — ${c.reason}${flagStr}`;
429
+ return ` - ${c.label} (${c.lines} lines, ~${formatTokens(c.tokens)} surface / ~${formatTokens(c.liveTokens)} live) — ${c.reason}${flagStr}`;
375
430
  })
376
431
  .join('\n');
377
432
  }
378
433
 
379
- function bucketTotal(list) {
434
+ function bucketSurface(list) {
380
435
  return list.reduce((a, c) => a + c.tokens, 0);
381
436
  }
437
+ function bucketLive(list) {
438
+ return list.reduce((a, c) => a + c.liveTokens, 0);
439
+ }
382
440
 
383
- function renderReport({ buckets, totalTokens, topReclaims }) {
441
+ function renderReport({ buckets, totalTokens, totalLiveTokens, topReclaims }) {
384
442
  const lines = [];
385
- lines.push(`CONTEXT BUDGET — current load: ~${formatTokens(totalTokens)} tokens`);
443
+ lines.push(
444
+ `CONTEXT BUDGET — ~${formatTokens(totalTokens)} tokens on disk / ~${formatTokens(totalLiveTokens)} loaded at startup`
445
+ );
446
+ lines.push(
447
+ ' (skills & agents are progressive-disclosure: only their description loads until invoked,'
448
+ );
449
+ lines.push(
450
+ ' so "live" is the real session-start cost; MCP schemas may also defer — see mcp-may-defer)'
451
+ );
386
452
  lines.push('');
387
- lines.push(`ALWAYS NEEDED (keep, total ~${formatTokens(bucketTotal(buckets.always))} tokens)`);
453
+ lines.push(
454
+ `ALWAYS NEEDED (keep, ~${formatTokens(bucketSurface(buckets.always))} surface / ~${formatTokens(bucketLive(buckets.always))} live)`
455
+ );
388
456
  lines.push(renderBucketLines(buckets.always) || ' (none)');
389
457
  lines.push('');
390
458
  lines.push(
391
- `SOMETIMES NEEDED (consider lazy-load, total ~${formatTokens(bucketTotal(buckets.sometimes))} tokens)`
459
+ `SOMETIMES NEEDED (consider lazy-load, ~${formatTokens(bucketSurface(buckets.sometimes))} surface / ~${formatTokens(bucketLive(buckets.sometimes))} live)`
392
460
  );
393
461
  lines.push(renderBucketLines(buckets.sometimes) || ' (none)');
394
462
  lines.push('');
395
- lines.push(`RARELY NEEDED (recommend cut, total ~${formatTokens(bucketTotal(buckets.rarely))} tokens)`);
463
+ lines.push(
464
+ `RARELY NEEDED (recommend cut, ~${formatTokens(bucketSurface(buckets.rarely))} surface / ~${formatTokens(bucketLive(buckets.rarely))} live)`
465
+ );
396
466
  lines.push(renderBucketLines(buckets.rarely) || ' (none)');
397
467
  lines.push('');
398
- lines.push('TOP 5 RECLAIMS:');
468
+ lines.push('TOP 5 RECLAIMS (by live tokens — what you actually get back):');
399
469
  if (topReclaims.length === 0) {
400
470
  lines.push(' (none)');
401
471
  } else {
402
472
  topReclaims.forEach((c, i) => {
403
- lines.push(` ${i + 1}. ${c.label} (~${formatTokens(c.tokens)} tokens) — ${c.reason}`);
473
+ lines.push(
474
+ ` ${i + 1}. ${c.label} (~${formatTokens(c.liveTokens)} live / ~${formatTokens(c.tokens)} surface) — ${c.reason}`
475
+ );
404
476
  });
405
477
  }
406
- const potential = bucketTotal(buckets.sometimes) + bucketTotal(buckets.rarely);
478
+ const potentialLive = bucketLive(buckets.sometimes) + bucketLive(buckets.rarely);
479
+ const potentialSurface = bucketSurface(buckets.sometimes) + bucketSurface(buckets.rarely);
407
480
  lines.push('');
408
- lines.push(`Potential reclaim if all sometimes+rarely cut: ~${formatTokens(potential)} tokens`);
481
+ lines.push(
482
+ `Potential reclaim if all sometimes+rarely cut: ~${formatTokens(potentialLive)} live (~${formatTokens(potentialSurface)} surface)`
483
+ );
409
484
  return lines.join('\n');
410
485
  }
411
486
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@smartmemory/compose",
3
- "version": "0.2.24-beta",
3
+ "version": "0.2.25-beta",
4
4
  "description": "Structured AI dev pipeline — goal-to-product orchestration with gates, iteration loops, and feature lifecycle management.",
5
5
  "author": "SmartMemory",
6
6
  "license": "MIT",