akm-cli 0.1.3 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,8 +9,9 @@
9
9
  */
10
10
  import fs from "node:fs";
11
11
  import path from "node:path";
12
- import { _setAssetTypeHooks, deriveCanonicalAssetNameFromStashRoot } from "./asset-spec";
13
- import { closeDatabase, getAllEntries, getEntryById, getEntryCount, getMeta, openDatabase, searchFts, searchVec, } from "./db";
12
+ import { ACTION_BUILDERS, TYPE_TO_RENDERER } from "./asset-registry";
13
+ import { deriveCanonicalAssetNameFromStashRoot } from "./asset-spec";
14
+ import { closeDatabase, getAllEntries, getEntryById, getEntryCount, getMeta, getUtilityScoresByIds, openDatabase, searchFts, searchVec, } from "./db";
14
15
  import { getRenderer } from "./file-context";
15
16
  import { buildSearchText } from "./indexer";
16
17
  import { generateMetadataFlat, loadStashFile } from "./metadata";
@@ -19,32 +20,6 @@ import { buildEditHint, findSourceForPath, isEditable } from "./search-source";
19
20
  import { makeAssetRef } from "./stash-ref";
20
21
  import { walkStashFlat } from "./walker";
21
22
  import { warn } from "./warn";
22
- // ── Type renderer/action maps (re-exported so stash-search.ts can register) ──
23
- /** Map asset types to their primary renderer names. */
24
- export const TYPE_TO_RENDERER = {
25
- script: "script-source",
26
- skill: "skill-md",
27
- command: "command-md",
28
- agent: "agent-md",
29
- knowledge: "knowledge-md",
30
- memory: "memory-md",
31
- };
32
- export const ACTION_BUILDERS = {
33
- script: (ref) => `akm show ${ref} -> execute the run command`,
34
- skill: (ref) => `akm show ${ref} -> follow the instructions`,
35
- command: (ref) => `akm show ${ref} -> fill placeholders and dispatch`,
36
- agent: (ref) => `akm show ${ref} -> dispatch with full prompt`,
37
- knowledge: (ref) => `akm show ${ref} -> read reference material`,
38
- memory: (ref) => `akm show ${ref} -> recall context`,
39
- };
40
- // Wire asset-spec's deferred hooks so that registerAssetType() automatically
41
- // populates TYPE_TO_RENDERER and ACTION_BUILDERS when the optional spec fields
42
- // rendererName / actionBuilder are provided.
43
- _setAssetTypeHooks((type, rendererName) => {
44
- TYPE_TO_RENDERER[type] = rendererName;
45
- }, (type, builder) => {
46
- ACTION_BUILDERS[type] = builder;
47
- });
48
23
  export async function rendererForType(type) {
49
24
  const name = TYPE_TO_RENDERER[type];
50
25
  return name ? getRenderer(name) : undefined;
@@ -121,74 +96,133 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
121
96
  })));
122
97
  return { hits };
123
98
  }
124
- // Score using FTS5 (BM25) and optionally sqlite-vec
99
+ // Start the async embedding request without awaiting, then run FTS
100
+ // synchronously while the HTTP/local embedding request is in-flight.
101
+ const typeFilter = searchType === "any" ? undefined : searchType;
125
102
  const tEmbed0 = Date.now();
126
- const embeddingScores = await tryVecScores(db, query, limit * 3, config);
103
+ const embeddingPromise = tryVecScores(db, query, limit * 3, config);
104
+ const ftsResults = searchFts(db, query, limit * 3, typeFilter);
105
+ const embeddingScores = await embeddingPromise;
127
106
  const embedMs = Date.now() - tEmbed0;
128
107
  const tRank0 = Date.now();
129
- const typeFilter = searchType === "any" ? undefined : searchType;
130
- const ftsResults = searchFts(db, query, limit * 3, typeFilter);
131
- // Reciprocal Rank Fusion (RRF) constant
132
- const RRF_K = 60;
133
- // Build FTS rank map: rank 1 = best BM25, rank 2 = second best, etc.
134
- const ftsRankMap = new Map();
135
- for (let i = 0; i < ftsResults.length; i++) {
136
- const r = ftsResults[i];
137
- ftsRankMap.set(r.id, { rank: i + 1, result: r });
108
+ // ── Score normalization ──────────────────────────────────────────────
109
+ // Normalized BM25 + cosine similarity with weighted addition
110
+ // (FTS 0.7, vector 0.3) for well-differentiated combined scores.
111
+ // Normalize FTS BM25 scores to 0-1 range
112
+ const ftsScoreMap = new Map();
113
+ if (ftsResults.length > 0) {
114
+ // BM25 scores are negative; most negative = best match
115
+ const bestBm25 = ftsResults[0].bm25Score; // most negative (best)
116
+ const worstBm25 = ftsResults[ftsResults.length - 1].bm25Score; // least negative (worst)
117
+ const range = bestBm25 - worstBm25; // negative range
118
+ for (const r of ftsResults) {
119
+ // Normalize: best match = 1.0, worst match approaches 0
120
+ // When range is 0 (all same score), all get 1.0
121
+ const normalized = range !== 0 ? (r.bm25Score - worstBm25) / range : 1.0;
122
+ // Scale to 0.3-1.0 range so even the worst FTS hit has a meaningful base score
123
+ const ftsScore = 0.3 + normalized * 0.7;
124
+ ftsScoreMap.set(r.id, { score: ftsScore, result: r });
125
+ }
138
126
  }
139
- // Build embedding rank map: sort by cosine similarity descending
140
- const embedRankMap = new Map();
127
+ // Build embedding score map (cosine similarities already 0-1)
128
+ const embedScoreMap = new Map();
141
129
  if (embeddingScores) {
142
- const sortedEmbeddings = [...embeddingScores.entries()].sort((a, b) => b[1] - a[1]);
143
- for (let i = 0; i < sortedEmbeddings.length; i++) {
144
- embedRankMap.set(sortedEmbeddings[i][0], i + 1);
130
+ for (const [id, cosine] of embeddingScores) {
131
+ embedScoreMap.set(id, cosine);
145
132
  }
146
133
  }
147
- // Merge results using RRF
148
- // Issue #15: "hybrid" for results appearing in both FTS and vec results.
134
+ // ── Combine FTS + vector scores ──────────────────────────────────────
135
+ const FTS_WEIGHT = 0.7;
136
+ const VEC_WEIGHT = 0.3;
137
+ const MAX_BOOST_SUM = 3.0;
149
138
  const scored = [];
150
139
  const seenIds = new Set();
151
140
  // Process FTS results
152
- for (const [id, { rank, result }] of ftsRankMap) {
141
+ for (const [id, { score: ftsScore, result }] of ftsScoreMap) {
153
142
  seenIds.add(id);
154
- const ftsRrf = 1 / (RRF_K + rank);
155
- const embedRank = embedRankMap.get(id);
156
- const embedRrf = embedRank !== undefined ? 1 / (RRF_K + embedRank) : 0;
157
- const rrfScore = ftsRrf + embedRrf;
158
- // Issue #15: combined FTS+vec results are "hybrid", not "semantic"
159
- const rankingMode = embedRrf > 0 ? "hybrid" : "fts";
160
- scored.push({ id, entry: result.entry, filePath: result.filePath, score: rrfScore, rankingMode });
143
+ const embedScore = embedScoreMap.get(id);
144
+ let combinedScore;
145
+ let rankingMode;
146
+ if (embedScore !== undefined) {
147
+ combinedScore = ftsScore * FTS_WEIGHT + embedScore * VEC_WEIGHT;
148
+ rankingMode = "hybrid";
149
+ }
150
+ else {
151
+ combinedScore = ftsScore;
152
+ rankingMode = "fts";
153
+ }
154
+ scored.push({ id, entry: result.entry, filePath: result.filePath, score: combinedScore, rankingMode });
161
155
  }
162
156
  // Add vec-only results not already in FTS results
163
157
  if (embeddingScores) {
164
- for (const [id] of embeddingScores) {
158
+ for (const [id, cosine] of embeddingScores) {
165
159
  if (seenIds.has(id))
166
160
  continue;
167
- const embedRank = embedRankMap.get(id);
168
- if (embedRank === undefined)
169
- continue;
170
161
  const found = getEntryById(db, id);
171
162
  if (found) {
172
163
  if (typeFilter && found.entry.type !== typeFilter)
173
164
  continue;
174
- const rrfScore = 1 / (RRF_K + embedRank);
175
165
  scored.push({
176
166
  id,
177
167
  entry: found.entry,
178
168
  filePath: found.filePath,
179
- score: rrfScore,
169
+ score: cosine * VEC_WEIGHT, // Only vector score, no FTS
180
170
  rankingMode: "semantic",
181
171
  });
182
172
  }
183
173
  }
184
174
  }
175
+ // ── Scoring Phase ──────────────────────────────────────────────────────
185
176
  // Apply boosts as multiplicative factors (all boosts in a single phase
186
- // so that sort order and displayed scores are always consistent — Issue #1).
177
+ // so that sort order and displayed scores are always consistent).
178
+ //
179
+ // Ranking philosophy: the goal is to surface the MOST USEFUL result for the
180
+ // user's intent. An exact name match is the strongest signal. Actionable
181
+ // asset types (skills, commands, agents) are more useful than passive
182
+ // reference docs. Curated metadata is more reliable than auto-generated.
187
183
  const queryTokens = query.toLowerCase().split(/\s+/).filter(Boolean);
184
+ const queryLower = query.toLowerCase().trim();
188
185
  for (const item of scored) {
189
186
  const entry = item.entry;
190
187
  let boostSum = 0;
191
- // Tag boost capped at 0.30 (Issue #7)
188
+ // ── 1. Exact / near-exact name match (strongest signal) ──
189
+ // If the query IS the asset name (or very close), this is almost certainly
190
+ // what the user wants. This is the single most important ranking signal.
191
+ const nameLower = entry.name.toLowerCase();
192
+ const nameBase = nameLower.split("/").pop() ?? nameLower; // last segment for path-based names
193
+ if (nameBase === queryLower || nameLower === queryLower) {
194
+ // Exact match: massive boost
195
+ boostSum += 2.0;
196
+ }
197
+ else if (nameBase.includes(queryLower) || queryLower.includes(nameBase)) {
198
+ // Near-exact: query is substring of name or vice versa
199
+ boostSum += 1.0;
200
+ }
201
+ else {
202
+ // Token overlap: how many query tokens appear in the base name?
203
+ const nameTokens = nameBase.split(/[-_\s]+/).filter(Boolean);
204
+ const matchCount = queryTokens.filter((qt) => nameTokens.some((nt) => nt === qt || nt.includes(qt))).length;
205
+ if (matchCount > 0) {
206
+ // Proportional to how many query tokens match (0.3 per token, max 0.9)
207
+ boostSum += Math.min(0.9, matchCount * 0.3);
208
+ }
209
+ }
210
+ // ── 2. Type relevance boost ──
211
+ // Actionable assets (skills, commands, agents) are generally more useful
212
+ // than passive reference material when the user is searching for something
213
+ // to use. Knowledge docs are reference — valuable but secondary.
214
+ const TYPE_BOOST = {
215
+ skill: 0.4,
216
+ command: 0.35,
217
+ agent: 0.3,
218
+ script: 0.2,
219
+ memory: 0.1,
220
+ knowledge: 0,
221
+ };
222
+ boostSum += TYPE_BOOST[entry.type] ?? 0;
223
+ // ── 3. Tag exact match ──
224
+ // Exact tag equality is a strong signal — the author explicitly tagged
225
+ // this asset with the user's search term.
192
226
  if (entry.tags) {
193
227
  let tagBoost = 0;
194
228
  for (const tag of entry.tags) {
@@ -198,7 +232,8 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
198
232
  }
199
233
  boostSum += Math.min(0.3, tagBoost);
200
234
  }
201
- // Search hint boost — capped at 0.24 (Issue #7)
235
+ // ── 4. Search hint match ──
236
+ // Hints are author-curated retrieval cues (e.g. "use when deploying to k8s").
202
237
  if (entry.searchHints) {
203
238
  let hintBoost = 0;
204
239
  for (const hint of entry.searchHints) {
@@ -212,20 +247,69 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
212
247
  }
213
248
  boostSum += Math.min(0.24, hintBoost);
214
249
  }
215
- // Name boost
216
- const nameLower = entry.name.toLowerCase().replace(/[-_]/g, " ");
217
- if (queryTokens.some((t) => nameLower.includes(t))) {
218
- boostSum += 0.1;
250
+ // ── 5. Alias match ──
251
+ // Aliases are alternate names the author defined for discovery.
252
+ if (entry.aliases) {
253
+ for (const alias of entry.aliases) {
254
+ const aliasLower = alias.toLowerCase();
255
+ if (aliasLower === queryLower) {
256
+ boostSum += 1.5; // Nearly as strong as exact name match
257
+ break;
258
+ }
259
+ if (queryTokens.some((t) => aliasLower.includes(t))) {
260
+ boostSum += 0.3;
261
+ }
262
+ }
219
263
  }
220
- // Quality boost (Issue #1: moved from buildDbHit to single-phase)
264
+ // ── 6. Description relevance ──
265
+ // All query tokens appearing in description suggests strong relevance.
266
+ if (entry.description) {
267
+ const descLower = entry.description.toLowerCase();
268
+ const descMatchCount = queryTokens.filter((t) => descLower.includes(t)).length;
269
+ if (descMatchCount === queryTokens.length && queryTokens.length > 1) {
270
+ // All query tokens found in description — high relevance
271
+ boostSum += 0.25;
272
+ }
273
+ else if (descMatchCount > 0) {
274
+ boostSum += 0.1;
275
+ }
276
+ }
277
+ // ── 7. Metadata quality signals ──
221
278
  const qualityBoost = entry.quality === "generated" ? 0 : 0.05;
222
279
  boostSum += qualityBoost;
223
- // Confidence boost (Issue #1: moved from buildDbHit to single-phase)
224
280
  const confidenceBoost = typeof entry.confidence === "number" ? Math.min(0.05, Math.max(0, entry.confidence) * 0.05) : 0;
225
281
  boostSum += confidenceBoost;
226
- item.score = item.score * (1 + boostSum);
282
+ const cappedBoost = Math.min(boostSum, MAX_BOOST_SUM);
283
+ item.score = item.score * (1 + cappedBoost);
284
+ }
285
+ // Utility-based re-ranking (MemRL pattern).
286
+ // After the FTS+boost scoring pass, apply a multiplicative
287
+ // utility factor based on aggregated usage telemetry.
288
+ // Batch-load all utility scores in one query to avoid N+1.
289
+ const UTILITY_WEIGHT = 0.5;
290
+ const UTILITY_MAX_BOOST = 1.5; // Cap at 1.5x multiplier
291
+ const RECENCY_DECAY_DAYS = 30;
292
+ const utilScoresMap = getUtilityScoresByIds(db, scored.map((s) => s.id));
293
+ for (const item of scored) {
294
+ const utilScore = utilScoresMap.get(item.id);
295
+ if (utilScore && utilScore.utility > 0) {
296
+ // Compute recency factor: exponential decay based on days since last use
297
+ let recencyFactor = 1;
298
+ if (utilScore.lastUsedAt) {
299
+ const lastUsedMs = new Date(utilScore.lastUsedAt).getTime();
300
+ const daysSinceLastUse = Number.isNaN(lastUsedMs)
301
+ ? Infinity
302
+ : Math.max(0, (Date.now() - lastUsedMs) / (1000 * 60 * 60 * 24));
303
+ recencyFactor = Math.exp(-daysSinceLastUse / RECENCY_DECAY_DAYS);
304
+ }
305
+ // Compute raw utility boost and cap it
306
+ const rawBoost = 1 + utilScore.utility * recencyFactor * UTILITY_WEIGHT;
307
+ const cappedBoost = Math.min(rawBoost, UTILITY_MAX_BOOST);
308
+ item.score = item.score * cappedBoost;
309
+ item.utilityBoosted = true;
310
+ }
227
311
  }
228
- // Issue #14: deterministic tiebreaker on equal scores
312
+ // Deterministic tiebreaker on equal scores
229
313
  scored.sort((a, b) => b.score - a.score || a.entry.name.localeCompare(b.entry.name));
230
314
  // Deduplicate by file path — keep only the highest-scored entry per file.
231
315
  // Multiple .stash.json entries can map to the same file (e.g. entries without
@@ -234,10 +318,10 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
234
318
  const deduped = deduplicateByPath(scored);
235
319
  const rankMs = Date.now() - tRank0;
236
320
  const selected = deduped.slice(0, limit);
237
- const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode }) => buildDbHit({
321
+ const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode, utilityBoosted }) => buildDbHit({
238
322
  entry,
239
323
  path: filePath,
240
- // Issue #8: round to 4 decimal places instead of 2
324
+ // Round to 4 decimal places
241
325
  score: Math.round(score * 10000) / 10000,
242
326
  query,
243
327
  rankingMode,
@@ -245,6 +329,7 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
245
329
  allStashDirs,
246
330
  sources,
247
331
  config,
332
+ utilityBoosted,
248
333
  })));
249
334
  return { embedMs, rankMs, hits };
250
335
  }
@@ -262,7 +347,7 @@ async function tryVecScores(db, query, k, config) {
262
347
  const scores = new Map();
263
348
  for (const { id, distance } of vecResults) {
264
349
  // Convert L2 distance to cosine similarity (vectors are normalized).
265
- // Issue #3: guard against NaN/Infinity from sqlite-vec edge cases.
350
+ // Guard against NaN/Infinity from sqlite-vec edge cases.
266
351
  const raw = 1 - (distance * distance) / 2;
267
352
  scores.set(id, Number.isFinite(raw) ? Math.max(0, raw) : 0);
268
353
  }
@@ -280,16 +365,14 @@ async function substringSearch(query, searchType, limit, stashDir, sources, conf
280
365
  if (!query) {
281
366
  const sorted = matched.sort(compareAssets);
282
367
  const unique = deduplicateAssetsByPath(sorted);
283
- return Promise.all(unique.slice(0, limit).map((asset) => assetToSearchHit(asset, query, stashDir, sources, config)));
368
+ return Promise.all(unique.slice(0, limit).map((asset) => assetToSearchHit(asset, stashDir, sources, config)));
284
369
  }
285
370
  // Score and sort by relevance
286
371
  const scored = matched.map((asset) => ({ asset, score: scoreSubstringMatch(asset.entry, query) }));
287
372
  scored.sort((a, b) => b.score - a.score || compareAssets(a.asset, b.asset));
288
373
  // Deduplicate by path — keep highest-scored entry per file
289
374
  const dedupedScored = deduplicateByPath(scored.map((s) => ({ ...s, filePath: s.asset.path })));
290
- return Promise.all(dedupedScored
291
- .slice(0, limit)
292
- .map(({ asset, score }) => assetToSearchHit(asset, query, stashDir, sources, config, score)));
375
+ return Promise.all(dedupedScored.slice(0, limit).map(({ asset, score }) => assetToSearchHit(asset, stashDir, sources, config, score)));
293
376
  }
294
377
  function scoreSubstringMatch(entry, query) {
295
378
  const tokens = query.split(/\s+/).filter(Boolean);
@@ -322,17 +405,18 @@ export async function buildDbHit(input) {
322
405
  const entryStashDir = findSourceForPath(input.path, input.sources)?.path ?? input.defaultStashDir;
323
406
  const canonical = deriveCanonicalAssetNameFromStashRoot(input.entry.type, entryStashDir, input.path);
324
407
  const refName = canonical && !canonical.startsWith("../") && !canonical.startsWith("..\\") ? canonical : input.entry.name;
325
- // Issue #1: Quality and confidence boosts are now applied in the main scoring
408
+ // Quality and confidence boosts are now applied in the main scoring
326
409
  // phase (searchDatabase). buildDbHit receives the already-final score and
327
410
  // passes it through without further multiplication. We still compute the
328
411
  // boost values here for buildWhyMatched reporting.
329
412
  const qualityBoost = input.entry.quality === "generated" ? 0 : 0.05;
330
413
  const confidenceBoost = typeof input.entry.confidence === "number" ? Math.min(0.05, Math.max(0, input.entry.confidence) * 0.05) : 0;
331
- // Issue #8: round to 4 decimal places, no boost multiplication
414
+ // Round to 4 decimal places, no boost multiplication
332
415
  const score = Math.round(input.score * 10000) / 10000;
333
- const whyMatched = buildWhyMatched(input.entry, input.query, input.rankingMode, qualityBoost, confidenceBoost);
416
+ const whyMatched = buildWhyMatched(input.entry, input.query, input.rankingMode, qualityBoost, confidenceBoost, input.utilityBoosted);
334
417
  const source = findSourceForPath(input.path, input.sources);
335
418
  const editable = isEditable(input.path, input.config);
419
+ const estimatedTokens = typeof input.entry.fileSize === "number" ? Math.round(input.entry.fileSize / 4) : undefined;
336
420
  const hit = {
337
421
  type: input.entry.type,
338
422
  name: input.entry.name,
@@ -347,6 +431,7 @@ export async function buildDbHit(input) {
347
431
  action: buildLocalAction(input.entry.type, makeAssetRef(input.entry.type, refName, source?.registryId)),
348
432
  score,
349
433
  whyMatched,
434
+ ...(estimatedTokens !== undefined ? { estimatedTokens } : {}),
350
435
  };
351
436
  const renderer = await rendererForType(input.entry.type);
352
437
  if (renderer?.enrichSearchHit) {
@@ -355,9 +440,8 @@ export async function buildDbHit(input) {
355
440
  return hit;
356
441
  }
357
442
  export function buildWhyMatched(entry, query,
358
- // Issue #15: added "hybrid" ranking mode
359
- rankingMode, qualityBoost, confidenceBoost) {
360
- // Issue #15: "hybrid" label for combined FTS+vec results
443
+ // "hybrid" ranking mode
444
+ rankingMode, qualityBoost, confidenceBoost, utilityBoosted) {
361
445
  const reasons = [
362
446
  rankingMode === "hybrid"
363
447
  ? "hybrid (fts + semantic)"
@@ -366,35 +450,50 @@ rankingMode, qualityBoost, confidenceBoost) {
366
450
  : "fts bm25 relevance",
367
451
  ];
368
452
  const tokens = query.toLowerCase().split(/\s+/).filter(Boolean);
453
+ const queryLower = query.toLowerCase().trim();
369
454
  const name = entry.name.toLowerCase();
455
+ const nameBase = name.split("/").pop() ?? name;
370
456
  const tags = entry.tags?.join(" ").toLowerCase() ?? "";
371
457
  const searchHints = entry.searchHints?.join(" ").toLowerCase() ?? "";
372
458
  const aliases = entry.aliases?.join(" ").toLowerCase() ?? "";
373
- // Issue #12: include description in match reasons
374
459
  const desc = entry.description?.toLowerCase() ?? "";
375
- if (tokens.some((t) => name.includes(t)))
460
+ // Name match quality
461
+ if (nameBase === queryLower || name === queryLower) {
462
+ reasons.push("exact name match");
463
+ }
464
+ else if (nameBase.includes(queryLower) || queryLower.includes(nameBase)) {
465
+ reasons.push("near-exact name match");
466
+ }
467
+ else if (tokens.some((t) => nameBase.includes(t))) {
376
468
  reasons.push("matched name tokens");
469
+ }
470
+ // Type relevance
471
+ if (entry.type === "skill" || entry.type === "command" || entry.type === "agent") {
472
+ reasons.push(`${entry.type} type boost`);
473
+ }
377
474
  if (tokens.some((t) => tags.includes(t)))
378
475
  reasons.push("matched tags");
379
476
  if (tokens.some((t) => searchHints.includes(t)))
380
477
  reasons.push("matched searchHints");
381
478
  if (tokens.some((t) => aliases.includes(t)))
382
479
  reasons.push("matched aliases");
383
- // Issue #12: report description matches
384
480
  if (tokens.some((t) => desc.includes(t)))
385
481
  reasons.push("matched description");
386
482
  if (qualityBoost > 0)
387
483
  reasons.push("curated metadata boost");
388
484
  if (confidenceBoost > 0)
389
485
  reasons.push("metadata confidence boost");
486
+ if (utilityBoosted)
487
+ reasons.push("usage history boost");
390
488
  return reasons;
391
489
  }
392
- async function assetToSearchHit(asset, _query, stashDir, sources, config, score) {
490
+ async function assetToSearchHit(asset, stashDir, sources, config, score) {
393
491
  const source = findSourceForPath(asset.path, sources);
394
492
  const editable = isEditable(asset.path, config);
395
493
  const ref = makeAssetRef(asset.entry.type, asset.entry.name, source?.registryId);
396
494
  const fileSize = readFileSize(asset.path);
397
495
  const size = deriveSize(fileSize);
496
+ const estimatedTokens = typeof fileSize === "number" ? Math.round(fileSize / 4) : undefined;
398
497
  const hit = {
399
498
  type: asset.entry.type,
400
499
  name: asset.entry.name,
@@ -410,6 +509,7 @@ async function assetToSearchHit(asset, _query, stashDir, sources, config, score)
410
509
  ...(size ? { size } : {}),
411
510
  action: buildLocalAction(asset.entry.type, ref),
412
511
  ...(score !== undefined ? { score } : {}),
512
+ ...(estimatedTokens !== undefined ? { estimatedTokens } : {}),
413
513
  };
414
514
  const renderer = await rendererForType(asset.entry.type);
415
515
  if (renderer?.enrichSearchHit) {
@@ -499,10 +599,10 @@ function compareAssets(a, b) {
499
599
  /**
500
600
  * Deduplicate scored results by file path, keeping only the highest-scored
501
601
  * entry per unique path. Sorts by score descending internally to ensure the
502
- * precondition is always met regardless of caller (Issue #4).
602
+ * precondition is always met regardless of caller.
503
603
  */
504
604
  function deduplicateByPath(items) {
505
- // Issue #4: sort inside to enforce the descending-score precondition
605
+ // Sort inside to enforce the descending-score precondition
506
606
  const sorted = [...items].sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
507
607
  const seen = new Set();
508
608
  return sorted.filter((item) => {
@@ -0,0 +1,172 @@
1
+ /**
2
+ * Manifest: compact asset listing for cheap capability discovery.
3
+ *
4
+ * Returns a lightweight list of all assets (name, type, ref, one-line
5
+ * description) that stays under 500 tokens for 50 assets. This gives
6
+ * agents a fast way to discover available capabilities without loading
7
+ * full content or running a search query.
8
+ */
9
+ import fs from "node:fs";
10
+ import path from "node:path";
11
+ import { deriveCanonicalAssetNameFromStashRoot } from "./asset-spec";
12
+ import { resolveStashDir } from "./common";
13
+ import { loadConfig } from "./config";
14
+ import { closeDatabase, getAllEntries, getEntryCount, getMeta, openDatabase } from "./db";
15
+ import { generateMetadataFlat, loadStashFile } from "./metadata";
16
+ import { getDbPath } from "./paths";
17
+ import { resolveStashSources } from "./search-source";
18
+ import { makeAssetRef } from "./stash-ref";
19
+ import { walkStashFlat } from "./walker";
20
+ import { warn } from "./warn";
/** Upper bound, in UTF-16 code units, for a description returned by truncateDescription. */
const MAX_DESCRIPTION_LENGTH = 80;
/**
 * Truncate a description so that the returned string, including the trailing
 * "..." marker, never exceeds MAX_DESCRIPTION_LENGTH.
 *
 * @param {string | undefined | null} desc - Raw description text.
 * @returns {string | undefined} `undefined` for a missing/empty description,
 *   the input unchanged when it already fits, otherwise a shortened copy
 *   ending in "...".
 */
function truncateDescription(desc) {
    if (!desc) {
        return undefined;
    }
    if (desc.length <= MAX_DESCRIPTION_LENGTH) {
        return desc;
    }
    const ellipsis = "...";
    // Reserve room for the ellipsis so the total stays within the limit.
    let cut = MAX_DESCRIPTION_LENGTH - ellipsis.length;
    // If the last kept code unit is a high surrogate, the cut would split an
    // astral character (e.g. an emoji) and leave a lone surrogate behind.
    const lastKept = desc.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
        cut -= 1;
    }
    return `${desc.slice(0, cut)}${ellipsis}`;
}
/**
 * Convert a stash entry into its compact manifest representation.
 *
 * The ref is built from the canonical name derived from the stash root; when
 * no canonical name is produced, or it starts with "../" or "..\\", the
 * entry's own name is used instead. A description is attached only when
 * truncateDescription yields a non-empty value.
 *
 * @returns the manifest entry `{ name, type, ref, description? }`, or `null`
 *   when anything in the conversion throws (the failure is reported via warn).
 */
function toManifestEntry(entry, filePath, stashDir, registryId) {
    try {
        const derivedName = deriveCanonicalAssetNameFromStashRoot(entry.type, stashDir, filePath);
        const isUsable = derivedName && !derivedName.startsWith("../") && !derivedName.startsWith("..\\");
        const refName = isUsable ? derivedName : entry.name;
        const manifestEntry = {
            name: entry.name,
            type: entry.type,
            ref: makeAssetRef(entry.type, refName, registryId),
        };
        const shortDescription = truncateDescription(entry.description);
        if (shortDescription) {
            manifestEntry.description = shortDescription;
        }
        return manifestEntry;
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        warn(`Manifest: skipping entry "${entry.name}" (${entry.type}):`, reason);
        return null;
    }
}
/**
 * Fast path: build the manifest from the existing index database.
 *
 * Returns `null` (signalling that the caller should fall back) when the
 * database file does not exist, the index holds no entries, the "stashDir"
 * meta value stored in the index differs from `stashDir`, or any step throws
 * (in which case a warning is emitted). Otherwise returns one manifest entry
 * per distinct file path, keeping the first row seen for each path.
 */
function getManifestFromDb(stashDir, config, sources, type) {
    const dbPath = getDbPath();
    try {
        if (!fs.existsSync(dbPath)) {
            return null;
        }
        const embeddingDim = config.embedding?.dimension;
        const db = openDatabase(dbPath, embeddingDim ? { embeddingDim } : undefined);
        try {
            const entryCount = getEntryCount(db);
            const indexedStashDir = getMeta(db, "stashDir");
            if (entryCount === 0 || indexedStashDir !== stashDir) {
                return null;
            }
            // "any" (or no type at all) means no type restriction.
            const rows = getAllEntries(db, type && type !== "any" ? type : undefined);
            const visitedPaths = new Set();
            const manifest = [];
            for (const row of rows) {
                if (visitedPaths.has(row.filePath)) {
                    continue;
                }
                visitedPaths.add(row.filePath);
                // The owning source supplies the registry id used in the ref.
                const owner = sources.find((s) => row.filePath.startsWith(path.resolve(s.path) + path.sep));
                const converted = toManifestEntry(row.entry, row.filePath, row.stashDir, owner?.registryId);
                if (converted) {
                    manifest.push(converted);
                }
            }
            return manifest;
        }
        finally {
            // Always release the handle, including on the early returns above.
            closeDatabase(db);
        }
    }
    catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        warn("Manifest: index unavailable, falling back to walker:", reason);
        return null;
    }
}
/**
 * Fallback path: build the manifest by walking every source's stash directory.
 *
 * Files are grouped by parent directory. For each directory the existing
 * .stash.json entries are used when present, and metadata is generated for any
 * files those entries do not cover by filename; directories without a
 * .stash.json are described entirely by generated metadata and skipped when
 * nothing is generated.
 *
 * @param sources - Stash sources; each is read for `path` and `registryId`.
 * @param type - Optional asset type filter; "any" or a falsy value disables it.
 * @returns the collected manifest entries (entries that fail conversion are omitted).
 */
async function getManifestFromWalker(sources, type) {
    const allStashDirs = sources.map((s) => s.path);
    const entries = [];
    for (const currentStashDir of allStashDirs) {
        const fileContexts = walkStashFlat(currentStashDir);
        // Group by parent directory
        const dirGroups = new Map();
        for (const ctx of fileContexts) {
            const group = dirGroups.get(ctx.parentDirAbs);
            if (group) {
                group.push(ctx.absPath);
            }
            else {
                dirGroups.set(ctx.parentDirAbs, [ctx.absPath]);
            }
        }
        for (const [dirPath, files] of dirGroups) {
            // Try loading existing .stash.json first
            let stash = loadStashFile(dirPath);
            if (stash) {
                const coveredFiles = new Set(stash.entries.map((e) => e.filename).filter((e) => !!e));
                const uncoveredFiles = files.filter((f) => !coveredFiles.has(path.basename(f)));
                if (uncoveredFiles.length > 0) {
                    const generated = await generateMetadataFlat(currentStashDir, uncoveredFiles);
                    if (generated.entries.length > 0) {
                        stash = { entries: [...stash.entries, ...generated.entries] };
                    }
                }
            }
            else {
                const generated = await generateMetadataFlat(currentStashDir, files);
                if (generated.entries.length === 0) {
                    continue;
                }
                stash = generated;
            }
            // A directory belongs to a source when it IS the source root or lies
            // beneath it. Matching only "root + separator" would miss files that
            // sit directly in the root and drop their registryId from the ref.
            const source = sources.find((s) => {
                const root = path.resolve(s.path);
                return dirPath === root || dirPath.startsWith(root + path.sep);
            });
            for (const stashEntry of stash.entries) {
                if (type && type !== "any" && stashEntry.type !== type) {
                    continue;
                }
                // Entries without a filename fall back to the first file of the
                // group, or to the directory itself.
                const entryPath = stashEntry.filename ? path.join(dirPath, stashEntry.filename) : files[0] || dirPath;
                const manifestEntry = toManifestEntry(stashEntry, entryPath, currentStashDir, source?.registryId);
                if (manifestEntry) {
                    entries.push(manifestEntry);
                }
            }
        }
    }
    return entries;
}
/**
 * Produce the compact manifest of assets for a stash.
 *
 * The index database is consulted first; when it yields `null` the stash
 * sources are walked on disk instead.
 *
 * @param options - Optional `{ stashDir, type }`; `stashDir` defaults to
 *   resolveStashDir() and `type` is passed through as the asset type filter.
 * @returns `{ schemaVersion: 1, entries }`.
 */
export async function akmManifest(options) {
    const stashDir = options?.stashDir ?? resolveStashDir();
    const type = options?.type;
    const config = loadConfig();
    const sources = resolveStashSources(stashDir, config);
    // null (not an empty array) is the signal that the index could not be used.
    const indexed = getManifestFromDb(stashDir, config, sources, type);
    const entries = indexed !== null ? indexed : await getManifestFromWalker(sources, type);
    return {
        schemaVersion: 1,
        entries,
    };
}