akm-cli 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -41,6 +41,8 @@ const pkgVersion = (() => {
41
41
  const OUTPUT_FORMATS = ["json", "yaml", "text"];
42
42
  const DETAIL_LEVELS = ["brief", "normal", "full"];
43
43
  const NORMAL_DESCRIPTION_LIMIT = 250;
44
+ const CONTEXT_HUB_ALIAS_REF = "context-hub";
45
+ const CONTEXT_HUB_ALIAS_URL = "https://github.com/andrewyng/context-hub";
44
46
  function hasBunYAML(b) {
45
47
  // biome-ignore lint/suspicious/noExplicitAny: type guard for runtime feature detection
46
48
  return typeof b.YAML?.stringify === "function";
@@ -468,6 +470,15 @@ const addCommand = defineCommand({
468
470
  },
469
471
  async run({ args }) {
470
472
  await runWithJsonErrors(async () => {
473
+ if (args.ref.trim() === CONTEXT_HUB_ALIAS_REF) {
474
+ const result = addStash({
475
+ target: CONTEXT_HUB_ALIAS_URL,
476
+ providerType: "context-hub",
477
+ name: "context-hub",
478
+ });
479
+ output("stash-add", result);
480
+ return;
481
+ }
471
482
  const result = await akmAdd({ ref: args.ref });
472
483
  output("add", result);
473
484
  });
@@ -1143,6 +1154,7 @@ akm search "<query>" --type skill # Filter by type
1143
1154
  akm search "<query>" --source both # Also search registries for installable kits
1144
1155
  akm show <ref> # View asset details
1145
1156
  akm add <ref> # Install a kit (npm, GitHub, git, local)
1157
+ akm add context-hub # Shortcut for adding Context Hub as a stash provider
1146
1158
  akm clone <ref> # Copy an asset to the working stash (optional --dest arg to clone to specific location)
1147
1159
  akm registry search "<query>" # Search all registries
1148
1160
  \`\`\`
@@ -1213,6 +1225,7 @@ akm add <ref> # Install a kit (smart router: loc
1213
1225
  akm add @scope/kit # From npm
1214
1226
  akm add owner/repo # From GitHub
1215
1227
  akm add ./path/to/local/kit # From local directory (adds as stash)
1228
+ akm add context-hub # Add the official Context Hub stash
1216
1229
  akm kit add <ref> # Install a kit (explicit)
1217
1230
  akm kit list # List installed kits
1218
1231
  akm kit remove <target> # Remove a kit
package/dist/db.js CHANGED
@@ -385,17 +385,22 @@ export function searchFts(db, query, limit, entryType) {
385
385
  return [];
386
386
  }
387
387
  }
388
- function sanitizeFtsQuery(query) {
389
- const tokens = query
390
- .replace(/[^a-zA-Z0-9\s]/g, " ")
391
- .split(/\s+/)
392
- .filter((t) => t.length >= 2);
388
+ export function sanitizeFtsQuery(query) {
389
+ // Allow only characters safe in FTS5 queries: letters, digits, underscores,
390
+ // and whitespace. Everything else (hyphens, dots, quotes, parens, asterisks,
391
+ // colons, carets, @, !, etc.) is replaced with a space so that compound
392
+ // identifiers like "code-review" or "k8s.setup" become AND-joined tokens
393
+ // ("code review", "k8s setup") rather than triggering FTS5 syntax errors.
394
+ let sanitized = query.replace(/[^a-zA-Z0-9_\s]/g, " ");
395
+ // Neutralize the NEAR operator (FTS5 proximity syntax)
396
+ sanitized = sanitized.replace(/\bNEAR\b/g, " ");
397
+ const tokens = sanitized.split(/\s+/).filter((t) => t.length >= 1);
393
398
  if (tokens.length === 0)
394
399
  return "";
395
- // MD-1: Use OR so that any matching token returns results (better recall for
396
- // exploratory search). Use unquoted tokens so the porter stemmer can
397
- // normalize word forms.
398
- return tokens.join(" OR ");
400
+ // Use implicit AND (space-separated tokens) for precision. FTS5 treats
401
+ // space-separated tokens as an implicit AND, matching only rows that
402
+ // contain ALL terms.
403
+ return tokens.join(" ");
399
404
  }
400
405
  // ── All entries ─────────────────────────────────────────────────────────────
401
406
  export function getAllEntries(db, entryType) {
package/dist/embedder.js CHANGED
@@ -30,6 +30,19 @@ async function embedLocal(text) {
30
30
  const result = await model(text, { pooling: "mean", normalize: true });
31
31
  return Array.from(result.data);
32
32
  }
33
+ // ── Vector normalization ─────────────────────────────────────────────────────
34
+ /**
35
+ * L2-normalize a vector to unit length.
36
+ * Required for remote embeddings because the scoring pipeline's L2-to-cosine
37
+ * conversion formula (1 - distance^2/2) is only correct for unit vectors.
38
+ * The local embedder already normalizes via `normalize: true`.
39
+ */
40
+ function l2Normalize(vec) {
41
+ const norm = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0));
42
+ if (norm === 0)
43
+ return vec;
44
+ return vec.map((v) => v / norm);
45
+ }
33
46
  // ── OpenAI-compatible remote embedder ───────────────────────────────────────
34
47
  async function embedRemote(text, config) {
35
48
  const headers = { "Content-Type": "application/json" };
@@ -56,7 +69,7 @@ async function embedRemote(text, config) {
56
69
  if (!json.data?.[0]?.embedding) {
57
70
  throw new Error("Unexpected embedding response format: missing data[0].embedding");
58
71
  }
59
- return json.data[0].embedding;
72
+ return l2Normalize(json.data[0].embedding);
60
73
  }
61
74
  // ── Public API ──────────────────────────────────────────────────────────────
62
75
  /**
@@ -118,11 +131,13 @@ async function embedRemoteBatch(texts, config) {
118
131
  if (!json.data || json.data.length !== batch.length) {
119
132
  throw new Error(`Unexpected embedding batch response: expected ${batch.length} embeddings, got ${json.data?.length ?? 0}`);
120
133
  }
121
- for (const [idx, d] of json.data.entries()) {
134
+ // Sort by index to guarantee correct order (OpenAI API doesn't guarantee order)
135
+ const sorted = [...json.data].sort((a, b) => a.index - b.index);
136
+ for (const [idx, d] of sorted.entries()) {
122
137
  if (!Array.isArray(d.embedding)) {
123
138
  throw new Error(`Unexpected embedding at batch index ${idx}: missing or invalid`);
124
139
  }
125
- results.push(d.embedding);
140
+ results.push(l2Normalize(d.embedding));
126
141
  }
127
142
  }
128
143
  return results;
package/dist/indexer.js CHANGED
@@ -162,6 +162,13 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
162
162
  }
163
163
  }
164
164
  // Phase 2 (sync): write all pre-generated metadata inside a single transaction.
165
+ //
166
+ // Cross-stash dedup: track indexed assets by content identity
167
+ // (type + filename + description) so the same asset from a lower-priority
168
+ // stash root is skipped when a higher-priority root already covers it.
169
+ // Sources are ordered by priority (primary stash first), so the first
170
+ // occurrence wins.
171
+ const indexedAssetIdentities = new Set();
165
172
  const insertTransaction = db.transaction(() => {
166
173
  // HI-5: Perform the full-rebuild wipe as the FIRST step of the insert
167
174
  // transaction so delete and re-insert are atomic — a concurrent reader
@@ -190,8 +197,20 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
190
197
  // Delete old entries for this dir (will be re-inserted)
191
198
  deleteEntriesByDir(db, dirPath);
192
199
  if (stash) {
200
+ // Build a lookup for matching filename-less entries to actual files
201
+ const fileBasenameMap = buildFileBasenameMap(files);
193
202
  for (const entry of stash.entries) {
194
- const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
203
+ const entryPath = entry.filename
204
+ ? path.join(dirPath, entry.filename)
205
+ : matchEntryToFile(entry.name, fileBasenameMap, files);
206
+ if (!entryPath)
207
+ continue; // skip unresolvable entries
208
+ // Skip if a higher-priority stash root already indexed this asset
209
+ const basename = path.basename(entryPath);
210
+ const identityKey = `${entry.type}\0${basename}\0${entry.description ?? ""}`;
211
+ if (indexedAssetIdentities.has(identityKey))
212
+ continue;
213
+ indexedAssetIdentities.add(identityKey);
195
214
  const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
196
215
  const searchText = buildSearchText(entry);
197
216
  const entryWithSize = attachFileSize(entry, entryPath);
@@ -335,6 +354,43 @@ async function enhanceStashWithLlm(llmConfig, stash, _dirPath, files) {
335
354
  }
336
355
  return { entries: enhanced };
337
356
  }
357
+ /**
358
+ * Build a map from base filename (without extension) to full path for quick lookups.
359
+ */
360
+ export function buildFileBasenameMap(files) {
361
+ const map = new Map();
362
+ for (const file of files) {
363
+ const base = path.basename(file, path.extname(file));
364
+ // Only keep first match per base name to avoid ambiguity
365
+ if (!map.has(base))
366
+ map.set(base, file);
367
+ }
368
+ return map;
369
+ }
370
+ /**
371
+ * Try to match a filename-less entry to an actual file in the directory.
372
+ *
373
+ * Matching strategy (in priority order):
374
+ * 1. Exact basename match: entry.name === filename without extension
375
+ * 2. Last path segment match: for entries with names like "dir/sub-entry",
376
+ * try matching the last segment
377
+ * 3. Fallback: first file in the directory, or null if no files are available
378
+ */
379
+ export function matchEntryToFile(entryName, fileMap, files) {
380
+ // Exact match on entry name
381
+ const exact = fileMap.get(entryName);
382
+ if (exact)
383
+ return exact;
384
+ // Try last segment for hierarchical names (e.g. "corpus/agentic-patterns/foo")
385
+ const lastSegment = entryName.split("/").pop() ?? entryName;
386
+ if (lastSegment !== entryName) {
387
+ const segmentMatch = fileMap.get(lastSegment);
388
+ if (segmentMatch)
389
+ return segmentMatch;
390
+ }
391
+ // Fallback to first file, or null if no files are available
392
+ return files[0] || null;
393
+ }
338
394
  export function buildSearchText(entry) {
339
395
  const parts = [entry.name.replace(/[-_]/g, " ")];
340
396
  if (entry.description)
@@ -347,6 +403,8 @@ export function buildSearchText(entry) {
347
403
  parts.push(entry.aliases.join(" "));
348
404
  if (entry.searchHints)
349
405
  parts.push(entry.searchHints.join(" "));
406
+ if (entry.usage)
407
+ parts.push(entry.usage.join(" "));
350
408
  if (entry.intent) {
351
409
  if (entry.intent.when)
352
410
  parts.push(entry.intent.when);
@@ -99,7 +99,15 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
99
99
  if (!query) {
100
100
  const typeFilter = searchType === "any" ? undefined : searchType;
101
101
  const allEntries = getAllEntries(db, typeFilter);
102
- const selected = allEntries.slice(0, limit);
102
+ // Deduplicate by file path — multiple entries can share the same file
103
+ const seenFilePaths = new Set();
104
+ const uniqueEntries = allEntries.filter((ie) => {
105
+ if (seenFilePaths.has(ie.filePath))
106
+ return false;
107
+ seenFilePaths.add(ie.filePath);
108
+ return true;
109
+ });
110
+ const selected = uniqueEntries.slice(0, limit);
103
111
  const hits = await Promise.all(selected.map((ie) => buildDbHit({
104
112
  entry: ie.entry,
105
113
  path: ie.filePath,
@@ -137,6 +145,7 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
137
145
  }
138
146
  }
139
147
  // Merge results using RRF
148
+ // Issue #15: "hybrid" for results appearing in both FTS and vec results.
140
149
  const scored = [];
141
150
  const seenIds = new Set();
142
151
  // Process FTS results
@@ -146,7 +155,8 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
146
155
  const embedRank = embedRankMap.get(id);
147
156
  const embedRrf = embedRank !== undefined ? 1 / (RRF_K + embedRank) : 0;
148
157
  const rrfScore = ftsRrf + embedRrf;
149
- const rankingMode = embedRrf > 0 ? "semantic" : "fts";
158
+ // Issue #15: combined FTS+vec results are "hybrid", not "semantic"
159
+ const rankingMode = embedRrf > 0 ? "hybrid" : "fts";
150
160
  scored.push({ id, entry: result.entry, filePath: result.filePath, score: rrfScore, rankingMode });
151
161
  }
152
162
  // Add vec-only results not already in FTS results
@@ -172,45 +182,63 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
172
182
  }
173
183
  }
174
184
  }
175
- // Apply boosts as multiplicative factors
185
+ // Apply boosts as multiplicative factors (all boosts in a single phase
186
+ // so that sort order and displayed scores are always consistent — Issue #1).
176
187
  const queryTokens = query.toLowerCase().split(/\s+/).filter(Boolean);
177
188
  for (const item of scored) {
178
189
  const entry = item.entry;
179
190
  let boostSum = 0;
180
- // Tag boost
191
+ // Tag boost — capped at 0.30 (Issue #7)
181
192
  if (entry.tags) {
193
+ let tagBoost = 0;
182
194
  for (const tag of entry.tags) {
183
195
  if (queryTokens.some((t) => tag.toLowerCase() === t)) {
184
- boostSum += 0.15;
196
+ tagBoost += 0.15;
185
197
  }
186
198
  }
199
+ boostSum += Math.min(0.3, tagBoost);
187
200
  }
188
- // Search hint boost
201
+ // Search hint boost — capped at 0.24 (Issue #7)
189
202
  if (entry.searchHints) {
203
+ let hintBoost = 0;
190
204
  for (const hint of entry.searchHints) {
191
205
  const hintLower = hint.toLowerCase();
192
206
  for (const token of queryTokens) {
193
207
  if (hintLower.includes(token)) {
194
- boostSum += 0.12;
208
+ hintBoost += 0.12;
195
209
  break;
196
210
  }
197
211
  }
198
212
  }
213
+ boostSum += Math.min(0.24, hintBoost);
199
214
  }
200
215
  // Name boost
201
216
  const nameLower = entry.name.toLowerCase().replace(/[-_]/g, " ");
202
217
  if (queryTokens.some((t) => nameLower.includes(t))) {
203
218
  boostSum += 0.1;
204
219
  }
220
+ // Quality boost (Issue #1: moved from buildDbHit to single-phase)
221
+ const qualityBoost = entry.quality === "generated" ? 0 : 0.05;
222
+ boostSum += qualityBoost;
223
+ // Confidence boost (Issue #1: moved from buildDbHit to single-phase)
224
+ const confidenceBoost = typeof entry.confidence === "number" ? Math.min(0.05, Math.max(0, entry.confidence) * 0.05) : 0;
225
+ boostSum += confidenceBoost;
205
226
  item.score = item.score * (1 + boostSum);
206
227
  }
207
- scored.sort((a, b) => b.score - a.score);
228
+ // Issue #14: deterministic tiebreaker on equal scores
229
+ scored.sort((a, b) => b.score - a.score || a.entry.name.localeCompare(b.entry.name));
230
+ // Deduplicate by file path — keep only the highest-scored entry per file.
231
+ // Multiple .stash.json entries can map to the same file (e.g. entries without
232
+ // a filename field all collapse to files[0]). Showing the same path/ref
233
+ // multiple times clutters results.
234
+ const deduped = deduplicateByPath(scored);
208
235
  const rankMs = Date.now() - tRank0;
209
- const selected = scored.slice(0, limit);
236
+ const selected = deduped.slice(0, limit);
210
237
  const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode }) => buildDbHit({
211
238
  entry,
212
239
  path: filePath,
213
- score: Math.round(score * 100) / 100,
240
+ // Issue #8: round to 4 decimal places instead of 2
241
+ score: Math.round(score * 10000) / 10000,
214
242
  query,
215
243
  rankingMode,
216
244
  defaultStashDir: stashDir,
@@ -233,9 +261,10 @@ async function tryVecScores(db, query, k, config) {
233
261
  const vecResults = searchVec(db, queryEmbedding, k);
234
262
  const scores = new Map();
235
263
  for (const { id, distance } of vecResults) {
236
- // Convert L2 distance to cosine similarity (vectors are normalized)
237
- const cosineSim = 1 - (distance * distance) / 2;
238
- scores.set(id, Math.max(0, cosineSim));
264
+ // Convert L2 distance to cosine similarity (vectors are normalized).
265
+ // Issue #3: guard against NaN/Infinity from sqlite-vec edge cases.
266
+ const raw = 1 - (distance * distance) / 2;
267
+ scores.set(id, Number.isFinite(raw) ? Math.max(0, raw) : 0);
239
268
  }
240
269
  return scores;
241
270
  }
@@ -249,15 +278,18 @@ async function substringSearch(query, searchType, limit, stashDir, sources, conf
249
278
  const assets = await indexAssets(stashDir, searchType);
250
279
  const matched = assets.filter((asset) => !query || buildSearchText(asset.entry).includes(query));
251
280
  if (!query) {
252
- return Promise.all(matched
253
- .sort(compareAssets)
254
- .slice(0, limit)
255
- .map((asset) => assetToSearchHit(asset, query, stashDir, sources, config)));
281
+ const sorted = matched.sort(compareAssets);
282
+ const unique = deduplicateAssetsByPath(sorted);
283
+ return Promise.all(unique.slice(0, limit).map((asset) => assetToSearchHit(asset, query, stashDir, sources, config)));
256
284
  }
257
285
  // Score and sort by relevance
258
286
  const scored = matched.map((asset) => ({ asset, score: scoreSubstringMatch(asset.entry, query) }));
259
287
  scored.sort((a, b) => b.score - a.score || compareAssets(a.asset, b.asset));
260
- return Promise.all(scored.slice(0, limit).map(({ asset, score }) => assetToSearchHit(asset, query, stashDir, sources, config, score)));
288
+ // Deduplicate by path — keep the highest-scored entry per file
289
+ const dedupedScored = deduplicateByPath(scored.map((s) => ({ ...s, filePath: s.asset.path })));
290
+ return Promise.all(dedupedScored
291
+ .slice(0, limit)
292
+ .map(({ asset, score }) => assetToSearchHit(asset, query, stashDir, sources, config, score)));
261
293
  }
262
294
  function scoreSubstringMatch(entry, query) {
263
295
  const tokens = query.split(/\s+/).filter(Boolean);
@@ -282,16 +314,22 @@ function scoreSubstringMatch(entry, query) {
282
314
  if (tokens.some((t) => descLower.includes(t))) {
283
315
  score += 0.05;
284
316
  }
285
- return Math.round(Math.min(1, score) * 100) / 100;
317
+ // Issue #8: round to 4 decimal places instead of 2
318
+ return Math.round(Math.min(1, score) * 10000) / 10000;
286
319
  }
287
320
  // ── Hit building ────────────────────────────────────────────────────────────
288
321
  export async function buildDbHit(input) {
289
322
  const entryStashDir = findSourceForPath(input.path, input.sources)?.path ?? input.defaultStashDir;
290
323
  const canonical = deriveCanonicalAssetNameFromStashRoot(input.entry.type, entryStashDir, input.path);
291
324
  const refName = canonical && !canonical.startsWith("../") && !canonical.startsWith("..\\") ? canonical : input.entry.name;
325
+ // Issue #1: Quality and confidence boosts are now applied in the main scoring
326
+ // phase (searchDatabase). buildDbHit receives the already-final score and
327
+ // passes it through without further multiplication. We still compute the
328
+ // boost values here for buildWhyMatched reporting.
292
329
  const qualityBoost = input.entry.quality === "generated" ? 0 : 0.05;
293
330
  const confidenceBoost = typeof input.entry.confidence === "number" ? Math.min(0.05, Math.max(0, input.entry.confidence) * 0.05) : 0;
294
- const score = Math.round(input.score * (1 + qualityBoost + confidenceBoost) * 100) / 100;
331
+ // Issue #8: round to 4 decimal places, no boost multiplication
332
+ const score = Math.round(input.score * 10000) / 10000;
295
333
  const whyMatched = buildWhyMatched(input.entry, input.query, input.rankingMode, qualityBoost, confidenceBoost);
296
334
  const source = findSourceForPath(input.path, input.sources);
297
335
  const editable = isEditable(input.path, input.config);
@@ -316,13 +354,24 @@ export async function buildDbHit(input) {
316
354
  }
317
355
  return hit;
318
356
  }
319
- export function buildWhyMatched(entry, query, rankingMode, qualityBoost, confidenceBoost) {
320
- const reasons = [rankingMode === "semantic" ? "semantic similarity" : "fts bm25 relevance"];
357
+ export function buildWhyMatched(entry, query,
358
+ // Issue #15: added "hybrid" ranking mode
359
+ rankingMode, qualityBoost, confidenceBoost) {
360
+ // Issue #15: "hybrid" label for combined FTS+vec results
361
+ const reasons = [
362
+ rankingMode === "hybrid"
363
+ ? "hybrid (fts + semantic)"
364
+ : rankingMode === "semantic"
365
+ ? "semantic similarity"
366
+ : "fts bm25 relevance",
367
+ ];
321
368
  const tokens = query.toLowerCase().split(/\s+/).filter(Boolean);
322
369
  const name = entry.name.toLowerCase();
323
370
  const tags = entry.tags?.join(" ").toLowerCase() ?? "";
324
371
  const searchHints = entry.searchHints?.join(" ").toLowerCase() ?? "";
325
372
  const aliases = entry.aliases?.join(" ").toLowerCase() ?? "";
373
+ // Issue #12: include description in match reasons
374
+ const desc = entry.description?.toLowerCase() ?? "";
326
375
  if (tokens.some((t) => name.includes(t)))
327
376
  reasons.push("matched name tokens");
328
377
  if (tokens.some((t) => tags.includes(t)))
@@ -331,6 +380,9 @@ export function buildWhyMatched(entry, query, rankingMode, qualityBoost, confide
331
380
  reasons.push("matched searchHints");
332
381
  if (tokens.some((t) => aliases.includes(t)))
333
382
  reasons.push("matched aliases");
383
+ // Issue #12: report description matches
384
+ if (tokens.some((t) => desc.includes(t)))
385
+ reasons.push("matched description");
334
386
  if (qualityBoost > 0)
335
387
  reasons.push("curated metadata boost");
336
388
  if (confidenceBoost > 0)
@@ -413,10 +465,27 @@ async function indexAssets(stashDir, type) {
413
465
  continue;
414
466
  stash = generated;
415
467
  }
468
+ // Build a lookup for matching filename-less entries to actual files
469
+ const fileBasenameMap = new Map();
470
+ for (const file of files) {
471
+ const base = path.basename(file, path.extname(file));
472
+ if (!fileBasenameMap.has(base))
473
+ fileBasenameMap.set(base, file);
474
+ }
416
475
  for (const entry of stash.entries) {
417
476
  if (filterType && entry.type !== filterType)
418
477
  continue;
419
- const entryPath = entry.filename ? path.join(dirPath, entry.filename) : files[0] || dirPath;
478
+ let entryPath;
479
+ if (entry.filename) {
480
+ entryPath = path.join(dirPath, entry.filename);
481
+ }
482
+ else {
483
+ // Try matching entry name to a file by basename
484
+ entryPath =
485
+ fileBasenameMap.get(entry.name) ??
486
+ fileBasenameMap.get(entry.name.split("/").pop() ?? "") ??
487
+ (files[0] || dirPath);
488
+ }
420
489
  assets.push({ entry, path: entryPath });
421
490
  }
422
491
  }
@@ -427,3 +496,31 @@ function compareAssets(a, b) {
427
496
  return a.entry.type.localeCompare(b.entry.type);
428
497
  return a.entry.name.localeCompare(b.entry.name);
429
498
  }
499
+ /**
500
+ * Deduplicate scored results by file path, keeping only the highest-scored
501
+ * entry per unique path. Sorts by score descending internally to ensure the
502
+ * precondition is always met regardless of caller (Issue #4).
503
+ */
504
+ function deduplicateByPath(items) {
505
+ // Issue #4: sort inside to enforce the descending-score precondition
506
+ const sorted = [...items].sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
507
+ const seen = new Set();
508
+ return sorted.filter((item) => {
509
+ if (seen.has(item.filePath))
510
+ return false;
511
+ seen.add(item.filePath);
512
+ return true;
513
+ });
514
+ }
515
+ /**
516
+ * Deduplicate IndexedAsset[] by path, keeping the first (highest-priority) entry.
517
+ */
518
+ function deduplicateAssetsByPath(assets) {
519
+ const seen = new Set();
520
+ return assets.filter((asset) => {
521
+ if (seen.has(asset.path))
522
+ return false;
523
+ seen.add(asset.path);
524
+ return true;
525
+ });
526
+ }
@@ -0,0 +1,389 @@
1
+ import { createHash } from "node:crypto";
2
+ import fs from "node:fs";
3
+ import path from "node:path";
4
+ import { fetchWithRetry } from "../common";
5
+ import { ConfigError, NotFoundError, UsageError } from "../errors";
6
+ import { parseFrontmatter, toStringOrUndefined } from "../frontmatter";
7
+ import { extractFrontmatterOnly, extractLineRange, extractSection, formatToc, parseMarkdownToc } from "../markdown";
8
+ import { getRegistryIndexCacheDir } from "../paths";
9
+ import { extractTarGzSecure } from "../registry-install";
10
+ import { registerStashProvider } from "../stash-provider-factory";
11
+ /** Cache TTL before refreshing the mirrored repo (12 hours). */
12
+ const CACHE_TTL_MS = 12 * 60 * 60 * 1000;
13
+ /** Maximum stale age allowed when refresh fails (7 days). */
14
+ const CACHE_STALE_MS = 7 * 24 * 60 * 60 * 1000;
15
+ const CONTEXT_HUB_REF_PREFIX = "context-hub://";
16
+ class ContextHubStashProvider {
17
+ type = "context-hub";
18
+ name;
19
+ repo;
20
+ constructor(config) {
21
+ this.repo = parseContextHubRepoUrl(config.url ?? "");
22
+ this.name = config.name ?? `${this.repo.owner}/${this.repo.repo}`;
23
+ }
24
+ async search(options) {
25
+ try {
26
+ const entries = await this.loadEntries();
27
+ const filtered = entries
28
+ .filter((entry) => matchesType(entry, options.type))
29
+ .map((entry) => ({ entry, score: scoreEntry(entry, options.query) }))
30
+ .filter(({ score }) => options.query.trim() === "" || score > 0)
31
+ .sort((a, b) => b.score - a.score || a.entry.sortName.localeCompare(b.entry.sortName))
32
+ .slice(0, options.limit);
33
+ return {
34
+ hits: filtered.map(({ entry, score }) => entryToHit(entry, score)),
35
+ };
36
+ }
37
+ catch (err) {
38
+ const message = err instanceof Error ? err.message : String(err);
39
+ return { hits: [], warnings: [`Stash ${this.name}: ${message}`] };
40
+ }
41
+ }
42
+ async show(ref, view) {
43
+ const filePath = parseContextHubRef(ref);
44
+ const repoDir = await this.loadRepoDir();
45
+ const resolved = resolveCachedFilePath(repoDir, filePath);
46
+ if (!fs.existsSync(resolved) || !fs.statSync(resolved).isFile()) {
47
+ throw new NotFoundError(`Context Hub asset not found: ${filePath}`);
48
+ }
49
+ const raw = fs.readFileSync(resolved, "utf8");
50
+ const parsed = parseFrontmatter(raw);
51
+ const relFromContent = path.posix.normalize(path.relative(path.join(repoDir, "content"), resolved).replace(/\\/g, "/"));
52
+ const author = sanitizeString(relFromContent.split("/")[0] ?? "") || "unknown";
53
+ const name = sanitizeString(toStringOrUndefined(parsed.data.name) ?? path.basename(path.dirname(resolved)));
54
+ const description = sanitizeString(toStringOrUndefined(parsed.data.description), 1000);
55
+ const assetType = path.basename(resolved) === "SKILL.md" ? "skill" : "knowledge";
56
+ const content = renderContentForView(raw, view);
57
+ return {
58
+ type: assetType,
59
+ name: `${author}/${name}`,
60
+ path: ref,
61
+ content,
62
+ description,
63
+ editable: false,
64
+ origin: this.type,
65
+ action: `Context Hub content from ${this.repo.canonicalUrl}`,
66
+ };
67
+ }
68
+ canShow(ref) {
69
+ return ref.trim().startsWith(CONTEXT_HUB_REF_PREFIX);
70
+ }
71
+ async loadEntries() {
72
+ const cachePaths = getCachePaths(this.repo.canonicalUrl);
73
+ const index = await ensureContextHubMirror(this.repo, cachePaths);
74
+ return index.entries;
75
+ }
76
+ async loadRepoDir() {
77
+ const cachePaths = getCachePaths(this.repo.canonicalUrl);
78
+ await ensureContextHubMirror(this.repo, cachePaths, { requireRepoDir: true });
79
+ return cachePaths.repoDir;
80
+ }
81
+ }
82
+ registerStashProvider("context-hub", (config) => new ContextHubStashProvider(config));
83
+ function getCachePaths(repoUrl) {
84
+ const key = createHash("sha256").update(repoUrl).digest("hex").slice(0, 16);
85
+ const rootDir = path.join(getRegistryIndexCacheDir(), `context-hub-${key}`);
86
+ return {
87
+ rootDir,
88
+ archivePath: path.join(rootDir, "repo.tar.gz"),
89
+ repoDir: path.join(rootDir, "repo"),
90
+ indexPath: path.join(rootDir, "index.json"),
91
+ };
92
+ }
93
+ async function ensureContextHubMirror(repo, cachePaths, options) {
94
+ const requireRepoDir = options?.requireRepoDir === true;
95
+ const cached = readCachedIndex(cachePaths.indexPath);
96
+ if (cached && !isExpired(cached.mtime, CACHE_TTL_MS) && (!requireRepoDir || hasExtractedRepo(cachePaths.repoDir))) {
97
+ return { entries: cached.entries };
98
+ }
99
+ try {
100
+ fs.mkdirSync(cachePaths.rootDir, { recursive: true });
101
+ await downloadArchive(buildTarballUrl(repo), cachePaths.archivePath);
102
+ extractTarGzSecure(cachePaths.archivePath, cachePaths.repoDir);
103
+ const entries = buildContextHubIndex(cachePaths.repoDir);
104
+ writeCachedIndex(cachePaths.indexPath, entries);
105
+ return { entries };
106
+ }
107
+ catch (err) {
108
+ if (cached &&
109
+ !isExpired(cached.mtime, CACHE_STALE_MS) &&
110
+ (!requireRepoDir || hasExtractedRepo(cachePaths.repoDir))) {
111
+ return { entries: cached.entries };
112
+ }
113
+ throw err;
114
+ }
115
+ }
116
+ function hasExtractedRepo(repoDir) {
117
+ try {
118
+ return fs.statSync(repoDir).isDirectory() && fs.statSync(path.join(repoDir, "content")).isDirectory();
119
+ }
120
+ catch {
121
+ return false;
122
+ }
123
+ }
124
+ function readCachedIndex(indexPath) {
125
+ try {
126
+ const stat = fs.statSync(indexPath);
127
+ const raw = JSON.parse(fs.readFileSync(indexPath, "utf8"));
128
+ if (!Array.isArray(raw))
129
+ return null;
130
+ const entries = raw.filter(isContextHubEntry);
131
+ return { entries, mtime: stat.mtimeMs };
132
+ }
133
+ catch {
134
+ return null;
135
+ }
136
+ }
137
+ function writeCachedIndex(indexPath, entries) {
138
+ const dir = path.dirname(indexPath);
139
+ fs.mkdirSync(dir, { recursive: true });
140
+ const tmpPath = `${indexPath}.tmp.${process.pid}.${Math.random().toString(36).slice(2)}`;
141
+ fs.writeFileSync(tmpPath, JSON.stringify(entries), { encoding: "utf8", mode: 0o600 });
142
+ fs.renameSync(tmpPath, indexPath);
143
+ }
144
+ async function downloadArchive(url, destination) {
145
+ const response = await fetchWithRetry(url, undefined, { timeout: 120_000, retries: 1 });
146
+ if (!response.ok) {
147
+ throw new Error(`Failed to download Context Hub archive (${response.status}) from ${url}`);
148
+ }
149
+ const BunRuntime = globalThis.Bun;
150
+ if (BunRuntime?.write) {
151
+ await BunRuntime.write(destination, response);
152
+ return;
153
+ }
154
+ const arrayBuffer = await response.arrayBuffer();
155
+ fs.writeFileSync(destination, Buffer.from(arrayBuffer));
156
+ }
157
+ function buildContextHubIndex(repoDir) {
158
+ const contentDir = path.join(repoDir, "content");
159
+ if (!fs.existsSync(contentDir) || !fs.statSync(contentDir).isDirectory()) {
160
+ throw new Error(`Context Hub repo at ${repoDir} is missing a content/ directory`);
161
+ }
162
+ const files = findEntryFiles(contentDir);
163
+ const entries = [];
164
+ for (const filePath of files) {
165
+ const entry = buildEntry(repoDir, contentDir, filePath);
166
+ if (entry)
167
+ entries.push(entry);
168
+ }
169
+ return entries;
170
+ }
171
+ function findEntryFiles(dir) {
172
+ const results = [];
173
+ for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
174
+ const full = path.join(dir, entry.name);
175
+ if (entry.isDirectory()) {
176
+ results.push(...findEntryFiles(full));
177
+ }
178
+ else if (entry.name === "DOC.md" || entry.name === "SKILL.md") {
179
+ results.push(full);
180
+ }
181
+ }
182
+ return results;
183
+ }
184
/**
 * Build a single index entry from a DOC.md / SKILL.md file in the cached repo.
 *
 * @param repoDir    Root of the cached Context Hub checkout.
 * @param contentDir The content/ directory inside repoDir.
 * @param fullPath   Absolute path of the DOC.md or SKILL.md file.
 * @returns An index entry object, or null when the author segment or the
 *          entry name cannot be derived (the file is then skipped).
 */
function buildEntry(repoDir, contentDir, fullPath) {
    const raw = fs.readFileSync(fullPath, "utf8");
    const parsed = parseFrontmatter(raw);
    // Normalize to forward slashes so refs are stable across platforms.
    const relPath = path.posix.normalize(path.relative(repoDir, fullPath).replace(/\\/g, "/"));
    const relFromContent = path.posix.normalize(path.relative(contentDir, fullPath).replace(/\\/g, "/"));
    const segments = relFromContent.split("/");
    // First path segment under content/ acts as the author/namespace handle.
    const author = sanitizeString(segments[0] ?? "");
    if (!author)
        return null;
    // Prefer the frontmatter `name`; fall back to the containing directory name.
    const name = sanitizeString(toStringOrUndefined(parsed.data.name) ?? path.basename(path.dirname(fullPath)));
    if (!name)
        return null;
    const metadata = (parsed.data.metadata ?? {});
    const tags = parseCsv(metadata.tags);
    const language = sanitizeString(toStringOrUndefined(metadata.languages));
    const version = sanitizeString(toStringOrUndefined(metadata.versions));
    const id = `${author}/${name}`;
    // SKILL.md files are skills; any other entry file (DOC.md) is knowledge.
    const assetType = path.basename(fullPath) === "SKILL.md" ? "skill" : "knowledge";
    return {
        id,
        ref: makeContextHubRef(relPath),
        assetType,
        filePath: relPath,
        description: sanitizeString(toStringOrUndefined(parsed.data.description), 1000),
        tags,
        // Empty strings collapse to undefined so optional fields stay absent.
        language: language || undefined,
        version: version || undefined,
        // Composite key used for deterministic sorting of index entries.
        sortName: `${id}:${language ?? ""}:${version ?? ""}`,
    };
}
214
/**
 * Score an index entry against a free-text query.
 * Each whitespace token is matched against weighted fields (id > description,
 * tags > language, version); exact field matches count double. The summed
 * token scores are scaled by token coverage, with a +5 bonus for an exact id
 * match. Returns 1 for an empty query and 0 when nothing matches.
 */
function scoreEntry(entry, query) {
    const normalizedQuery = query.trim().toLowerCase();
    if (!normalizedQuery)
        return 1;
    const tokens = normalizedQuery.split(/\s+/).filter(Boolean);
    if (tokens.length === 0)
        return 1;
    const weightedFields = [
        [entry.id.toLowerCase(), 4],
        [entry.description?.toLowerCase() ?? "", 2],
        [(entry.tags ?? []).join(" ").toLowerCase(), 2],
        [entry.language?.toLowerCase() ?? "", 1],
        [entry.version?.toLowerCase() ?? "", 1],
    ];
    let matchedTokens = 0;
    let total = 0;
    for (const token of tokens) {
        let best = 0;
        for (const [text, weight] of weightedFields) {
            if (!text)
                continue;
            if (text === token) {
                best = Math.max(best, weight * 2);
            }
            else if (text.includes(token)) {
                best = Math.max(best, weight);
            }
        }
        if (best > 0) {
            matchedTokens += 1;
            total += best;
        }
    }
    if (matchedTokens === 0)
        return 0;
    const coverage = matchedTokens / tokens.length;
    const exactIdBonus = entry.id.toLowerCase() === normalizedQuery ? 5 : 0;
    return Math.round((total * coverage + exactIdBonus) * 1000) / 1000;
}
250
/**
 * Check whether an entry satisfies the requested asset-type filter.
 * A missing filter or the literal "any" matches everything.
 */
function matchesType(entry, requested) {
    const filterIsActive = Boolean(requested) && requested !== "any";
    return filterIsActive ? entry.assetType === requested : true;
}
255
/**
 * Convert an index entry into a search hit for the CLI output.
 * Language/version (when present) are appended to the description as a
 * " — "-separated detail suffix; an all-empty description becomes undefined.
 */
function entryToHit(entry, score) {
    const detailSuffix = [entry.language, entry.version].filter(Boolean).join(" • ");
    const descriptionParts = [entry.description, detailSuffix].filter(Boolean);
    return {
        type: entry.assetType,
        name: entry.id,
        path: entry.ref,
        ref: entry.ref,
        origin: "context-hub",
        editable: false,
        description: descriptionParts.length > 0 ? descriptionParts.join(" — ") : undefined,
        tags: entry.tags,
        action: `akm show ${entry.ref}`,
        score,
    };
}
271
/**
 * Render markdown content according to the requested view mode.
 * Supported modes: full (default), toc, frontmatter, section, lines.
 * Throws UsageError when a requested section heading is not found.
 */
function renderContentForView(content, view) {
    if (!view || view.mode === "full") {
        return content;
    }
    if (view.mode === "toc") {
        return formatToc(parseMarkdownToc(content));
    }
    if (view.mode === "frontmatter") {
        return extractFrontmatterOnly(content) ?? "(no frontmatter)";
    }
    if (view.mode === "section") {
        const match = extractSection(content, view.heading);
        if (!match) {
            throw new UsageError(`Section not found: ${view.heading}`);
        }
        return match.content;
    }
    if (view.mode === "lines") {
        return extractLineRange(content, view.start, view.end);
    }
    // Unknown mode: fall back to the full content, mirroring the default case.
    return content;
}
292
/**
 * Resolve a Context Hub ref's file path inside the cached repo directory.
 * Rejects paths outside content/ and any path that would escape the repo
 * root (traversal guard), throwing UsageError in both cases.
 */
function resolveCachedFilePath(repoDir, filePath) {
    const posixPath = path.posix.normalize(filePath.replace(/\\/g, "/"));
    if (!posixPath.startsWith("content/")) {
        throw new UsageError(`Invalid Context Hub ref: ${filePath}`);
    }
    const root = path.resolve(repoDir);
    const absolute = path.resolve(repoDir, posixPath);
    const escapesRoot = !absolute.startsWith(root + path.sep);
    if (escapesRoot) {
        throw new UsageError(`Invalid Context Hub ref: ${filePath}`);
    }
    return absolute;
}
304
/**
 * Build the GitHub branch-tarball download URL for a parsed repo descriptor.
 */
function buildTarballUrl(repo) {
    const { owner, repo: repoName, ref } = repo;
    return `https://github.com/${owner}/${repoName}/archive/refs/heads/${ref}.tar.gz`;
}
307
/**
 * Parse and validate a Context Hub GitHub repository URL.
 * Accepts https://github.com/<owner>/<repo>[/tree/<ref>] (a trailing .git on
 * the repo segment is stripped; the ref defaults to "main") and returns
 * { owner, repo, ref, canonicalUrl }. Throws ConfigError for anything else:
 * missing/invalid URL, non-https scheme, non-github.com host, malformed
 * owner/repo names, or an unsafe branch ref.
 */
function parseContextHubRepoUrl(rawUrl) {
    if (!rawUrl) {
        throw new ConfigError("Context Hub provider requires a GitHub repository URL");
    }
    let url;
    try {
        url = new URL(rawUrl);
    }
    catch {
        throw new ConfigError(`Context Hub URL is not valid: "${rawUrl}"`);
    }
    if (url.protocol !== "https:") {
        throw new ConfigError(`Context Hub URL must use https://, got "${url.protocol}"`);
    }
    if (url.hostname !== "github.com") {
        throw new ConfigError(`Context Hub provider only supports github.com URLs, got "${url.hostname}"`);
    }
    const pathParts = url.pathname.split("/").filter(Boolean);
    if (pathParts.length < 2) {
        throw new ConfigError(`Context Hub URL must point to a GitHub repository, got "${rawUrl}"`);
    }
    const owner = sanitizeString(pathParts[0]);
    const repo = sanitizeString(pathParts[1].replace(/\.git$/i, ""));
    // A /tree/<branch...> suffix selects a branch; multi-segment refs are joined back.
    const hasTreeRef = pathParts[2] === "tree" && pathParts.length >= 4;
    const ref = hasTreeRef ? sanitizeString(pathParts.slice(3).join("/"), 255) || "main" : "main";
    const namePattern = /^[A-Za-z0-9_.-]+$/;
    if (!owner || !repo || !namePattern.test(owner) || !namePattern.test(repo)) {
        throw new ConfigError(`Unsupported Context Hub repository URL: "${rawUrl}"`);
    }
    // Refs may contain slashes but never "..", blocking path traversal in tarball paths.
    if (!ref || ref.includes("..") || !/^[A-Za-z0-9._/-]+$/.test(ref)) {
        throw new ConfigError(`Unsupported Context Hub branch/ref in URL: "${rawUrl}"`);
    }
    return {
        owner,
        repo,
        ref,
        canonicalUrl: `https://github.com/${owner}/${repo}/tree/${ref}`,
    };
}
347
/**
 * Build a Context Hub asset ref from a repo-relative file path.
 */
function makeContextHubRef(filePath) {
    const normalized = path.posix.normalize(filePath);
    return CONTEXT_HUB_REF_PREFIX + normalized;
}
350
/**
 * Extract the repo-relative file path from a Context Hub ref.
 * Throws UsageError when the ref lacks the expected prefix or is empty
 * after the prefix.
 */
function parseContextHubRef(ref) {
    const candidate = ref.trim();
    const hasPrefix = candidate.startsWith(CONTEXT_HUB_REF_PREFIX);
    const relPath = hasPrefix ? candidate.slice(CONTEXT_HUB_REF_PREFIX.length) : "";
    if (!hasPrefix || !relPath) {
        throw new UsageError(`Invalid Context Hub ref: ${ref}`);
    }
    return relPath;
}
361
/**
 * Parse a comma-separated metadata string into a list of sanitized items
 * (each capped at 100 chars). Returns undefined for non-strings and for
 * inputs that yield no non-empty items.
 */
function parseCsv(value) {
    if (typeof value !== "string")
        return undefined;
    const items = [];
    for (const piece of value.split(",")) {
        const cleaned = sanitizeString(piece.trim(), 100);
        if (cleaned)
            items.push(cleaned);
    }
    return items.length > 0 ? items : undefined;
}
370
/**
 * Strip ASCII control characters (U+0000–U+001F and U+007F) from untrusted
 * remote metadata and cap the result at `maxLength` UTF-16 units.
 * Non-string input yields "".
 */
function sanitizeString(value, maxLength = 255) {
    if (typeof value !== "string")
        return "";
    let cleaned = "";
    for (const ch of value) {
        const code = ch.codePointAt(0);
        const isControl = code <= 0x1f || code === 0x7f;
        if (!isControl)
            cleaned += ch;
    }
    return cleaned.slice(0, maxLength);
}
376
/**
 * Report whether a cache timestamp is older than the given TTL.
 */
function isExpired(mtimeMs, ttlMs) {
    const ageMs = Date.now() - mtimeMs;
    return ageMs > ttlMs;
}
379
/**
 * Runtime type guard for cached Context Hub index entries.
 * Validates the required fields of an entry deserialized from the cache.
 */
function isContextHubEntry(value) {
    const isPlainObject = typeof value === "object" && value !== null && !Array.isArray(value);
    if (!isPlainObject)
        return false;
    const candidate = value;
    if (typeof candidate.id !== "string")
        return false;
    if (typeof candidate.ref !== "string")
        return false;
    if (candidate.assetType !== "knowledge" && candidate.assetType !== "skill")
        return false;
    if (typeof candidate.filePath !== "string")
        return false;
    return typeof candidate.sortName === "string";
}
389
+ export { ContextHubStashProvider, buildContextHubIndex, makeContextHubRef, parseContextHubRef, parseContextHubRepoUrl };
@@ -6,4 +6,5 @@
6
6
  * side-effect imports that were duplicated in stash-search.ts and stash-show.ts.
7
7
  */
8
8
  import "./filesystem";
9
+ import "./context-hub";
9
10
  import "./openviking";
@@ -128,12 +128,37 @@ export function registerActionBuilder(type, builder) {
128
128
  // Re-export for consumers that were already importing from stash-search
129
129
  export { buildLocalAction, rendererForType };
130
130
  // ── Helpers ──────────────────────────────────────────────────────────────────
131
/**
 * Merge hits from the local stash and additional providers using Reciprocal
 * Rank Fusion (RRF). Each list is already internally sorted by relevance;
 * RRF scores by rank position rather than raw score, so sources with
 * incompatible score scales are merged fairly. Hits appearing in both lists
 * (same path/ref/name key) have their RRF contributions summed.
 *
 * When either list is empty there is nothing to fuse, so the other list is
 * returned as-is (truncated to `limit`) and its hits keep their native
 * scores — matching mergeSearchHits, which short-circuits on both sides.
 * (Previously only an empty `additionalHits` short-circuited, so an empty
 * local list rewrote provider scores to rank-based RRF values.)
 */
export function mergeStashHits(localHits, additionalHits, limit) {
    if (additionalHits.length === 0)
        return localHits.slice(0, limit);
    if (localHits.length === 0)
        return additionalHits.slice(0, limit);
    const RRF_K = 60;
    const fused = new Map();
    // Fold one ranked list into the fusion map, keyed by path/ref/name.
    const fold = (hits) => {
        hits.forEach((hit, rank) => {
            const key = hit.path ?? hit.ref ?? hit.name;
            const contribution = 1 / (RRF_K + rank + 1);
            const existing = fused.get(key);
            if (existing) {
                existing.score += contribution;
            }
            else {
                fused.set(key, { hit, score: contribution });
            }
        });
    };
    fold(localHits);
    fold(additionalHits);
    return [...fused.values()]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map(({ hit, score }) => ({ ...hit, score: Math.round(score * 10000) / 10000 }));
}
138
163
  function normalizeLimit(limit) {
139
164
  if (typeof limit !== "number" || Number.isNaN(limit) || limit <= 0) {
@@ -151,8 +176,46 @@ export function parseSearchSource(source) {
151
176
  return "stash";
152
177
  throw new UsageError(`Invalid value for --source: ${String(source)}. Expected one of: stash|registry|both`);
153
178
  }
154
/**
 * Merge local stash hits and registry hits using Reciprocal Rank Fusion,
 * same rationale as mergeStashHits. Registry hits are keyed under a
 * "registry:" namespace so they never collapse into stash hits. When either
 * list is empty the other is returned as-is (truncated to `limit`), keeping
 * its native scores.
 */
export function mergeSearchHits(localHits, registryHits, limit) {
    if (registryHits.length === 0)
        return localHits.slice(0, limit);
    if (localHits.length === 0)
        return registryHits.slice(0, limit);
    const RRF_K = 60;
    const fused = new Map();
    // Fold one ranked list into the fusion map using the supplied key function.
    const accumulate = (hits, keyOf) => {
        hits.forEach((hit, rank) => {
            const key = keyOf(hit);
            const contribution = 1 / (RRF_K + rank + 1);
            const existing = fused.get(key);
            if (existing) {
                existing.score += contribution;
            }
            else {
                fused.set(key, { hit, score: contribution });
            }
        });
    };
    accumulate(localHits, (hit) => hit.path ?? hit.ref ?? hit.name);
    accumulate(registryHits, (hit) => `registry:${hit.id ?? hit.name}`);
    return [...fused.values()]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map(({ hit, score }) => ({ ...hit, score: Math.round(score * 10000) / 10000 }));
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "akm-cli",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "type": "module",
5
5
  "description": "CLI tool to search, open, and run extension assets from an akm stash directory.",
6
6
  "keywords": [