akm-cli 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +13 -0
- package/dist/db.js +14 -9
- package/dist/embedder.js +18 -3
- package/dist/indexer.js +59 -1
- package/dist/local-search.js +120 -23
- package/dist/stash-providers/context-hub.js +389 -0
- package/dist/stash-providers/index.js +1 -0
- package/dist/stash-search.js +71 -8
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -41,6 +41,8 @@ const pkgVersion = (() => {
|
|
|
41
41
|
const OUTPUT_FORMATS = ["json", "yaml", "text"];
|
|
42
42
|
const DETAIL_LEVELS = ["brief", "normal", "full"];
|
|
43
43
|
const NORMAL_DESCRIPTION_LIMIT = 250;
|
|
44
|
+
const CONTEXT_HUB_ALIAS_REF = "context-hub";
|
|
45
|
+
const CONTEXT_HUB_ALIAS_URL = "https://github.com/andrewyng/context-hub";
|
|
44
46
|
function hasBunYAML(b) {
|
|
45
47
|
// biome-ignore lint/suspicious/noExplicitAny: type guard for runtime feature detection
|
|
46
48
|
return typeof b.YAML?.stringify === "function";
|
|
@@ -468,6 +470,15 @@ const addCommand = defineCommand({
|
|
|
468
470
|
},
|
|
469
471
|
async run({ args }) {
|
|
470
472
|
await runWithJsonErrors(async () => {
|
|
473
|
+
if (args.ref.trim() === CONTEXT_HUB_ALIAS_REF) {
|
|
474
|
+
const result = addStash({
|
|
475
|
+
target: CONTEXT_HUB_ALIAS_URL,
|
|
476
|
+
providerType: "context-hub",
|
|
477
|
+
name: "context-hub",
|
|
478
|
+
});
|
|
479
|
+
output("stash-add", result);
|
|
480
|
+
return;
|
|
481
|
+
}
|
|
471
482
|
const result = await akmAdd({ ref: args.ref });
|
|
472
483
|
output("add", result);
|
|
473
484
|
});
|
|
@@ -1143,6 +1154,7 @@ akm search "<query>" --type skill # Filter by type
|
|
|
1143
1154
|
akm search "<query>" --source both # Also search registries for installable kits
|
|
1144
1155
|
akm show <ref> # View asset details
|
|
1145
1156
|
akm add <ref> # Install a kit (npm, GitHub, git, local)
|
|
1157
|
+
akm add context-hub # Shortcut for adding Context Hub as a stash provider
|
|
1146
1158
|
akm clone <ref> # Copy an asset to the working stash (optional --dest arg to clone to specific location)
|
|
1147
1159
|
akm registry search "<query>" # Search all registries
|
|
1148
1160
|
\`\`\`
|
|
@@ -1213,6 +1225,7 @@ akm add <ref> # Install a kit (smart router: loc
|
|
|
1213
1225
|
akm add @scope/kit # From npm
|
|
1214
1226
|
akm add owner/repo # From GitHub
|
|
1215
1227
|
akm add ./path/to/local/kit # From local directory (adds as stash)
|
|
1228
|
+
akm add context-hub # Add the official Context Hub stash
|
|
1216
1229
|
akm kit add <ref> # Install a kit (explicit)
|
|
1217
1230
|
akm kit list # List installed kits
|
|
1218
1231
|
akm kit remove <target> # Remove a kit
|
package/dist/db.js
CHANGED
|
@@ -385,17 +385,22 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
385
385
|
return [];
|
|
386
386
|
}
|
|
387
387
|
}
|
|
388
|
-
function sanitizeFtsQuery(query) {
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
388
|
+
export function sanitizeFtsQuery(query) {
|
|
389
|
+
// Allow only characters safe in FTS5 queries: letters, digits, underscores,
|
|
390
|
+
// and whitespace. Everything else (hyphens, dots, quotes, parens, asterisks,
|
|
391
|
+
// colons, carets, @, !, etc.) is replaced with a space so that compound
|
|
392
|
+
// identifiers like "code-review" or "k8s.setup" become AND-joined tokens
|
|
393
|
+
// ("code review", "k8s setup") rather than triggering FTS5 syntax errors.
|
|
394
|
+
let sanitized = query.replace(/[^a-zA-Z0-9_\s]/g, " ");
|
|
395
|
+
// Neutralize the NEAR operator (FTS5 proximity syntax)
|
|
396
|
+
sanitized = sanitized.replace(/\bNEAR\b/g, " ");
|
|
397
|
+
const tokens = sanitized.split(/\s+/).filter((t) => t.length >= 1);
|
|
393
398
|
if (tokens.length === 0)
|
|
394
399
|
return "";
|
|
395
|
-
//
|
|
396
|
-
//
|
|
397
|
-
//
|
|
398
|
-
return tokens.join("
|
|
400
|
+
// Use implicit AND (space-separated tokens) for precision. FTS5 treats
|
|
401
|
+
// space-separated tokens as an implicit AND, matching only rows that
|
|
402
|
+
// contain ALL terms.
|
|
403
|
+
return tokens.join(" ");
|
|
399
404
|
}
|
|
400
405
|
// ── All entries ─────────────────────────────────────────────────────────────
|
|
401
406
|
export function getAllEntries(db, entryType) {
|
package/dist/embedder.js
CHANGED
|
@@ -30,6 +30,19 @@ async function embedLocal(text) {
|
|
|
30
30
|
const result = await model(text, { pooling: "mean", normalize: true });
|
|
31
31
|
return Array.from(result.data);
|
|
32
32
|
}
|
|
33
|
+
// ── Vector normalization ─────────────────────────────────────────────────────
|
|
34
|
+
/**
|
|
35
|
+
* L2-normalize a vector to unit length.
|
|
36
|
+
* Required for remote embeddings because the scoring pipeline's L2-to-cosine
|
|
37
|
+
* conversion formula (1 - distance^2/2) is only correct for unit vectors.
|
|
38
|
+
* The local embedder already normalizes via `normalize: true`.
|
|
39
|
+
*/
|
|
40
|
+
function l2Normalize(vec) {
|
|
41
|
+
const norm = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0));
|
|
42
|
+
if (norm === 0)
|
|
43
|
+
return vec;
|
|
44
|
+
return vec.map((v) => v / norm);
|
|
45
|
+
}
|
|
33
46
|
// ── OpenAI-compatible remote embedder ───────────────────────────────────────
|
|
34
47
|
async function embedRemote(text, config) {
|
|
35
48
|
const headers = { "Content-Type": "application/json" };
|
|
@@ -56,7 +69,7 @@ async function embedRemote(text, config) {
|
|
|
56
69
|
if (!json.data?.[0]?.embedding) {
|
|
57
70
|
throw new Error("Unexpected embedding response format: missing data[0].embedding");
|
|
58
71
|
}
|
|
59
|
-
return json.data[0].embedding;
|
|
72
|
+
return l2Normalize(json.data[0].embedding);
|
|
60
73
|
}
|
|
61
74
|
// ── Public API ──────────────────────────────────────────────────────────────
|
|
62
75
|
/**
|
|
@@ -118,11 +131,13 @@ async function embedRemoteBatch(texts, config) {
|
|
|
118
131
|
if (!json.data || json.data.length !== batch.length) {
|
|
119
132
|
throw new Error(`Unexpected embedding batch response: expected ${batch.length} embeddings, got ${json.data?.length ?? 0}`);
|
|
120
133
|
}
|
|
121
|
-
|
|
134
|
+
// Sort by index to guarantee correct order (OpenAI API doesn't guarantee order)
|
|
135
|
+
const sorted = [...json.data].sort((a, b) => a.index - b.index);
|
|
136
|
+
for (const [idx, d] of sorted.entries()) {
|
|
122
137
|
if (!Array.isArray(d.embedding)) {
|
|
123
138
|
throw new Error(`Unexpected embedding at batch index ${idx}: missing or invalid`);
|
|
124
139
|
}
|
|
125
|
-
results.push(d.embedding);
|
|
140
|
+
results.push(l2Normalize(d.embedding));
|
|
126
141
|
}
|
|
127
142
|
}
|
|
128
143
|
return results;
|
package/dist/indexer.js
CHANGED
|
@@ -162,6 +162,13 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
|
|
|
162
162
|
}
|
|
163
163
|
}
|
|
164
164
|
// Phase 2 (sync): write all pre-generated metadata inside a single transaction.
|
|
165
|
+
//
|
|
166
|
+
// Cross-stash dedup: track indexed assets by content identity
|
|
167
|
+
// (type + filename + description) so the same asset from a lower-priority
|
|
168
|
+
// stash root is skipped when a higher-priority root already covers it.
|
|
169
|
+
// Sources are ordered by priority (primary stash first), so the first
|
|
170
|
+
// occurrence wins.
|
|
171
|
+
const indexedAssetIdentities = new Set();
|
|
165
172
|
const insertTransaction = db.transaction(() => {
|
|
166
173
|
// HI-5: Perform the full-rebuild wipe as the FIRST step of the insert
|
|
167
174
|
// transaction so delete and re-insert are atomic — a concurrent reader
|
|
@@ -190,8 +197,20 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
|
|
|
190
197
|
// Delete old entries for this dir (will be re-inserted)
|
|
191
198
|
deleteEntriesByDir(db, dirPath);
|
|
192
199
|
if (stash) {
|
|
200
|
+
// Build a lookup for matching filename-less entries to actual files
|
|
201
|
+
const fileBasenameMap = buildFileBasenameMap(files);
|
|
193
202
|
for (const entry of stash.entries) {
|
|
194
|
-
const entryPath = entry.filename
|
|
203
|
+
const entryPath = entry.filename
|
|
204
|
+
? path.join(dirPath, entry.filename)
|
|
205
|
+
: matchEntryToFile(entry.name, fileBasenameMap, files);
|
|
206
|
+
if (!entryPath)
|
|
207
|
+
continue; // skip unresolvable entries
|
|
208
|
+
// Skip if a higher-priority stash root already indexed this asset
|
|
209
|
+
const basename = path.basename(entryPath);
|
|
210
|
+
const identityKey = `${entry.type}\0${basename}\0${entry.description ?? ""}`;
|
|
211
|
+
if (indexedAssetIdentities.has(identityKey))
|
|
212
|
+
continue;
|
|
213
|
+
indexedAssetIdentities.add(identityKey);
|
|
195
214
|
const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
|
|
196
215
|
const searchText = buildSearchText(entry);
|
|
197
216
|
const entryWithSize = attachFileSize(entry, entryPath);
|
|
@@ -335,6 +354,43 @@ async function enhanceStashWithLlm(llmConfig, stash, _dirPath, files) {
|
|
|
335
354
|
}
|
|
336
355
|
return { entries: enhanced };
|
|
337
356
|
}
|
|
357
|
+
/**
|
|
358
|
+
* Build a map from base filename (without extension) to full path for quick lookups.
|
|
359
|
+
*/
|
|
360
|
+
export function buildFileBasenameMap(files) {
|
|
361
|
+
const map = new Map();
|
|
362
|
+
for (const file of files) {
|
|
363
|
+
const base = path.basename(file, path.extname(file));
|
|
364
|
+
// Only keep first match per base name to avoid ambiguity
|
|
365
|
+
if (!map.has(base))
|
|
366
|
+
map.set(base, file);
|
|
367
|
+
}
|
|
368
|
+
return map;
|
|
369
|
+
}
|
|
370
|
+
/**
|
|
371
|
+
* Try to match a filename-less entry to an actual file in the directory.
|
|
372
|
+
*
|
|
373
|
+
* Matching strategy (in priority order):
|
|
374
|
+
* 1. Exact basename match: entry.name === filename without extension
|
|
375
|
+
* 2. Last path segment match: for entries with names like "dir/sub-entry",
|
|
376
|
+
* try matching the last segment
|
|
377
|
+
* 3. Fallback: first file in the directory, or null if no files are available
|
|
378
|
+
*/
|
|
379
|
+
export function matchEntryToFile(entryName, fileMap, files) {
|
|
380
|
+
// Exact match on entry name
|
|
381
|
+
const exact = fileMap.get(entryName);
|
|
382
|
+
if (exact)
|
|
383
|
+
return exact;
|
|
384
|
+
// Try last segment for hierarchical names (e.g. "corpus/agentic-patterns/foo")
|
|
385
|
+
const lastSegment = entryName.split("/").pop() ?? entryName;
|
|
386
|
+
if (lastSegment !== entryName) {
|
|
387
|
+
const segmentMatch = fileMap.get(lastSegment);
|
|
388
|
+
if (segmentMatch)
|
|
389
|
+
return segmentMatch;
|
|
390
|
+
}
|
|
391
|
+
// Fallback to first file, or null if no files are available
|
|
392
|
+
return files[0] || null;
|
|
393
|
+
}
|
|
338
394
|
export function buildSearchText(entry) {
|
|
339
395
|
const parts = [entry.name.replace(/[-_]/g, " ")];
|
|
340
396
|
if (entry.description)
|
|
@@ -347,6 +403,8 @@ export function buildSearchText(entry) {
|
|
|
347
403
|
parts.push(entry.aliases.join(" "));
|
|
348
404
|
if (entry.searchHints)
|
|
349
405
|
parts.push(entry.searchHints.join(" "));
|
|
406
|
+
if (entry.usage)
|
|
407
|
+
parts.push(entry.usage.join(" "));
|
|
350
408
|
if (entry.intent) {
|
|
351
409
|
if (entry.intent.when)
|
|
352
410
|
parts.push(entry.intent.when);
|
package/dist/local-search.js
CHANGED
|
@@ -99,7 +99,15 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
99
99
|
if (!query) {
|
|
100
100
|
const typeFilter = searchType === "any" ? undefined : searchType;
|
|
101
101
|
const allEntries = getAllEntries(db, typeFilter);
|
|
102
|
-
|
|
102
|
+
// Deduplicate by file path — multiple entries can share the same file
|
|
103
|
+
const seenFilePaths = new Set();
|
|
104
|
+
const uniqueEntries = allEntries.filter((ie) => {
|
|
105
|
+
if (seenFilePaths.has(ie.filePath))
|
|
106
|
+
return false;
|
|
107
|
+
seenFilePaths.add(ie.filePath);
|
|
108
|
+
return true;
|
|
109
|
+
});
|
|
110
|
+
const selected = uniqueEntries.slice(0, limit);
|
|
103
111
|
const hits = await Promise.all(selected.map((ie) => buildDbHit({
|
|
104
112
|
entry: ie.entry,
|
|
105
113
|
path: ie.filePath,
|
|
@@ -137,6 +145,7 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
137
145
|
}
|
|
138
146
|
}
|
|
139
147
|
// Merge results using RRF
|
|
148
|
+
// Issue #15: "hybrid" for results appearing in both FTS and vec results.
|
|
140
149
|
const scored = [];
|
|
141
150
|
const seenIds = new Set();
|
|
142
151
|
// Process FTS results
|
|
@@ -146,7 +155,8 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
146
155
|
const embedRank = embedRankMap.get(id);
|
|
147
156
|
const embedRrf = embedRank !== undefined ? 1 / (RRF_K + embedRank) : 0;
|
|
148
157
|
const rrfScore = ftsRrf + embedRrf;
|
|
149
|
-
|
|
158
|
+
// Issue #15: combined FTS+vec results are "hybrid", not "semantic"
|
|
159
|
+
const rankingMode = embedRrf > 0 ? "hybrid" : "fts";
|
|
150
160
|
scored.push({ id, entry: result.entry, filePath: result.filePath, score: rrfScore, rankingMode });
|
|
151
161
|
}
|
|
152
162
|
// Add vec-only results not already in FTS results
|
|
@@ -172,45 +182,63 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
172
182
|
}
|
|
173
183
|
}
|
|
174
184
|
}
|
|
175
|
-
// Apply boosts as multiplicative factors
|
|
185
|
+
// Apply boosts as multiplicative factors (all boosts in a single phase
|
|
186
|
+
// so that sort order and displayed scores are always consistent — Issue #1).
|
|
176
187
|
const queryTokens = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
177
188
|
for (const item of scored) {
|
|
178
189
|
const entry = item.entry;
|
|
179
190
|
let boostSum = 0;
|
|
180
|
-
// Tag boost
|
|
191
|
+
// Tag boost — capped at 0.30 (Issue #7)
|
|
181
192
|
if (entry.tags) {
|
|
193
|
+
let tagBoost = 0;
|
|
182
194
|
for (const tag of entry.tags) {
|
|
183
195
|
if (queryTokens.some((t) => tag.toLowerCase() === t)) {
|
|
184
|
-
|
|
196
|
+
tagBoost += 0.15;
|
|
185
197
|
}
|
|
186
198
|
}
|
|
199
|
+
boostSum += Math.min(0.3, tagBoost);
|
|
187
200
|
}
|
|
188
|
-
// Search hint boost
|
|
201
|
+
// Search hint boost — capped at 0.24 (Issue #7)
|
|
189
202
|
if (entry.searchHints) {
|
|
203
|
+
let hintBoost = 0;
|
|
190
204
|
for (const hint of entry.searchHints) {
|
|
191
205
|
const hintLower = hint.toLowerCase();
|
|
192
206
|
for (const token of queryTokens) {
|
|
193
207
|
if (hintLower.includes(token)) {
|
|
194
|
-
|
|
208
|
+
hintBoost += 0.12;
|
|
195
209
|
break;
|
|
196
210
|
}
|
|
197
211
|
}
|
|
198
212
|
}
|
|
213
|
+
boostSum += Math.min(0.24, hintBoost);
|
|
199
214
|
}
|
|
200
215
|
// Name boost
|
|
201
216
|
const nameLower = entry.name.toLowerCase().replace(/[-_]/g, " ");
|
|
202
217
|
if (queryTokens.some((t) => nameLower.includes(t))) {
|
|
203
218
|
boostSum += 0.1;
|
|
204
219
|
}
|
|
220
|
+
// Quality boost (Issue #1: moved from buildDbHit to single-phase)
|
|
221
|
+
const qualityBoost = entry.quality === "generated" ? 0 : 0.05;
|
|
222
|
+
boostSum += qualityBoost;
|
|
223
|
+
// Confidence boost (Issue #1: moved from buildDbHit to single-phase)
|
|
224
|
+
const confidenceBoost = typeof entry.confidence === "number" ? Math.min(0.05, Math.max(0, entry.confidence) * 0.05) : 0;
|
|
225
|
+
boostSum += confidenceBoost;
|
|
205
226
|
item.score = item.score * (1 + boostSum);
|
|
206
227
|
}
|
|
207
|
-
|
|
228
|
+
// Issue #14: deterministic tiebreaker on equal scores
|
|
229
|
+
scored.sort((a, b) => b.score - a.score || a.entry.name.localeCompare(b.entry.name));
|
|
230
|
+
// Deduplicate by file path — keep only the highest-scored entry per file.
|
|
231
|
+
// Multiple .stash.json entries can map to the same file (e.g. entries without
|
|
232
|
+
// a filename field all collapse to files[0]). Showing the same path/ref
|
|
233
|
+
// multiple times clutters results.
|
|
234
|
+
const deduped = deduplicateByPath(scored);
|
|
208
235
|
const rankMs = Date.now() - tRank0;
|
|
209
|
-
const selected =
|
|
236
|
+
const selected = deduped.slice(0, limit);
|
|
210
237
|
const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode }) => buildDbHit({
|
|
211
238
|
entry,
|
|
212
239
|
path: filePath,
|
|
213
|
-
|
|
240
|
+
// Issue #8: round to 4 decimal places instead of 2
|
|
241
|
+
score: Math.round(score * 10000) / 10000,
|
|
214
242
|
query,
|
|
215
243
|
rankingMode,
|
|
216
244
|
defaultStashDir: stashDir,
|
|
@@ -233,9 +261,10 @@ async function tryVecScores(db, query, k, config) {
|
|
|
233
261
|
const vecResults = searchVec(db, queryEmbedding, k);
|
|
234
262
|
const scores = new Map();
|
|
235
263
|
for (const { id, distance } of vecResults) {
|
|
236
|
-
// Convert L2 distance to cosine similarity (vectors are normalized)
|
|
237
|
-
|
|
238
|
-
|
|
264
|
+
// Convert L2 distance to cosine similarity (vectors are normalized).
|
|
265
|
+
// Issue #3: guard against NaN/Infinity from sqlite-vec edge cases.
|
|
266
|
+
const raw = 1 - (distance * distance) / 2;
|
|
267
|
+
scores.set(id, Number.isFinite(raw) ? Math.max(0, raw) : 0);
|
|
239
268
|
}
|
|
240
269
|
return scores;
|
|
241
270
|
}
|
|
@@ -249,15 +278,18 @@ async function substringSearch(query, searchType, limit, stashDir, sources, conf
|
|
|
249
278
|
const assets = await indexAssets(stashDir, searchType);
|
|
250
279
|
const matched = assets.filter((asset) => !query || buildSearchText(asset.entry).includes(query));
|
|
251
280
|
if (!query) {
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
.map((asset) => assetToSearchHit(asset, query, stashDir, sources, config)));
|
|
281
|
+
const sorted = matched.sort(compareAssets);
|
|
282
|
+
const unique = deduplicateAssetsByPath(sorted);
|
|
283
|
+
return Promise.all(unique.slice(0, limit).map((asset) => assetToSearchHit(asset, query, stashDir, sources, config)));
|
|
256
284
|
}
|
|
257
285
|
// Score and sort by relevance
|
|
258
286
|
const scored = matched.map((asset) => ({ asset, score: scoreSubstringMatch(asset.entry, query) }));
|
|
259
287
|
scored.sort((a, b) => b.score - a.score || compareAssets(a.asset, b.asset));
|
|
260
|
-
|
|
288
|
+
// Deduplicate by path — keep highest-scored entry per file
|
|
289
|
+
const dedupedScored = deduplicateByPath(scored.map((s) => ({ ...s, filePath: s.asset.path })));
|
|
290
|
+
return Promise.all(dedupedScored
|
|
291
|
+
.slice(0, limit)
|
|
292
|
+
.map(({ asset, score }) => assetToSearchHit(asset, query, stashDir, sources, config, score)));
|
|
261
293
|
}
|
|
262
294
|
function scoreSubstringMatch(entry, query) {
|
|
263
295
|
const tokens = query.split(/\s+/).filter(Boolean);
|
|
@@ -282,16 +314,22 @@ function scoreSubstringMatch(entry, query) {
|
|
|
282
314
|
if (tokens.some((t) => descLower.includes(t))) {
|
|
283
315
|
score += 0.05;
|
|
284
316
|
}
|
|
285
|
-
|
|
317
|
+
// Issue #8: round to 4 decimal places instead of 2
|
|
318
|
+
return Math.round(Math.min(1, score) * 10000) / 10000;
|
|
286
319
|
}
|
|
287
320
|
// ── Hit building ────────────────────────────────────────────────────────────
|
|
288
321
|
export async function buildDbHit(input) {
|
|
289
322
|
const entryStashDir = findSourceForPath(input.path, input.sources)?.path ?? input.defaultStashDir;
|
|
290
323
|
const canonical = deriveCanonicalAssetNameFromStashRoot(input.entry.type, entryStashDir, input.path);
|
|
291
324
|
const refName = canonical && !canonical.startsWith("../") && !canonical.startsWith("..\\") ? canonical : input.entry.name;
|
|
325
|
+
// Issue #1: Quality and confidence boosts are now applied in the main scoring
|
|
326
|
+
// phase (searchDatabase). buildDbHit receives the already-final score and
|
|
327
|
+
// passes it through without further multiplication. We still compute the
|
|
328
|
+
// boost values here for buildWhyMatched reporting.
|
|
292
329
|
const qualityBoost = input.entry.quality === "generated" ? 0 : 0.05;
|
|
293
330
|
const confidenceBoost = typeof input.entry.confidence === "number" ? Math.min(0.05, Math.max(0, input.entry.confidence) * 0.05) : 0;
|
|
294
|
-
|
|
331
|
+
// Issue #8: round to 4 decimal places, no boost multiplication
|
|
332
|
+
const score = Math.round(input.score * 10000) / 10000;
|
|
295
333
|
const whyMatched = buildWhyMatched(input.entry, input.query, input.rankingMode, qualityBoost, confidenceBoost);
|
|
296
334
|
const source = findSourceForPath(input.path, input.sources);
|
|
297
335
|
const editable = isEditable(input.path, input.config);
|
|
@@ -316,13 +354,24 @@ export async function buildDbHit(input) {
|
|
|
316
354
|
}
|
|
317
355
|
return hit;
|
|
318
356
|
}
|
|
319
|
-
export function buildWhyMatched(entry, query,
|
|
320
|
-
|
|
357
|
+
export function buildWhyMatched(entry, query,
|
|
358
|
+
// Issue #15: added "hybrid" ranking mode
|
|
359
|
+
rankingMode, qualityBoost, confidenceBoost) {
|
|
360
|
+
// Issue #15: "hybrid" label for combined FTS+vec results
|
|
361
|
+
const reasons = [
|
|
362
|
+
rankingMode === "hybrid"
|
|
363
|
+
? "hybrid (fts + semantic)"
|
|
364
|
+
: rankingMode === "semantic"
|
|
365
|
+
? "semantic similarity"
|
|
366
|
+
: "fts bm25 relevance",
|
|
367
|
+
];
|
|
321
368
|
const tokens = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
322
369
|
const name = entry.name.toLowerCase();
|
|
323
370
|
const tags = entry.tags?.join(" ").toLowerCase() ?? "";
|
|
324
371
|
const searchHints = entry.searchHints?.join(" ").toLowerCase() ?? "";
|
|
325
372
|
const aliases = entry.aliases?.join(" ").toLowerCase() ?? "";
|
|
373
|
+
// Issue #12: include description in match reasons
|
|
374
|
+
const desc = entry.description?.toLowerCase() ?? "";
|
|
326
375
|
if (tokens.some((t) => name.includes(t)))
|
|
327
376
|
reasons.push("matched name tokens");
|
|
328
377
|
if (tokens.some((t) => tags.includes(t)))
|
|
@@ -331,6 +380,9 @@ export function buildWhyMatched(entry, query, rankingMode, qualityBoost, confide
|
|
|
331
380
|
reasons.push("matched searchHints");
|
|
332
381
|
if (tokens.some((t) => aliases.includes(t)))
|
|
333
382
|
reasons.push("matched aliases");
|
|
383
|
+
// Issue #12: report description matches
|
|
384
|
+
if (tokens.some((t) => desc.includes(t)))
|
|
385
|
+
reasons.push("matched description");
|
|
334
386
|
if (qualityBoost > 0)
|
|
335
387
|
reasons.push("curated metadata boost");
|
|
336
388
|
if (confidenceBoost > 0)
|
|
@@ -413,10 +465,27 @@ async function indexAssets(stashDir, type) {
|
|
|
413
465
|
continue;
|
|
414
466
|
stash = generated;
|
|
415
467
|
}
|
|
468
|
+
// Build a lookup for matching filename-less entries to actual files
|
|
469
|
+
const fileBasenameMap = new Map();
|
|
470
|
+
for (const file of files) {
|
|
471
|
+
const base = path.basename(file, path.extname(file));
|
|
472
|
+
if (!fileBasenameMap.has(base))
|
|
473
|
+
fileBasenameMap.set(base, file);
|
|
474
|
+
}
|
|
416
475
|
for (const entry of stash.entries) {
|
|
417
476
|
if (filterType && entry.type !== filterType)
|
|
418
477
|
continue;
|
|
419
|
-
|
|
478
|
+
let entryPath;
|
|
479
|
+
if (entry.filename) {
|
|
480
|
+
entryPath = path.join(dirPath, entry.filename);
|
|
481
|
+
}
|
|
482
|
+
else {
|
|
483
|
+
// Try matching entry name to a file by basename
|
|
484
|
+
entryPath =
|
|
485
|
+
fileBasenameMap.get(entry.name) ??
|
|
486
|
+
fileBasenameMap.get(entry.name.split("/").pop() ?? "") ??
|
|
487
|
+
(files[0] || dirPath);
|
|
488
|
+
}
|
|
420
489
|
assets.push({ entry, path: entryPath });
|
|
421
490
|
}
|
|
422
491
|
}
|
|
@@ -427,3 +496,31 @@ function compareAssets(a, b) {
|
|
|
427
496
|
return a.entry.type.localeCompare(b.entry.type);
|
|
428
497
|
return a.entry.name.localeCompare(b.entry.name);
|
|
429
498
|
}
|
|
499
|
+
/**
|
|
500
|
+
* Deduplicate scored results by file path, keeping only the highest-scored
|
|
501
|
+
* entry per unique path. Sorts by score descending internally to ensure the
|
|
502
|
+
* precondition is always met regardless of caller (Issue #4).
|
|
503
|
+
*/
|
|
504
|
+
function deduplicateByPath(items) {
|
|
505
|
+
// Issue #4: sort inside to enforce the descending-score precondition
|
|
506
|
+
const sorted = [...items].sort((a, b) => (b.score ?? 0) - (a.score ?? 0));
|
|
507
|
+
const seen = new Set();
|
|
508
|
+
return sorted.filter((item) => {
|
|
509
|
+
if (seen.has(item.filePath))
|
|
510
|
+
return false;
|
|
511
|
+
seen.add(item.filePath);
|
|
512
|
+
return true;
|
|
513
|
+
});
|
|
514
|
+
}
|
|
515
|
+
/**
|
|
516
|
+
* Deduplicate IndexedAsset[] by path, keeping the first (highest-priority) entry.
|
|
517
|
+
*/
|
|
518
|
+
function deduplicateAssetsByPath(assets) {
|
|
519
|
+
const seen = new Set();
|
|
520
|
+
return assets.filter((asset) => {
|
|
521
|
+
if (seen.has(asset.path))
|
|
522
|
+
return false;
|
|
523
|
+
seen.add(asset.path);
|
|
524
|
+
return true;
|
|
525
|
+
});
|
|
526
|
+
}
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import fs from "node:fs";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { fetchWithRetry } from "../common";
|
|
5
|
+
import { ConfigError, NotFoundError, UsageError } from "../errors";
|
|
6
|
+
import { parseFrontmatter, toStringOrUndefined } from "../frontmatter";
|
|
7
|
+
import { extractFrontmatterOnly, extractLineRange, extractSection, formatToc, parseMarkdownToc } from "../markdown";
|
|
8
|
+
import { getRegistryIndexCacheDir } from "../paths";
|
|
9
|
+
import { extractTarGzSecure } from "../registry-install";
|
|
10
|
+
import { registerStashProvider } from "../stash-provider-factory";
|
|
11
|
+
/** Cache TTL before refreshing the mirrored repo (12 hours). */
|
|
12
|
+
const CACHE_TTL_MS = 12 * 60 * 60 * 1000;
|
|
13
|
+
/** Maximum stale age allowed when refresh fails (7 days). */
|
|
14
|
+
const CACHE_STALE_MS = 7 * 24 * 60 * 60 * 1000;
|
|
15
|
+
const CONTEXT_HUB_REF_PREFIX = "context-hub://";
|
|
16
|
+
class ContextHubStashProvider {
|
|
17
|
+
type = "context-hub";
|
|
18
|
+
name;
|
|
19
|
+
repo;
|
|
20
|
+
constructor(config) {
|
|
21
|
+
this.repo = parseContextHubRepoUrl(config.url ?? "");
|
|
22
|
+
this.name = config.name ?? `${this.repo.owner}/${this.repo.repo}`;
|
|
23
|
+
}
|
|
24
|
+
async search(options) {
|
|
25
|
+
try {
|
|
26
|
+
const entries = await this.loadEntries();
|
|
27
|
+
const filtered = entries
|
|
28
|
+
.filter((entry) => matchesType(entry, options.type))
|
|
29
|
+
.map((entry) => ({ entry, score: scoreEntry(entry, options.query) }))
|
|
30
|
+
.filter(({ score }) => options.query.trim() === "" || score > 0)
|
|
31
|
+
.sort((a, b) => b.score - a.score || a.entry.sortName.localeCompare(b.entry.sortName))
|
|
32
|
+
.slice(0, options.limit);
|
|
33
|
+
return {
|
|
34
|
+
hits: filtered.map(({ entry, score }) => entryToHit(entry, score)),
|
|
35
|
+
};
|
|
36
|
+
}
|
|
37
|
+
catch (err) {
|
|
38
|
+
const message = err instanceof Error ? err.message : String(err);
|
|
39
|
+
return { hits: [], warnings: [`Stash ${this.name}: ${message}`] };
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
async show(ref, view) {
|
|
43
|
+
const filePath = parseContextHubRef(ref);
|
|
44
|
+
const repoDir = await this.loadRepoDir();
|
|
45
|
+
const resolved = resolveCachedFilePath(repoDir, filePath);
|
|
46
|
+
if (!fs.existsSync(resolved) || !fs.statSync(resolved).isFile()) {
|
|
47
|
+
throw new NotFoundError(`Context Hub asset not found: ${filePath}`);
|
|
48
|
+
}
|
|
49
|
+
const raw = fs.readFileSync(resolved, "utf8");
|
|
50
|
+
const parsed = parseFrontmatter(raw);
|
|
51
|
+
const relFromContent = path.posix.normalize(path.relative(path.join(repoDir, "content"), resolved).replace(/\\/g, "/"));
|
|
52
|
+
const author = sanitizeString(relFromContent.split("/")[0] ?? "") || "unknown";
|
|
53
|
+
const name = sanitizeString(toStringOrUndefined(parsed.data.name) ?? path.basename(path.dirname(resolved)));
|
|
54
|
+
const description = sanitizeString(toStringOrUndefined(parsed.data.description), 1000);
|
|
55
|
+
const assetType = path.basename(resolved) === "SKILL.md" ? "skill" : "knowledge";
|
|
56
|
+
const content = renderContentForView(raw, view);
|
|
57
|
+
return {
|
|
58
|
+
type: assetType,
|
|
59
|
+
name: `${author}/${name}`,
|
|
60
|
+
path: ref,
|
|
61
|
+
content,
|
|
62
|
+
description,
|
|
63
|
+
editable: false,
|
|
64
|
+
origin: this.type,
|
|
65
|
+
action: `Context Hub content from ${this.repo.canonicalUrl}`,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
canShow(ref) {
|
|
69
|
+
return ref.trim().startsWith(CONTEXT_HUB_REF_PREFIX);
|
|
70
|
+
}
|
|
71
|
+
async loadEntries() {
|
|
72
|
+
const cachePaths = getCachePaths(this.repo.canonicalUrl);
|
|
73
|
+
const index = await ensureContextHubMirror(this.repo, cachePaths);
|
|
74
|
+
return index.entries;
|
|
75
|
+
}
|
|
76
|
+
async loadRepoDir() {
|
|
77
|
+
const cachePaths = getCachePaths(this.repo.canonicalUrl);
|
|
78
|
+
await ensureContextHubMirror(this.repo, cachePaths, { requireRepoDir: true });
|
|
79
|
+
return cachePaths.repoDir;
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
registerStashProvider("context-hub", (config) => new ContextHubStashProvider(config));
|
|
83
|
+
function getCachePaths(repoUrl) {
|
|
84
|
+
const key = createHash("sha256").update(repoUrl).digest("hex").slice(0, 16);
|
|
85
|
+
const rootDir = path.join(getRegistryIndexCacheDir(), `context-hub-${key}`);
|
|
86
|
+
return {
|
|
87
|
+
rootDir,
|
|
88
|
+
archivePath: path.join(rootDir, "repo.tar.gz"),
|
|
89
|
+
repoDir: path.join(rootDir, "repo"),
|
|
90
|
+
indexPath: path.join(rootDir, "index.json"),
|
|
91
|
+
};
|
|
92
|
+
}
|
|
93
|
+
async function ensureContextHubMirror(repo, cachePaths, options) {
|
|
94
|
+
const requireRepoDir = options?.requireRepoDir === true;
|
|
95
|
+
const cached = readCachedIndex(cachePaths.indexPath);
|
|
96
|
+
if (cached && !isExpired(cached.mtime, CACHE_TTL_MS) && (!requireRepoDir || hasExtractedRepo(cachePaths.repoDir))) {
|
|
97
|
+
return { entries: cached.entries };
|
|
98
|
+
}
|
|
99
|
+
try {
|
|
100
|
+
fs.mkdirSync(cachePaths.rootDir, { recursive: true });
|
|
101
|
+
await downloadArchive(buildTarballUrl(repo), cachePaths.archivePath);
|
|
102
|
+
extractTarGzSecure(cachePaths.archivePath, cachePaths.repoDir);
|
|
103
|
+
const entries = buildContextHubIndex(cachePaths.repoDir);
|
|
104
|
+
writeCachedIndex(cachePaths.indexPath, entries);
|
|
105
|
+
return { entries };
|
|
106
|
+
}
|
|
107
|
+
catch (err) {
|
|
108
|
+
if (cached &&
|
|
109
|
+
!isExpired(cached.mtime, CACHE_STALE_MS) &&
|
|
110
|
+
(!requireRepoDir || hasExtractedRepo(cachePaths.repoDir))) {
|
|
111
|
+
return { entries: cached.entries };
|
|
112
|
+
}
|
|
113
|
+
throw err;
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
function hasExtractedRepo(repoDir) {
|
|
117
|
+
try {
|
|
118
|
+
return fs.statSync(repoDir).isDirectory() && fs.statSync(path.join(repoDir, "content")).isDirectory();
|
|
119
|
+
}
|
|
120
|
+
catch {
|
|
121
|
+
return false;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
function readCachedIndex(indexPath) {
|
|
125
|
+
try {
|
|
126
|
+
const stat = fs.statSync(indexPath);
|
|
127
|
+
const raw = JSON.parse(fs.readFileSync(indexPath, "utf8"));
|
|
128
|
+
if (!Array.isArray(raw))
|
|
129
|
+
return null;
|
|
130
|
+
const entries = raw.filter(isContextHubEntry);
|
|
131
|
+
return { entries, mtime: stat.mtimeMs };
|
|
132
|
+
}
|
|
133
|
+
catch {
|
|
134
|
+
return null;
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
/**
 * Atomically persist the Context Hub index as JSON.
 *
 * Writes to a unique temp file (mode 0600) in the target directory and then
 * renames it over indexPath so readers never observe a partial file. Fix: the
 * temp file is now removed when the write or rename fails, instead of being
 * leaked in the cache directory.
 */
function writeCachedIndex(indexPath, entries) {
    const dir = path.dirname(indexPath);
    fs.mkdirSync(dir, { recursive: true });
    const tmpPath = `${indexPath}.tmp.${process.pid}.${Math.random().toString(36).slice(2)}`;
    try {
        fs.writeFileSync(tmpPath, JSON.stringify(entries), { encoding: "utf8", mode: 0o600 });
        fs.renameSync(tmpPath, indexPath);
    }
    catch (err) {
        // Best-effort cleanup of the orphaned temp file before rethrowing.
        try {
            fs.unlinkSync(tmpPath);
        }
        catch {
            // ignore — the temp file may never have been created
        }
        throw err;
    }
}
|
|
144
|
+
/**
 * Download url to destination on disk. Prefers Bun's native writer when the
 * runtime provides it; otherwise buffers the body and writes it with fs.
 * Throws on any non-2xx response.
 */
async function downloadArchive(url, destination) {
    const response = await fetchWithRetry(url, undefined, { timeout: 120_000, retries: 1 });
    if (!response.ok) {
        throw new Error(`Failed to download Context Hub archive (${response.status}) from ${url}`);
    }
    const bun = globalThis.Bun;
    if (bun?.write) {
        // Bun can stream a Response straight to a file.
        await bun.write(destination, response);
    }
    else {
        const body = Buffer.from(await response.arrayBuffer());
        fs.writeFileSync(destination, body);
    }
}
|
|
157
|
+
/**
 * Scan the extracted repo's content/ directory and build the index of
 * Context Hub entries from every DOC.md / SKILL.md it contains.
 * Throws when the repo layout is missing the content/ directory.
 */
function buildContextHubIndex(repoDir) {
    const contentDir = path.join(repoDir, "content");
    if (!fs.existsSync(contentDir) || !fs.statSync(contentDir).isDirectory()) {
        throw new Error(`Context Hub repo at ${repoDir} is missing a content/ directory`);
    }
    const entries = [];
    for (const filePath of findEntryFiles(contentDir)) {
        const entry = buildEntry(repoDir, contentDir, filePath);
        // buildEntry returns null for files it cannot index — skip those.
        if (entry) {
            entries.push(entry);
        }
    }
    return entries;
}
|
|
171
|
+
/**
 * Recursively collect every DOC.md / SKILL.md file under dir.
 *
 * Fix: directory entries are visited in sorted name order so the resulting
 * index (and the cached JSON built from it) is deterministic — the order of
 * fs.readdirSync is otherwise filesystem/platform-dependent.
 */
function findEntryFiles(dir) {
    const results = [];
    const dirents = fs
        .readdirSync(dir, { withFileTypes: true })
        .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
    for (const entry of dirents) {
        const full = path.join(dir, entry.name);
        if (entry.isDirectory()) {
            results.push(...findEntryFiles(full));
        }
        else if (entry.name === "DOC.md" || entry.name === "SKILL.md") {
            results.push(full);
        }
    }
    return results;
}
|
|
184
|
+
/**
 * Turn one DOC.md / SKILL.md file into an index entry.
 * The directory layout (content/<author>/...) supplies the author; the
 * frontmatter supplies name, description and metadata (tags/languages/
 * versions). Returns null when the author segment or entry name is empty
 * after sanitization.
 */
function buildEntry(repoDir, contentDir, fullPath) {
    const toPosix = (p) => path.posix.normalize(p.replace(/\\/g, "/"));
    const parsed = parseFrontmatter(fs.readFileSync(fullPath, "utf8"));
    const relPath = toPosix(path.relative(repoDir, fullPath));
    const relFromContent = toPosix(path.relative(contentDir, fullPath));
    // First path segment below content/ is the author namespace.
    const author = sanitizeString(relFromContent.split("/")[0] ?? "");
    if (!author)
        return null;
    // Prefer the frontmatter name; fall back to the containing directory name.
    const name = sanitizeString(toStringOrUndefined(parsed.data.name) ?? path.basename(path.dirname(fullPath)));
    if (!name)
        return null;
    const metadata = parsed.data.metadata ?? {};
    const language = sanitizeString(toStringOrUndefined(metadata.languages));
    const version = sanitizeString(toStringOrUndefined(metadata.versions));
    const id = `${author}/${name}`;
    return {
        id,
        ref: makeContextHubRef(relPath),
        assetType: path.basename(fullPath) === "SKILL.md" ? "skill" : "knowledge",
        filePath: relPath,
        description: sanitizeString(toStringOrUndefined(parsed.data.description), 1000),
        tags: parseCsv(metadata.tags),
        language: language || undefined,
        version: version || undefined,
        sortName: `${id}:${language ?? ""}:${version ?? ""}`,
    };
}
|
|
214
|
+
/**
 * Relevance score for an entry against a free-text query.
 * Each whitespace-separated token is matched against weighted fields
 * (id > description/tags > language/version); an exact field match doubles
 * the weight. The summed per-token best scores are scaled by token coverage,
 * plus a +5 bonus for an exact id match, rounded to 3 decimals.
 * Returns 1 for an empty query and 0 when no token matches anything.
 */
function scoreEntry(entry, query) {
    const trimmed = query.trim().toLowerCase();
    if (!trimmed)
        return 1;
    const tokens = trimmed.split(/\s+/).filter(Boolean);
    if (tokens.length === 0)
        return 1;
    const fields = [
        { text: entry.id.toLowerCase(), weight: 4 },
        { text: entry.description?.toLowerCase() ?? "", weight: 2 },
        { text: (entry.tags ?? []).join(" ").toLowerCase(), weight: 2 },
        { text: entry.language?.toLowerCase() ?? "", weight: 1 },
        { text: entry.version?.toLowerCase() ?? "", weight: 1 },
    ];
    // Best field score per token: exact match counts double, substring once.
    const perToken = tokens.map((token) => fields.reduce((best, { text, weight }) => {
        if (!text)
            return best;
        if (text === token)
            return Math.max(best, weight * 2);
        return text.includes(token) ? Math.max(best, weight) : best;
    }, 0));
    const hits = perToken.filter((s) => s > 0);
    if (hits.length === 0)
        return 0;
    const total = hits.reduce((sum, s) => sum + s, 0);
    const coverage = hits.length / tokens.length;
    const exactBonus = entry.id.toLowerCase() === trimmed ? 5 : 0;
    return Math.round((total * coverage + exactBonus) * 1000) / 1000;
}
|
|
250
|
+
/** True when the entry's asset type satisfies the filter ("any" or unset matches all). */
function matchesType(entry, requested) {
    return !requested || requested === "any" || entry.assetType === requested;
}
|
|
255
|
+
/**
 * Convert an index entry into a search-hit record, folding language/version
 * into the description as "desc — lang • ver" and attaching the CLI action.
 */
function entryToHit(entry, score) {
    const details = [entry.language, entry.version].filter(Boolean).join(" • ");
    const parts = [entry.description, details].filter(Boolean);
    return {
        type: entry.assetType,
        name: entry.id,
        path: entry.ref,
        ref: entry.ref,
        origin: "context-hub",
        editable: false,
        description: parts.length > 0 ? parts.join(" — ") : undefined,
        tags: entry.tags,
        action: `akm show ${entry.ref}`,
        score,
    };
}
|
|
271
|
+
/**
 * Project document content according to the requested view mode.
 * "full" (or no view) returns content unchanged; "toc" renders the heading
 * outline; "frontmatter" returns only the YAML header; "section" extracts one
 * heading (throwing UsageError when absent); "lines" slices a line range.
 * Unknown modes fall back to the full content.
 */
function renderContentForView(content, view) {
    if (!view || view.mode === "full")
        return content;
    if (view.mode === "toc")
        return formatToc(parseMarkdownToc(content));
    if (view.mode === "frontmatter")
        return extractFrontmatterOnly(content) ?? "(no frontmatter)";
    if (view.mode === "section") {
        const section = extractSection(content, view.heading);
        if (!section) {
            throw new UsageError(`Section not found: ${view.heading}`);
        }
        return section.content;
    }
    if (view.mode === "lines")
        return extractLineRange(content, view.start, view.end);
    return content;
}
|
|
292
|
+
/**
 * Map a Context Hub-relative file path onto the cached repo directory,
 * rejecting anything outside content/ or escaping the repo root
 * (path-traversal guard). Throws UsageError on invalid refs.
 */
function resolveCachedFilePath(repoDir, filePath) {
    const reject = () => {
        throw new UsageError(`Invalid Context Hub ref: ${filePath}`);
    };
    const normalized = path.posix.normalize(filePath.replace(/\\/g, "/"));
    if (!normalized.startsWith("content/"))
        reject();
    const root = path.resolve(repoDir);
    const resolved = path.resolve(repoDir, normalized);
    // Containment check: resolved must live strictly inside the repo root.
    if (!resolved.startsWith(root + path.sep))
        reject();
    return resolved;
}
|
|
304
|
+
/** GitHub archive URL for the head of the given repo's branch/ref. */
function buildTarballUrl(repo) {
    const { owner, repo: name, ref } = repo;
    return `https://github.com/${owner}/${name}/archive/refs/heads/${ref}.tar.gz`;
}
|
|
307
|
+
/**
 * Parse and validate a Context Hub GitHub repository URL.
 * Accepts https://github.com/<owner>/<repo>[/tree/<ref>] (ref defaults to
 * "main"), validates owner/repo/ref character sets, and returns the parts
 * plus a canonical tree URL. Throws ConfigError on any invalid input.
 */
function parseContextHubRepoUrl(rawUrl) {
    if (!rawUrl) {
        throw new ConfigError("Context Hub provider requires a GitHub repository URL");
    }
    let parsed;
    try {
        parsed = new URL(rawUrl);
    }
    catch {
        throw new ConfigError(`Context Hub URL is not valid: "${rawUrl}"`);
    }
    if (parsed.protocol !== "https:") {
        throw new ConfigError(`Context Hub URL must use https://, got "${parsed.protocol}"`);
    }
    if (parsed.hostname !== "github.com") {
        throw new ConfigError(`Context Hub provider only supports github.com URLs, got "${parsed.hostname}"`);
    }
    const segments = parsed.pathname.split("/").filter(Boolean);
    if (segments.length < 2) {
        throw new ConfigError(`Context Hub URL must point to a GitHub repository, got "${rawUrl}"`);
    }
    const owner = sanitizeString(segments[0]);
    const repo = sanitizeString(segments[1].replace(/\.git$/i, ""));
    // An optional /tree/<branch...> suffix selects a ref; default is "main".
    const ref = segments[2] === "tree" && segments.length >= 4
        ? sanitizeString(segments.slice(3).join("/"), 255) || "main"
        : "main";
    const NAME_RE = /^[A-Za-z0-9_.-]+$/;
    if (!owner || !repo || !NAME_RE.test(owner) || !NAME_RE.test(repo)) {
        throw new ConfigError(`Unsupported Context Hub repository URL: "${rawUrl}"`);
    }
    if (!ref || ref.includes("..") || !/^[A-Za-z0-9._/-]+$/.test(ref)) {
        throw new ConfigError(`Unsupported Context Hub branch/ref in URL: "${rawUrl}"`);
    }
    return {
        owner,
        repo,
        ref,
        canonicalUrl: `https://github.com/${owner}/${repo}/tree/${ref}`,
    };
}
|
|
347
|
+
/** Build a Context Hub ref string by prefixing the normalized posix path. */
function makeContextHubRef(filePath) {
    const normalized = path.posix.normalize(filePath);
    return CONTEXT_HUB_REF_PREFIX + normalized;
}
|
|
350
|
+
/**
 * Extract the repo-relative file path from a Context Hub ref.
 * Throws UsageError when the ref lacks the expected prefix or when the
 * remainder after the prefix is empty.
 */
function parseContextHubRef(ref) {
    const trimmed = ref.trim();
    const invalid = () => new UsageError(`Invalid Context Hub ref: ${ref}`);
    if (!trimmed.startsWith(CONTEXT_HUB_REF_PREFIX)) {
        throw invalid();
    }
    const filePath = trimmed.slice(CONTEXT_HUB_REF_PREFIX.length);
    if (!filePath) {
        throw invalid();
    }
    return filePath;
}
|
|
361
|
+
/**
 * Split a comma-separated string into sanitized items (each capped at 100
 * chars). Returns undefined for non-strings or when nothing survives.
 */
function parseCsv(value) {
    if (typeof value !== "string")
        return undefined;
    const items = [];
    for (const piece of value.split(",")) {
        const item = sanitizeString(piece.trim(), 100);
        if (item)
            items.push(item);
    }
    return items.length > 0 ? items : undefined;
}
|
|
370
|
+
/**
 * Coerce value to a safe string: non-strings become "", ASCII control
 * characters (C0 range and DEL) are stripped, and the result is truncated
 * to maxLength (default 255) characters.
 */
function sanitizeString(value, maxLength = 255) {
    if (typeof value !== "string")
        return "";
    // biome-ignore lint/suspicious/noControlCharactersInRegex: strips untrusted control chars from remote metadata
    const cleaned = value.replace(/[\u0000-\u001f\u007f]/g, "");
    return cleaned.slice(0, maxLength);
}
|
|
376
|
+
/** True when a cache timestamp (epoch ms) is older than ttlMs. */
function isExpired(mtimeMs, ttlMs) {
    const ageMs = Date.now() - mtimeMs;
    return ageMs > ttlMs;
}
|
|
379
|
+
/**
 * Runtime type guard for cached index entries: a plain (non-array) object
 * with string id/ref/filePath/sortName and an assetType of "knowledge" or
 * "skill".
 */
function isContextHubEntry(value) {
    if (typeof value !== "object" || value === null || Array.isArray(value))
        return false;
    const obj = value;
    const stringsOk = ["id", "ref", "filePath", "sortName"].every((k) => typeof obj[k] === "string");
    return stringsOk && (obj.assetType === "knowledge" || obj.assetType === "skill");
}
|
|
389
|
+
export { ContextHubStashProvider, buildContextHubIndex, makeContextHubRef, parseContextHubRef, parseContextHubRepoUrl };
|
package/dist/stash-search.js
CHANGED
|
@@ -128,12 +128,37 @@ export function registerActionBuilder(type, builder) {
|
|
|
128
128
|
// Re-export for consumers that were already importing from stash-search
|
|
129
129
|
export { buildLocalAction, rendererForType };
|
|
130
130
|
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
131
|
-
|
|
131
|
+
/**
 * Merge hits from local stash and additional providers using Reciprocal Rank
 * Fusion (RRF). Each list is already internally sorted by relevance. RRF
 * assigns scores based on rank position rather than raw score values, so
 * sources with incompatible score scales (e.g. RRF ~0.01-0.03 vs 0-1 or
 * 0-100) are merged fairly.
 *
 * Fix: when only one list is non-empty there is nothing to fuse, so that list
 * is returned truncated with its original scores intact — matching the
 * behavior of mergeSearchHits, instead of re-scoring a lone provider list.
 */
export function mergeStashHits(localHits, additionalHits, limit) {
    if (additionalHits.length === 0)
        return localHits.slice(0, limit);
    if (localHits.length === 0)
        return additionalHits.slice(0, limit);
    const RRF_K = 60;
    const scoreMap = new Map();
    const applyRankedList = (hits) => {
        for (let i = 0; i < hits.length; i++) {
            // Hits appearing in both lists share a key and accumulate score.
            const key = hits[i].path ?? hits[i].ref ?? hits[i].name;
            const rrf = 1 / (RRF_K + i + 1);
            const existing = scoreMap.get(key);
            if (existing) {
                existing.score += rrf;
            }
            else {
                scoreMap.set(key, { hit: hits[i], score: rrf });
            }
        }
    };
    applyRankedList(localHits);
    applyRankedList(additionalHits);
    return [...scoreMap.values()]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((v) => ({ ...v.hit, score: Math.round(v.score * 10000) / 10000 }));
}
|
|
138
163
|
function normalizeLimit(limit) {
|
|
139
164
|
if (typeof limit !== "number" || Number.isNaN(limit) || limit <= 0) {
|
|
@@ -151,8 +176,46 @@ export function parseSearchSource(source) {
|
|
|
151
176
|
return "stash";
|
|
152
177
|
throw new UsageError(`Invalid value for --source: ${String(source)}. Expected one of: stash|registry|both`);
|
|
153
178
|
}
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
179
|
+
/**
 * Merge stash hits and registry hits using RRF, same rationale as
 * mergeStashHits. When one side is empty the other is returned truncated
 * with its original scores. Registry keys are namespaced ("registry:<id>")
 * so they never collide with stash keys.
 */
export function mergeSearchHits(localHits, registryHits, limit) {
    if (registryHits.length === 0)
        return localHits.slice(0, limit);
    if (localHits.length === 0)
        return registryHits.slice(0, limit);
    const RRF_K = 60;
    const scoreMap = new Map();
    // Single rank-fusion accumulator; keyOf distinguishes stash vs registry keys.
    const fold = (hits, keyOf) => {
        hits.forEach((hit, rank) => {
            const key = keyOf(hit);
            const rrf = 1 / (RRF_K + rank + 1);
            const existing = scoreMap.get(key);
            if (existing) {
                existing.score += rrf;
            }
            else {
                scoreMap.set(key, { hit, score: rrf });
            }
        });
    };
    fold(localHits, (hit) => hit.path ?? hit.ref ?? hit.name);
    fold(registryHits, (hit) => `registry:${hit.id ?? hit.name}`);
    return [...scoreMap.values()]
        .sort((a, b) => b.score - a.score)
        .slice(0, limit)
        .map((v) => ({ ...v.hit, score: Math.round(v.score * 10000) / 10000 }));
}
|