akm-cli 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -1
- package/dist/cli.js +26 -0
- package/dist/db.js +14 -9
- package/dist/detect.js +120 -0
- package/dist/embedder.js +18 -3
- package/dist/indexer.js +59 -1
- package/dist/local-search.js +120 -23
- package/dist/setup.js +506 -0
- package/dist/stash-providers/context-hub.js +390 -0
- package/dist/stash-providers/index.js +1 -0
- package/dist/stash-search.js +71 -8
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -25,12 +25,15 @@ Upgrade in place with `akm upgrade`.
|
|
|
25
25
|
## Quick Start
|
|
26
26
|
|
|
27
27
|
```sh
|
|
28
|
-
akm
|
|
28
|
+
akm setup # Guided setup: configure, initialize, and index
|
|
29
29
|
akm add github:owner/repo # Add a kit from GitHub
|
|
30
30
|
akm search "deploy" # Find assets
|
|
31
31
|
akm show script:deploy.sh # View details and run command
|
|
32
32
|
```
|
|
33
33
|
|
|
34
|
+
If you want to skip the wizard, `akm init --dir ~/custom-stash` initializes the
|
|
35
|
+
working stash at a custom path.
|
|
36
|
+
|
|
34
37
|
## Features
|
|
35
38
|
|
|
36
39
|
### Works with Any AI Agent
|
package/dist/cli.js
CHANGED
|
@@ -41,6 +41,8 @@ const pkgVersion = (() => {
|
|
|
41
41
|
const OUTPUT_FORMATS = ["json", "yaml", "text"];
|
|
42
42
|
const DETAIL_LEVELS = ["brief", "normal", "full"];
|
|
43
43
|
const NORMAL_DESCRIPTION_LIMIT = 250;
|
|
44
|
+
const CONTEXT_HUB_ALIAS_REF = "context-hub";
|
|
45
|
+
const CONTEXT_HUB_ALIAS_URL = "https://github.com/andrewyng/context-hub";
|
|
44
46
|
function hasBunYAML(b) {
|
|
45
47
|
// biome-ignore lint/suspicious/noExplicitAny: type guard for runtime feature detection
|
|
46
48
|
return typeof b.YAML?.stringify === "function";
|
|
@@ -403,6 +405,18 @@ function formatSearchPlain(r, detail) {
|
|
|
403
405
|
* - registry-* : Kit discovery from remote registries (npm, GitHub)
|
|
404
406
|
* - installed-kits : Management of kits already installed locally
|
|
405
407
|
*/
|
|
408
|
+
/**
 * `akm setup` sub-command.
 *
 * Starts the interactive configuration wizard. The wizard module is loaded
 * with a dynamic import so it is only evaluated when this command actually
 * runs, and the whole invocation is wrapped in `runWithJsonErrors`.
 */
const setupCommand = defineCommand({
    meta: {
        name: "setup",
        description: "Interactive configuration wizard for embeddings, LLM, registries, and stash sources",
    },
    async run() {
        await runWithJsonErrors(async () => {
            // Lazy-load the wizard module on demand.
            const wizardModule = await import("./setup");
            await wizardModule.runSetupWizard();
        });
    },
});
|
|
406
420
|
const initCommand = defineCommand({
|
|
407
421
|
meta: {
|
|
408
422
|
name: "init",
|
|
@@ -468,6 +482,15 @@ const addCommand = defineCommand({
|
|
|
468
482
|
},
|
|
469
483
|
async run({ args }) {
|
|
470
484
|
await runWithJsonErrors(async () => {
|
|
485
|
+
if (args.ref.trim() === CONTEXT_HUB_ALIAS_REF) {
|
|
486
|
+
const result = addStash({
|
|
487
|
+
target: CONTEXT_HUB_ALIAS_URL,
|
|
488
|
+
providerType: "context-hub",
|
|
489
|
+
name: "context-hub",
|
|
490
|
+
});
|
|
491
|
+
output("stash-add", result);
|
|
492
|
+
return;
|
|
493
|
+
}
|
|
471
494
|
const result = await akmAdd({ ref: args.ref });
|
|
472
495
|
output("add", result);
|
|
473
496
|
});
|
|
@@ -966,6 +989,7 @@ const main = defineCommand({
|
|
|
966
989
|
quiet: { type: "boolean", alias: "q", description: "Suppress stderr warnings", default: false },
|
|
967
990
|
},
|
|
968
991
|
subCommands: {
|
|
992
|
+
setup: setupCommand,
|
|
969
993
|
init: initCommand,
|
|
970
994
|
index: indexCommand,
|
|
971
995
|
add: addCommand,
|
|
@@ -1143,6 +1167,7 @@ akm search "<query>" --type skill # Filter by type
|
|
|
1143
1167
|
akm search "<query>" --source both # Also search registries for installable kits
|
|
1144
1168
|
akm show <ref> # View asset details
|
|
1145
1169
|
akm add <ref> # Install a kit (npm, GitHub, git, local)
|
|
1170
|
+
akm add context-hub # Shortcut for adding Context Hub as a stash provider
|
|
1146
1171
|
akm clone <ref> # Copy an asset to the working stash (optional --dest arg to clone to specific location)
|
|
1147
1172
|
akm registry search "<query>" # Search all registries
|
|
1148
1173
|
\`\`\`
|
|
@@ -1213,6 +1238,7 @@ akm add <ref> # Install a kit (smart router: loc
|
|
|
1213
1238
|
akm add @scope/kit # From npm
|
|
1214
1239
|
akm add owner/repo # From GitHub
|
|
1215
1240
|
akm add ./path/to/local/kit # From local directory (adds as stash)
|
|
1241
|
+
akm add context-hub # Add the official Context Hub stash
|
|
1216
1242
|
akm kit add <ref> # Install a kit (explicit)
|
|
1217
1243
|
akm kit list # List installed kits
|
|
1218
1244
|
akm kit remove <target> # Remove a kit
|
package/dist/db.js
CHANGED
|
@@ -385,17 +385,22 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
385
385
|
return [];
|
|
386
386
|
}
|
|
387
387
|
}
|
|
388
|
-
function sanitizeFtsQuery(query) {
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
388
|
+
/**
 * Turn free-form user input into a string that is safe to hand to an FTS5
 * MATCH expression.
 *
 * Steps:
 *  1. Every character that is not an ASCII letter, digit, underscore or
 *     whitespace is replaced with a space, so compound identifiers such as
 *     "code-review" or "k8s.setup" become separate tokens ("code review",
 *     "k8s setup") instead of triggering FTS5 syntax errors.
 *  2. Tokens that are FTS5 keywords (AND, OR, NOT, NEAR — uppercase only,
 *     because FTS5 keywords are case-sensitive) are dropped. Left in place
 *     they would be parsed as operators: "foo OR" is a syntax error and
 *     "foo NOT bar" would invert the meaning of the query.
 *  3. The remaining tokens are joined with single spaces, which FTS5 treats
 *     as an implicit AND (only rows containing ALL terms match).
 *
 * @param {string} query - Raw search text typed by the user.
 * @returns {string} Space-separated bareword tokens, or "" when nothing
 *   searchable remains.
 */
export function sanitizeFtsQuery(query) {
    // Uppercase barewords that FTS5 interprets as query operators.
    const ftsKeywords = new Set(["AND", "OR", "NOT", "NEAR"]);
    // Allow only characters that are safe in FTS5 barewords.
    const stripped = query.replace(/[^a-zA-Z0-9_\s]/g, " ");
    const tokens = stripped
        .split(/\s+/)
        .filter((token) => token.length >= 1 && !ftsKeywords.has(token));
    if (tokens.length === 0)
        return "";
    // Implicit AND (space-separated tokens) for precision.
    return tokens.join(" ");
}
|
|
400
405
|
// ── All entries ─────────────────────────────────────────────────────────────
|
|
401
406
|
export function getAllEntries(db, entryType) {
|
package/dist/detect.js
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Service detection utilities for the setup wizard.
|
|
3
|
+
*
|
|
4
|
+
* Pure detection functions with no user interaction — each returns
|
|
5
|
+
* a result object describing what was found.
|
|
6
|
+
*/
|
|
7
|
+
import fs from "node:fs";
|
|
8
|
+
import path from "node:path";
|
|
9
|
+
// ── Ollama Detection ────────────────────────────────────────────────────────
|
|
10
|
+
const OLLAMA_BASE = "http://localhost:11434";
/**
 * Probe for a local Ollama installation and collect its model names.
 *
 * Two strategies are attempted in order:
 *  1. GET `${OLLAMA_BASE}/api/tags` with a 3 second abort timeout.
 *  2. Spawn `ollama list` and parse its table output (header row skipped).
 *
 * In both cases a trailing ":latest" suffix is removed from each name, empty
 * names are discarded and the list is sorted with the default string order.
 * Failures of either strategy are swallowed on purpose — detection is
 * best-effort and simply reports `available: false`.
 *
 * @returns {Promise<{available: boolean, models: string[], endpoint: string}>}
 */
export async function detectOllama() {
    const detection = { available: false, models: [], endpoint: OLLAMA_BASE };
    // Strategy 1: HTTP API
    try {
        const res = await fetch(`${OLLAMA_BASE}/api/tags`, {
            signal: AbortSignal.timeout(3000),
        });
        if (res.ok) {
            const payload = (await res.json());
            if (Array.isArray(payload.models)) {
                const names = [];
                for (const model of payload.models) {
                    if (typeof model.name !== "string")
                        continue;
                    const cleaned = model.name.replace(/:latest$/, "");
                    if (cleaned)
                        names.push(cleaned);
                }
                names.sort();
                detection.models = names;
                detection.available = true;
                return detection;
            }
        }
    }
    catch {
        // HTTP probe failed — fall through to the CLI probe
    }
    // Strategy 2: `ollama list` subprocess
    try {
        const child = Bun.spawn(["ollama", "list"], {
            stdout: "pipe",
            stderr: "pipe",
        });
        const listing = await new Response(child.stdout).text();
        const status = await child.exited;
        if (status === 0 && listing.trim()) {
            // First row is the column header
            const [, ...rows] = listing.trim().split("\n");
            const names = [];
            for (const row of rows) {
                const cleaned = row.split(/\s+/)[0]?.replace(/:latest$/, "");
                if (cleaned)
                    names.push(cleaned);
            }
            names.sort();
            detection.models = names;
            detection.available = true;
        }
    }
    catch {
        // Ollama not installed or not in PATH
    }
    return detection;
}
|
|
64
|
+
// ── Agent Platform Detection ────────────────────────────────────────────────
|
|
65
|
+
const AGENT_PLATFORMS = [
    { name: "Claude Code", relPath: ".claude" },
    { name: "OpenCode", relPath: ".config/opencode" },
    { name: "Continue", relPath: ".continue" },
    { name: "Codeium / Windsurf", relPath: ".codeium" },
    { name: "Cursor", relPath: ".cursor" },
    { name: "Codex CLI", relPath: ".codex" },
];
/**
 * Report which known agent platforms have a config directory under the
 * user's home directory.
 *
 * The home directory is read from HOME, falling back to USERPROFILE; both
 * are trimmed and an empty value counts as unset. A platform is reported only
 * when its path exists and is a directory; stat errors count as "not found".
 *
 * @returns {{name: string, path: string}[]} Detected platforms, in the order
 *   they appear in AGENT_PLATFORMS. Empty when no home directory is known.
 */
export function detectAgentPlatforms() {
    const homeDir = process.env.HOME?.trim() || process.env.USERPROFILE?.trim();
    if (!homeDir)
        return [];
    const detected = [];
    for (const { name, relPath } of AGENT_PLATFORMS) {
        const candidate = path.join(homeDir, relPath);
        let isDir;
        try {
            isDir = fs.statSync(candidate).isDirectory();
        }
        catch {
            // Missing or unreadable path
            isDir = false;
        }
        if (isDir)
            detected.push({ name, path: candidate });
    }
    return detected;
}
|
|
94
|
+
// ── OpenViking Detection ────────────────────────────────────────────────────
|
|
95
|
+
/**
 * Determine whether an OpenViking server answers at `url`.
 *
 * Trailing slashes are stripped from `url`, then two probes are tried in
 * order, each with its own 5 second abort timeout:
 *  1. GET `/api/v1/fs/stat?uri=viking://` (URI component-encoded)
 *  2. GET the normalized base URL itself
 *
 * Any HTTP response — including non-2xx — counts as reachable; only a
 * rejected fetch (network error, timeout, unparsable URL) counts as a miss.
 *
 * @param {string} url - Base URL of the OpenViking server.
 * @returns {Promise<{available: boolean, url: string}>} `url` is the
 *   normalized (slash-trimmed) base URL.
 */
export async function detectOpenViking(url) {
    const baseUrl = url.replace(/\/+$/, "");
    const probeTargets = [
        `${baseUrl}/api/v1/fs/stat?uri=${encodeURIComponent("viking://")}`,
        baseUrl,
    ];
    for (const target of probeTargets) {
        try {
            await fetch(target, { signal: AbortSignal.timeout(5000) });
            return { available: true, url: baseUrl };
        }
        catch {
            // This probe failed — move on to the next target, if any
        }
    }
    return { available: false, url: baseUrl };
}
|
package/dist/embedder.js
CHANGED
|
@@ -30,6 +30,19 @@ async function embedLocal(text) {
|
|
|
30
30
|
const result = await model(text, { pooling: "mean", normalize: true });
|
|
31
31
|
return Array.from(result.data);
|
|
32
32
|
}
|
|
33
|
+
// ── Vector normalization ─────────────────────────────────────────────────────
|
|
34
|
+
/**
 * Scale a vector so its Euclidean (L2) length is 1.
 *
 * A vector whose length is exactly 0 cannot be scaled and is returned as-is
 * (the same reference); otherwise a new vector is produced via `map`.
 *
 * @param {number[]} vec - Embedding vector.
 * @returns {number[]} Unit-length vector, or `vec` itself when its norm is 0.
 */
function l2Normalize(vec) {
    let sumOfSquares = 0;
    vec.forEach((component) => {
        sumOfSquares = sumOfSquares + component * component;
    });
    const magnitude = Math.sqrt(sumOfSquares);
    return magnitude === 0 ? vec : vec.map((component) => component / magnitude);
}
|
|
33
46
|
// ── OpenAI-compatible remote embedder ───────────────────────────────────────
|
|
34
47
|
async function embedRemote(text, config) {
|
|
35
48
|
const headers = { "Content-Type": "application/json" };
|
|
@@ -56,7 +69,7 @@ async function embedRemote(text, config) {
|
|
|
56
69
|
if (!json.data?.[0]?.embedding) {
|
|
57
70
|
throw new Error("Unexpected embedding response format: missing data[0].embedding");
|
|
58
71
|
}
|
|
59
|
-
return json.data[0].embedding;
|
|
72
|
+
return l2Normalize(json.data[0].embedding);
|
|
60
73
|
}
|
|
61
74
|
// ── Public API ──────────────────────────────────────────────────────────────
|
|
62
75
|
/**
|
|
@@ -118,11 +131,13 @@ async function embedRemoteBatch(texts, config) {
|
|
|
118
131
|
if (!json.data || json.data.length !== batch.length) {
|
|
119
132
|
throw new Error(`Unexpected embedding batch response: expected ${batch.length} embeddings, got ${json.data?.length ?? 0}`);
|
|
120
133
|
}
|
|
121
|
-
|
|
134
|
+
// Sort by index to guarantee correct order (OpenAI API doesn't guarantee order)
|
|
135
|
+
const sorted = [...json.data].sort((a, b) => a.index - b.index);
|
|
136
|
+
for (const [idx, d] of sorted.entries()) {
|
|
122
137
|
if (!Array.isArray(d.embedding)) {
|
|
123
138
|
throw new Error(`Unexpected embedding at batch index ${idx}: missing or invalid`);
|
|
124
139
|
}
|
|
125
|
-
results.push(d.embedding);
|
|
140
|
+
results.push(l2Normalize(d.embedding));
|
|
126
141
|
}
|
|
127
142
|
}
|
|
128
143
|
return results;
|
package/dist/indexer.js
CHANGED
|
@@ -162,6 +162,13 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
|
|
|
162
162
|
}
|
|
163
163
|
}
|
|
164
164
|
// Phase 2 (sync): write all pre-generated metadata inside a single transaction.
|
|
165
|
+
//
|
|
166
|
+
// Cross-stash dedup: track indexed assets by content identity
|
|
167
|
+
// (type + filename + description) so the same asset from a lower-priority
|
|
168
|
+
// stash root is skipped when a higher-priority root already covers it.
|
|
169
|
+
// Sources are ordered by priority (primary stash first), so the first
|
|
170
|
+
// occurrence wins.
|
|
171
|
+
const indexedAssetIdentities = new Set();
|
|
165
172
|
const insertTransaction = db.transaction(() => {
|
|
166
173
|
// HI-5: Perform the full-rebuild wipe as the FIRST step of the insert
|
|
167
174
|
// transaction so delete and re-insert are atomic — a concurrent reader
|
|
@@ -190,8 +197,20 @@ async function indexEntries(db, allStashDirs, _stashDir, isIncremental, builtAtM
|
|
|
190
197
|
// Delete old entries for this dir (will be re-inserted)
|
|
191
198
|
deleteEntriesByDir(db, dirPath);
|
|
192
199
|
if (stash) {
|
|
200
|
+
// Build a lookup for matching filename-less entries to actual files
|
|
201
|
+
const fileBasenameMap = buildFileBasenameMap(files);
|
|
193
202
|
for (const entry of stash.entries) {
|
|
194
|
-
const entryPath = entry.filename
|
|
203
|
+
const entryPath = entry.filename
|
|
204
|
+
? path.join(dirPath, entry.filename)
|
|
205
|
+
: matchEntryToFile(entry.name, fileBasenameMap, files);
|
|
206
|
+
if (!entryPath)
|
|
207
|
+
continue; // skip unresolvable entries
|
|
208
|
+
// Skip if a higher-priority stash root already indexed this asset
|
|
209
|
+
const basename = path.basename(entryPath);
|
|
210
|
+
const identityKey = `${entry.type}\0${basename}\0${entry.description ?? ""}`;
|
|
211
|
+
if (indexedAssetIdentities.has(identityKey))
|
|
212
|
+
continue;
|
|
213
|
+
indexedAssetIdentities.add(identityKey);
|
|
195
214
|
const entryKey = `${currentStashDir}:${entry.type}:${entry.name}`;
|
|
196
215
|
const searchText = buildSearchText(entry);
|
|
197
216
|
const entryWithSize = attachFileSize(entry, entryPath);
|
|
@@ -335,6 +354,43 @@ async function enhanceStashWithLlm(llmConfig, stash, _dirPath, files) {
|
|
|
335
354
|
}
|
|
336
355
|
return { entries: enhanced };
|
|
337
356
|
}
|
|
357
|
+
/**
 * Index file paths by their extension-less base name.
 *
 * When several files share a base name (e.g. "deploy.sh" and "deploy.md"),
 * only the first one encountered is kept, so lookups are never ambiguous.
 *
 * @param {Iterable<string>} files - File paths to index.
 * @returns {Map<string, string>} Base name (no extension) → original path.
 */
export function buildFileBasenameMap(files) {
    const pathsByStem = new Map();
    for (const filePath of files) {
        const extension = path.extname(filePath);
        const stem = path.basename(filePath, extension);
        if (pathsByStem.has(stem))
            continue; // first occurrence wins
        pathsByStem.set(stem, filePath);
    }
    return pathsByStem;
}
|
|
370
|
+
/**
 * Resolve an entry that has no `filename` to one of the directory's files.
 *
 * Lookup keys are tried in this order against `fileMap`:
 *   1. the full entry name
 *   2. the last "/"-separated segment of the name (only when it differs),
 *      for hierarchical names such as "corpus/agentic-patterns/foo"
 * If neither key is present, the first element of `files` is used.
 *
 * @param {string} entryName - Name of the entry to resolve.
 * @param {Map<string, string>} fileMap - Base name → path lookup.
 * @param {string[]} files - Files in the directory.
 * @returns {string | null} Matched path, or null when `files` is empty (or
 *   its first element is falsy) and no key matched.
 */
export function matchEntryToFile(entryName, fileMap, files) {
    const lookupKeys = [entryName];
    const tail = entryName.split("/").pop() ?? entryName;
    if (tail !== entryName)
        lookupKeys.push(tail);
    for (const key of lookupKeys) {
        const hit = fileMap.get(key);
        if (hit)
            return hit;
    }
    // Nothing matched by name — fall back to the first file, if any
    return files[0] || null;
}
|
|
338
394
|
export function buildSearchText(entry) {
|
|
339
395
|
const parts = [entry.name.replace(/[-_]/g, " ")];
|
|
340
396
|
if (entry.description)
|
|
@@ -347,6 +403,8 @@ export function buildSearchText(entry) {
|
|
|
347
403
|
parts.push(entry.aliases.join(" "));
|
|
348
404
|
if (entry.searchHints)
|
|
349
405
|
parts.push(entry.searchHints.join(" "));
|
|
406
|
+
if (entry.usage)
|
|
407
|
+
parts.push(entry.usage.join(" "));
|
|
350
408
|
if (entry.intent) {
|
|
351
409
|
if (entry.intent.when)
|
|
352
410
|
parts.push(entry.intent.when);
|
package/dist/local-search.js
CHANGED
|
@@ -99,7 +99,15 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
99
99
|
if (!query) {
|
|
100
100
|
const typeFilter = searchType === "any" ? undefined : searchType;
|
|
101
101
|
const allEntries = getAllEntries(db, typeFilter);
|
|
102
|
-
|
|
102
|
+
// Deduplicate by file path — multiple entries can share the same file
|
|
103
|
+
const seenFilePaths = new Set();
|
|
104
|
+
const uniqueEntries = allEntries.filter((ie) => {
|
|
105
|
+
if (seenFilePaths.has(ie.filePath))
|
|
106
|
+
return false;
|
|
107
|
+
seenFilePaths.add(ie.filePath);
|
|
108
|
+
return true;
|
|
109
|
+
});
|
|
110
|
+
const selected = uniqueEntries.slice(0, limit);
|
|
103
111
|
const hits = await Promise.all(selected.map((ie) => buildDbHit({
|
|
104
112
|
entry: ie.entry,
|
|
105
113
|
path: ie.filePath,
|
|
@@ -137,6 +145,7 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
137
145
|
}
|
|
138
146
|
}
|
|
139
147
|
// Merge results using RRF
|
|
148
|
+
// Issue #15: "hybrid" for results appearing in both FTS and vec results.
|
|
140
149
|
const scored = [];
|
|
141
150
|
const seenIds = new Set();
|
|
142
151
|
// Process FTS results
|
|
@@ -146,7 +155,8 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
146
155
|
const embedRank = embedRankMap.get(id);
|
|
147
156
|
const embedRrf = embedRank !== undefined ? 1 / (RRF_K + embedRank) : 0;
|
|
148
157
|
const rrfScore = ftsRrf + embedRrf;
|
|
149
|
-
|
|
158
|
+
// Issue #15: combined FTS+vec results are "hybrid", not "semantic"
|
|
159
|
+
const rankingMode = embedRrf > 0 ? "hybrid" : "fts";
|
|
150
160
|
scored.push({ id, entry: result.entry, filePath: result.filePath, score: rrfScore, rankingMode });
|
|
151
161
|
}
|
|
152
162
|
// Add vec-only results not already in FTS results
|
|
@@ -172,45 +182,63 @@ async function searchDatabase(db, query, searchType, limit, stashDir, allStashDi
|
|
|
172
182
|
}
|
|
173
183
|
}
|
|
174
184
|
}
|
|
175
|
-
// Apply boosts as multiplicative factors
|
|
185
|
+
// Apply boosts as multiplicative factors (all boosts in a single phase
|
|
186
|
+
// so that sort order and displayed scores are always consistent — Issue #1).
|
|
176
187
|
const queryTokens = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
177
188
|
for (const item of scored) {
|
|
178
189
|
const entry = item.entry;
|
|
179
190
|
let boostSum = 0;
|
|
180
|
-
// Tag boost
|
|
191
|
+
// Tag boost — capped at 0.30 (Issue #7)
|
|
181
192
|
if (entry.tags) {
|
|
193
|
+
let tagBoost = 0;
|
|
182
194
|
for (const tag of entry.tags) {
|
|
183
195
|
if (queryTokens.some((t) => tag.toLowerCase() === t)) {
|
|
184
|
-
|
|
196
|
+
tagBoost += 0.15;
|
|
185
197
|
}
|
|
186
198
|
}
|
|
199
|
+
boostSum += Math.min(0.3, tagBoost);
|
|
187
200
|
}
|
|
188
|
-
// Search hint boost
|
|
201
|
+
// Search hint boost — capped at 0.24 (Issue #7)
|
|
189
202
|
if (entry.searchHints) {
|
|
203
|
+
let hintBoost = 0;
|
|
190
204
|
for (const hint of entry.searchHints) {
|
|
191
205
|
const hintLower = hint.toLowerCase();
|
|
192
206
|
for (const token of queryTokens) {
|
|
193
207
|
if (hintLower.includes(token)) {
|
|
194
|
-
|
|
208
|
+
hintBoost += 0.12;
|
|
195
209
|
break;
|
|
196
210
|
}
|
|
197
211
|
}
|
|
198
212
|
}
|
|
213
|
+
boostSum += Math.min(0.24, hintBoost);
|
|
199
214
|
}
|
|
200
215
|
// Name boost
|
|
201
216
|
const nameLower = entry.name.toLowerCase().replace(/[-_]/g, " ");
|
|
202
217
|
if (queryTokens.some((t) => nameLower.includes(t))) {
|
|
203
218
|
boostSum += 0.1;
|
|
204
219
|
}
|
|
220
|
+
// Quality boost (Issue #1: moved from buildDbHit to single-phase)
|
|
221
|
+
const qualityBoost = entry.quality === "generated" ? 0 : 0.05;
|
|
222
|
+
boostSum += qualityBoost;
|
|
223
|
+
// Confidence boost (Issue #1: moved from buildDbHit to single-phase)
|
|
224
|
+
const confidenceBoost = typeof entry.confidence === "number" ? Math.min(0.05, Math.max(0, entry.confidence) * 0.05) : 0;
|
|
225
|
+
boostSum += confidenceBoost;
|
|
205
226
|
item.score = item.score * (1 + boostSum);
|
|
206
227
|
}
|
|
207
|
-
|
|
228
|
+
// Issue #14: deterministic tiebreaker on equal scores
|
|
229
|
+
scored.sort((a, b) => b.score - a.score || a.entry.name.localeCompare(b.entry.name));
|
|
230
|
+
// Deduplicate by file path — keep only the highest-scored entry per file.
|
|
231
|
+
// Multiple .stash.json entries can map to the same file (e.g. entries without
|
|
232
|
+
// a filename field all collapse to files[0]). Showing the same path/ref
|
|
233
|
+
// multiple times clutters results.
|
|
234
|
+
const deduped = deduplicateByPath(scored);
|
|
208
235
|
const rankMs = Date.now() - tRank0;
|
|
209
|
-
const selected =
|
|
236
|
+
const selected = deduped.slice(0, limit);
|
|
210
237
|
const hits = await Promise.all(selected.map(({ entry, filePath, score, rankingMode }) => buildDbHit({
|
|
211
238
|
entry,
|
|
212
239
|
path: filePath,
|
|
213
|
-
|
|
240
|
+
// Issue #8: round to 4 decimal places instead of 2
|
|
241
|
+
score: Math.round(score * 10000) / 10000,
|
|
214
242
|
query,
|
|
215
243
|
rankingMode,
|
|
216
244
|
defaultStashDir: stashDir,
|
|
@@ -233,9 +261,10 @@ async function tryVecScores(db, query, k, config) {
|
|
|
233
261
|
const vecResults = searchVec(db, queryEmbedding, k);
|
|
234
262
|
const scores = new Map();
|
|
235
263
|
for (const { id, distance } of vecResults) {
|
|
236
|
-
// Convert L2 distance to cosine similarity (vectors are normalized)
|
|
237
|
-
|
|
238
|
-
|
|
264
|
+
// Convert L2 distance to cosine similarity (vectors are normalized).
|
|
265
|
+
// Issue #3: guard against NaN/Infinity from sqlite-vec edge cases.
|
|
266
|
+
const raw = 1 - (distance * distance) / 2;
|
|
267
|
+
scores.set(id, Number.isFinite(raw) ? Math.max(0, raw) : 0);
|
|
239
268
|
}
|
|
240
269
|
return scores;
|
|
241
270
|
}
|
|
@@ -249,15 +278,18 @@ async function substringSearch(query, searchType, limit, stashDir, sources, conf
|
|
|
249
278
|
const assets = await indexAssets(stashDir, searchType);
|
|
250
279
|
const matched = assets.filter((asset) => !query || buildSearchText(asset.entry).includes(query));
|
|
251
280
|
if (!query) {
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
.map((asset) => assetToSearchHit(asset, query, stashDir, sources, config)));
|
|
281
|
+
const sorted = matched.sort(compareAssets);
|
|
282
|
+
const unique = deduplicateAssetsByPath(sorted);
|
|
283
|
+
return Promise.all(unique.slice(0, limit).map((asset) => assetToSearchHit(asset, query, stashDir, sources, config)));
|
|
256
284
|
}
|
|
257
285
|
// Score and sort by relevance
|
|
258
286
|
const scored = matched.map((asset) => ({ asset, score: scoreSubstringMatch(asset.entry, query) }));
|
|
259
287
|
scored.sort((a, b) => b.score - a.score || compareAssets(a.asset, b.asset));
|
|
260
|
-
|
|
288
|
+
// Deduplicate by path — keep highest-scored entry per file
|
|
289
|
+
const dedupedScored = deduplicateByPath(scored.map((s) => ({ ...s, filePath: s.asset.path })));
|
|
290
|
+
return Promise.all(dedupedScored
|
|
291
|
+
.slice(0, limit)
|
|
292
|
+
.map(({ asset, score }) => assetToSearchHit(asset, query, stashDir, sources, config, score)));
|
|
261
293
|
}
|
|
262
294
|
function scoreSubstringMatch(entry, query) {
|
|
263
295
|
const tokens = query.split(/\s+/).filter(Boolean);
|
|
@@ -282,16 +314,22 @@ function scoreSubstringMatch(entry, query) {
|
|
|
282
314
|
if (tokens.some((t) => descLower.includes(t))) {
|
|
283
315
|
score += 0.05;
|
|
284
316
|
}
|
|
285
|
-
|
|
317
|
+
// Issue #8: round to 4 decimal places instead of 2
|
|
318
|
+
return Math.round(Math.min(1, score) * 10000) / 10000;
|
|
286
319
|
}
|
|
287
320
|
// ── Hit building ────────────────────────────────────────────────────────────
|
|
288
321
|
export async function buildDbHit(input) {
|
|
289
322
|
const entryStashDir = findSourceForPath(input.path, input.sources)?.path ?? input.defaultStashDir;
|
|
290
323
|
const canonical = deriveCanonicalAssetNameFromStashRoot(input.entry.type, entryStashDir, input.path);
|
|
291
324
|
const refName = canonical && !canonical.startsWith("../") && !canonical.startsWith("..\\") ? canonical : input.entry.name;
|
|
325
|
+
// Issue #1: Quality and confidence boosts are now applied in the main scoring
|
|
326
|
+
// phase (searchDatabase). buildDbHit receives the already-final score and
|
|
327
|
+
// passes it through without further multiplication. We still compute the
|
|
328
|
+
// boost values here for buildWhyMatched reporting.
|
|
292
329
|
const qualityBoost = input.entry.quality === "generated" ? 0 : 0.05;
|
|
293
330
|
const confidenceBoost = typeof input.entry.confidence === "number" ? Math.min(0.05, Math.max(0, input.entry.confidence) * 0.05) : 0;
|
|
294
|
-
|
|
331
|
+
// Issue #8: round to 4 decimal places, no boost multiplication
|
|
332
|
+
const score = Math.round(input.score * 10000) / 10000;
|
|
295
333
|
const whyMatched = buildWhyMatched(input.entry, input.query, input.rankingMode, qualityBoost, confidenceBoost);
|
|
296
334
|
const source = findSourceForPath(input.path, input.sources);
|
|
297
335
|
const editable = isEditable(input.path, input.config);
|
|
@@ -316,13 +354,24 @@ export async function buildDbHit(input) {
|
|
|
316
354
|
}
|
|
317
355
|
return hit;
|
|
318
356
|
}
|
|
319
|
-
export function buildWhyMatched(entry, query,
|
|
320
|
-
|
|
357
|
+
export function buildWhyMatched(entry, query,
|
|
358
|
+
// Issue #15: added "hybrid" ranking mode
|
|
359
|
+
rankingMode, qualityBoost, confidenceBoost) {
|
|
360
|
+
// Issue #15: "hybrid" label for combined FTS+vec results
|
|
361
|
+
const reasons = [
|
|
362
|
+
rankingMode === "hybrid"
|
|
363
|
+
? "hybrid (fts + semantic)"
|
|
364
|
+
: rankingMode === "semantic"
|
|
365
|
+
? "semantic similarity"
|
|
366
|
+
: "fts bm25 relevance",
|
|
367
|
+
];
|
|
321
368
|
const tokens = query.toLowerCase().split(/\s+/).filter(Boolean);
|
|
322
369
|
const name = entry.name.toLowerCase();
|
|
323
370
|
const tags = entry.tags?.join(" ").toLowerCase() ?? "";
|
|
324
371
|
const searchHints = entry.searchHints?.join(" ").toLowerCase() ?? "";
|
|
325
372
|
const aliases = entry.aliases?.join(" ").toLowerCase() ?? "";
|
|
373
|
+
// Issue #12: include description in match reasons
|
|
374
|
+
const desc = entry.description?.toLowerCase() ?? "";
|
|
326
375
|
if (tokens.some((t) => name.includes(t)))
|
|
327
376
|
reasons.push("matched name tokens");
|
|
328
377
|
if (tokens.some((t) => tags.includes(t)))
|
|
@@ -331,6 +380,9 @@ export function buildWhyMatched(entry, query, rankingMode, qualityBoost, confide
|
|
|
331
380
|
reasons.push("matched searchHints");
|
|
332
381
|
if (tokens.some((t) => aliases.includes(t)))
|
|
333
382
|
reasons.push("matched aliases");
|
|
383
|
+
// Issue #12: report description matches
|
|
384
|
+
if (tokens.some((t) => desc.includes(t)))
|
|
385
|
+
reasons.push("matched description");
|
|
334
386
|
if (qualityBoost > 0)
|
|
335
387
|
reasons.push("curated metadata boost");
|
|
336
388
|
if (confidenceBoost > 0)
|
|
@@ -413,10 +465,27 @@ async function indexAssets(stashDir, type) {
|
|
|
413
465
|
continue;
|
|
414
466
|
stash = generated;
|
|
415
467
|
}
|
|
468
|
+
// Build a lookup for matching filename-less entries to actual files
|
|
469
|
+
const fileBasenameMap = new Map();
|
|
470
|
+
for (const file of files) {
|
|
471
|
+
const base = path.basename(file, path.extname(file));
|
|
472
|
+
if (!fileBasenameMap.has(base))
|
|
473
|
+
fileBasenameMap.set(base, file);
|
|
474
|
+
}
|
|
416
475
|
for (const entry of stash.entries) {
|
|
417
476
|
if (filterType && entry.type !== filterType)
|
|
418
477
|
continue;
|
|
419
|
-
|
|
478
|
+
let entryPath;
|
|
479
|
+
if (entry.filename) {
|
|
480
|
+
entryPath = path.join(dirPath, entry.filename);
|
|
481
|
+
}
|
|
482
|
+
else {
|
|
483
|
+
// Try matching entry name to a file by basename
|
|
484
|
+
entryPath =
|
|
485
|
+
fileBasenameMap.get(entry.name) ??
|
|
486
|
+
fileBasenameMap.get(entry.name.split("/").pop() ?? "") ??
|
|
487
|
+
(files[0] || dirPath);
|
|
488
|
+
}
|
|
420
489
|
assets.push({ entry, path: entryPath });
|
|
421
490
|
}
|
|
422
491
|
}
|
|
@@ -427,3 +496,31 @@ function compareAssets(a, b) {
|
|
|
427
496
|
return a.entry.type.localeCompare(b.entry.type);
|
|
428
497
|
return a.entry.name.localeCompare(b.entry.name);
|
|
429
498
|
}
|
|
499
|
+
/**
 * Collapse scored results so each `filePath` appears once, keeping the
 * best-scored item for that path.
 *
 * A copy of the input is sorted by score descending (a missing score counts
 * as 0), so callers need not pre-sort and their array is left untouched.
 * Items with equal scores keep their incoming relative order.
 *
 * @param {Array<{filePath: string, score?: number}>} items - Scored results.
 * @returns {Array} New array, score-descending, one item per file path.
 */
function deduplicateByPath(items) {
    const ranked = Array.from(items);
    ranked.sort((left, right) => (right.score ?? 0) - (left.score ?? 0));
    const claimedPaths = new Set();
    const winners = [];
    for (const candidate of ranked) {
        if (claimedPaths.has(candidate.filePath))
            continue;
        claimedPaths.add(candidate.filePath);
        winners.push(candidate);
    }
    return winners;
}
|
|
515
|
+
/**
 * Remove assets whose `path` was already seen earlier in the list.
 *
 * Input order is preserved and the first asset for each path is kept, so the
 * caller's ordering decides which duplicate survives.
 *
 * @param {Array<{path: string}>} assets - Assets in priority order.
 * @returns {Array} New array with at most one asset per path.
 */
function deduplicateAssetsByPath(assets) {
    const knownPaths = new Set();
    return assets.filter(({ path: assetPath }) => {
        const isFirst = !knownPaths.has(assetPath);
        if (isFirst)
            knownPaths.add(assetPath);
        return isFirst;
    });
}
|