akm-cli 0.1.3 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/asset-registry.js +48 -0
- package/dist/asset-spec.js +11 -32
- package/dist/cli.js +173 -59
- package/dist/common.js +3 -0
- package/dist/completions.js +4 -2
- package/dist/config.js +35 -7
- package/dist/db.js +182 -22
- package/dist/embedder.js +140 -23
- package/dist/file-context.js +3 -0
- package/dist/indexer.js +198 -42
- package/dist/info.js +92 -0
- package/dist/local-search.js +190 -90
- package/dist/manifest.js +172 -0
- package/dist/metadata.js +165 -2
- package/dist/providers/skills-sh.js +21 -12
- package/dist/providers/static-index.js +3 -1
- package/dist/registry-build-index.js +12 -1
- package/dist/registry-resolve.js +10 -7
- package/dist/renderers.js +1 -1
- package/dist/search-fields.js +69 -0
- package/dist/search-source.js +42 -0
- package/dist/setup.js +151 -7
- package/dist/stash-clone.js +3 -1
- package/dist/stash-provider-factory.js +0 -2
- package/dist/stash-providers/filesystem.js +4 -5
- package/dist/stash-providers/git.js +140 -0
- package/dist/stash-providers/index.js +1 -1
- package/dist/stash-providers/openviking.js +36 -25
- package/dist/stash-providers/provider-utils.js +11 -0
- package/dist/stash-search.js +106 -90
- package/dist/stash-show.js +125 -9
- package/dist/usage-events.js +73 -0
- package/dist/version.js +20 -0
- package/dist/walker.js +1 -2
- package/package.json +4 -3
- package/dist/stash-providers/context-hub.js +0 -390
package/dist/config.js
CHANGED
|
@@ -22,6 +22,9 @@ export function getConfigPath() {
|
|
|
22
22
|
}
|
|
23
23
|
// ── Load / Save / Update ────────────────────────────────────────────────────
|
|
24
24
|
let cachedConfig;
|
|
25
|
+
/**
 * Drop the memoized configuration held in the module-level `cachedConfig`
 * variable by resetting it to `undefined`.
 */
export function resetConfigCache() {
    cachedConfig = undefined;
}
|
|
25
28
|
export function loadConfig() {
|
|
26
29
|
const configPath = getConfigPath();
|
|
27
30
|
let stat;
|
|
@@ -184,10 +187,11 @@ const URL_FIELD_NAMES = new Set(["url", "endpoint", "artifactUrl"]);
|
|
|
184
187
|
*/
|
|
185
188
|
function expandEnvVars(value, fieldName) {
|
|
186
189
|
if (typeof value === "string") {
|
|
187
|
-
// Skip URL-type fields by name or by value prefix
|
|
188
|
-
if (
|
|
189
|
-
|
|
190
|
-
|
|
190
|
+
// Skip URL-type fields by name or by value prefix, unless they contain ${VAR} syntax
|
|
191
|
+
if (!value.includes("${") &&
|
|
192
|
+
((fieldName !== undefined && URL_FIELD_NAMES.has(fieldName)) ||
|
|
193
|
+
value.startsWith("http://") ||
|
|
194
|
+
value.startsWith("https://"))) {
|
|
191
195
|
return value;
|
|
192
196
|
}
|
|
193
197
|
return value.replace(/\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g, (_match, braced, bare) => {
|
|
@@ -274,14 +278,35 @@ function parseEmbeddingConfig(value) {
|
|
|
274
278
|
if (typeof value !== "object" || value === null || Array.isArray(value))
|
|
275
279
|
return undefined;
|
|
276
280
|
const obj = value;
|
|
277
|
-
|
|
281
|
+
// Extract localModel early — it's valid even without a remote endpoint
|
|
282
|
+
const localModel = typeof obj.localModel === "string" && obj.localModel ? obj.localModel : undefined;
|
|
283
|
+
// If no endpoint is provided, the config is only valid when localModel is set
|
|
284
|
+
// (local-only embedding configuration).
|
|
285
|
+
// Sentinel: { endpoint: "", model: "" } means "local-only" — use hasRemoteEndpoint()
|
|
286
|
+
// (in embedder.ts) to distinguish from a real remote config. Do NOT check
|
|
287
|
+
// endpoint/model directly in consuming code.
|
|
288
|
+
if (typeof obj.endpoint !== "string" || !obj.endpoint) {
|
|
289
|
+
if (localModel) {
|
|
290
|
+
return { endpoint: "", model: "", localModel };
|
|
291
|
+
}
|
|
278
292
|
return undefined;
|
|
293
|
+
}
|
|
279
294
|
if (!obj.endpoint.startsWith("http://") && !obj.endpoint.startsWith("https://")) {
|
|
280
295
|
console.warn(`[akm] Ignoring embedding config: endpoint must start with http:// or https://, got "${obj.endpoint}"`);
|
|
296
|
+
// Still return localModel-only config if localModel was set
|
|
297
|
+
if (localModel) {
|
|
298
|
+
return { endpoint: "", model: "", localModel };
|
|
299
|
+
}
|
|
281
300
|
return undefined;
|
|
282
301
|
}
|
|
283
|
-
if (typeof obj.model !== "string" || !obj.model)
|
|
302
|
+
if (typeof obj.model !== "string" || !obj.model) {
|
|
303
|
+
// No remote model, but localModel may still be valid
|
|
304
|
+
if (localModel) {
|
|
305
|
+
console.warn(`[akm] Embedding endpoint "${obj.endpoint}" ignored: model is required for remote embeddings. Using local model only.`);
|
|
306
|
+
return { endpoint: "", model: "", localModel };
|
|
307
|
+
}
|
|
284
308
|
return undefined;
|
|
309
|
+
}
|
|
285
310
|
const result = {
|
|
286
311
|
endpoint: obj.endpoint,
|
|
287
312
|
model: obj.model,
|
|
@@ -301,6 +326,9 @@ function parseEmbeddingConfig(value) {
|
|
|
301
326
|
if (typeof obj.apiKey === "string" && obj.apiKey) {
|
|
302
327
|
result.apiKey = obj.apiKey;
|
|
303
328
|
}
|
|
329
|
+
if (localModel) {
|
|
330
|
+
result.localModel = localModel;
|
|
331
|
+
}
|
|
304
332
|
return result;
|
|
305
333
|
}
|
|
306
334
|
function parseLlmConfig(value) {
|
|
@@ -432,7 +460,7 @@ function parseRegistryConfigEntry(value) {
|
|
|
432
460
|
return undefined;
|
|
433
461
|
const obj = value;
|
|
434
462
|
const url = asNonEmptyString(obj.url);
|
|
435
|
-
if (!url
|
|
463
|
+
if (!url?.startsWith("http"))
|
|
436
464
|
return undefined;
|
|
437
465
|
const entry = { url };
|
|
438
466
|
const name = asNonEmptyString(obj.name);
|
package/dist/db.js
CHANGED
|
@@ -4,9 +4,11 @@ import { createRequire } from "node:module";
|
|
|
4
4
|
import path from "node:path";
|
|
5
5
|
import { cosineSimilarity } from "./embedder";
|
|
6
6
|
import { getDbPath } from "./paths";
|
|
7
|
+
import { buildSearchFields } from "./search-fields";
|
|
8
|
+
import { ensureUsageEventsSchema } from "./usage-events";
|
|
7
9
|
import { warn } from "./warn";
|
|
8
10
|
// ── Constants ───────────────────────────────────────────────────────────────
|
|
9
|
-
export const DB_VERSION =
|
|
11
|
+
export const DB_VERSION = 8;
|
|
10
12
|
export const EMBEDDING_DIM = 384;
|
|
11
13
|
// ── Database lifecycle ──────────────────────────────────────────────────────
|
|
12
14
|
export function openDatabase(dbPath, options) {
|
|
@@ -83,6 +85,8 @@ function ensureSchema(db, embeddingDim) {
|
|
|
83
85
|
// Check stored version — if it differs from DB_VERSION, drop and recreate all tables
|
|
84
86
|
const storedVersion = getMeta(db, "version");
|
|
85
87
|
if (storedVersion && storedVersion !== String(DB_VERSION)) {
|
|
88
|
+
db.exec("DROP TABLE IF EXISTS utility_scores");
|
|
89
|
+
db.exec("DROP TABLE IF EXISTS usage_events");
|
|
86
90
|
db.exec("DROP TABLE IF EXISTS embeddings");
|
|
87
91
|
db.exec("DROP TABLE IF EXISTS entries_vec");
|
|
88
92
|
db.exec("DROP TABLE IF EXISTS entries_fts");
|
|
@@ -120,17 +124,35 @@ function ensureSchema(db, embeddingDim) {
|
|
|
120
124
|
FOREIGN KEY (id) REFERENCES entries(id)
|
|
121
125
|
);
|
|
122
126
|
`);
|
|
123
|
-
// FTS5 table —
|
|
127
|
+
// FTS5 table — multi-column with per-field weighting via bm25()
|
|
124
128
|
const ftsExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_fts'").get();
|
|
125
129
|
if (!ftsExists) {
|
|
126
130
|
db.exec(`
|
|
127
131
|
CREATE VIRTUAL TABLE entries_fts USING fts5(
|
|
128
132
|
entry_id UNINDEXED,
|
|
129
|
-
|
|
133
|
+
name,
|
|
134
|
+
description,
|
|
135
|
+
tags,
|
|
136
|
+
hints,
|
|
137
|
+
content,
|
|
130
138
|
tokenize='porter unicode61'
|
|
131
139
|
);
|
|
132
140
|
`);
|
|
133
141
|
}
|
|
142
|
+
// Usage events table — created by ensureUsageEventsSchema() at runtime.
|
|
143
|
+
// Utility scores table (aggregated per-entry utility metrics)
|
|
144
|
+
db.exec(`
|
|
145
|
+
CREATE TABLE IF NOT EXISTS utility_scores (
|
|
146
|
+
entry_id INTEGER PRIMARY KEY,
|
|
147
|
+
utility REAL NOT NULL DEFAULT 0,
|
|
148
|
+
show_count INTEGER NOT NULL DEFAULT 0,
|
|
149
|
+
search_count INTEGER NOT NULL DEFAULT 0,
|
|
150
|
+
select_rate REAL NOT NULL DEFAULT 0,
|
|
151
|
+
last_used_at TEXT,
|
|
152
|
+
updated_at TEXT NOT NULL DEFAULT (datetime('now')),
|
|
153
|
+
FOREIGN KEY (entry_id) REFERENCES entries(id) ON DELETE CASCADE
|
|
154
|
+
);
|
|
155
|
+
`);
|
|
134
156
|
// sqlite-vec table
|
|
135
157
|
if (isVecAvailable(db)) {
|
|
136
158
|
// Check if stored embedding dimension differs from configured one
|
|
@@ -142,7 +164,7 @@ function ensureSchema(db, embeddingDim) {
|
|
|
142
164
|
catch {
|
|
143
165
|
/* ignore */
|
|
144
166
|
}
|
|
145
|
-
//
|
|
167
|
+
// Delete stale BLOB embeddings so they don't produce silently wrong
|
|
146
168
|
// similarity scores against the new-dimension vec table.
|
|
147
169
|
try {
|
|
148
170
|
db.exec("DELETE FROM embeddings");
|
|
@@ -165,6 +187,8 @@ function ensureSchema(db, embeddingDim) {
|
|
|
165
187
|
}
|
|
166
188
|
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
167
189
|
}
|
|
190
|
+
// Usage telemetry table
|
|
191
|
+
ensureUsageEventsSchema(db);
|
|
168
192
|
}
|
|
169
193
|
// ── Meta helpers ────────────────────────────────────────────────────────────
|
|
170
194
|
export function getMeta(db, key) {
|
|
@@ -231,7 +255,7 @@ function deleteRelatedRows(db, ids) {
|
|
|
231
255
|
catch {
|
|
232
256
|
/* ignore */
|
|
233
257
|
}
|
|
234
|
-
//
|
|
258
|
+
// Also delete from FTS table so orphaned FTS rows don't remain
|
|
235
259
|
try {
|
|
236
260
|
db.prepare(`DELETE FROM entries_fts WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
237
261
|
}
|
|
@@ -246,16 +270,48 @@ function deleteRelatedRows(db, ids) {
|
|
|
246
270
|
/* ignore */
|
|
247
271
|
}
|
|
248
272
|
}
|
|
273
|
+
// Clean up utility scores before deleting entries
|
|
274
|
+
try {
|
|
275
|
+
db.prepare(`DELETE FROM utility_scores WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
276
|
+
}
|
|
277
|
+
catch {
|
|
278
|
+
/* ignore */
|
|
279
|
+
}
|
|
280
|
+
// Clean up usage events before deleting entries
|
|
281
|
+
try {
|
|
282
|
+
db.prepare(`DELETE FROM usage_events WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
283
|
+
}
|
|
284
|
+
catch {
|
|
285
|
+
/* ignore */
|
|
286
|
+
}
|
|
249
287
|
}
|
|
250
288
|
}
|
|
251
289
|
export function rebuildFts(db) {
    // The wipe and the repopulation run inside one transaction, so the FTS
    // table is never observed (or left) empty between the two steps.
    const repopulate = db.transaction(() => {
        db.exec("DELETE FROM entries_fts");
        const records = db.prepare("SELECT id, entry_json FROM entries").all();
        const insertRow = db.prepare("INSERT INTO entries_fts (entry_id, name, description, tags, hints, content) VALUES (?, ?, ?, ?, ?, ?)");
        for (const record of records) {
            // Each FTS column is filled from the per-field text that
            // buildSearchFields() derives from the parsed entry_json.
            let searchFields;
            try {
                searchFields = buildSearchFields(JSON.parse(record.entry_json));
            }
            catch {
                // A row that cannot be parsed/converted is reported and skipped
                // rather than aborting the whole rebuild.
                warn(`[db] rebuildFts: skipping entry id=${record.id} — invalid entry_json`);
                continue;
            }
            // The integer id is stored as-is in the entry_id column.
            insertRow.run(record.id, searchFields.name, searchFields.description, searchFields.tags, searchFields.hints, searchFields.content);
        }
    });
    repopulate();
}
|
|
261
317
|
// ── Vector operations ───────────────────────────────────────────────────────
|
|
@@ -284,8 +340,8 @@ export function searchVec(db, queryEmbedding, k) {
|
|
|
284
340
|
.all(buf, k);
|
|
285
341
|
}
|
|
286
342
|
catch (err) {
|
|
287
|
-
//
|
|
288
|
-
|
|
343
|
+
// Log the failure so it's visible in diagnostics
|
|
344
|
+
warn("[db] searchVec (sqlite-vec path) failed:", err instanceof Error ? err.message : String(err));
|
|
289
345
|
return [];
|
|
290
346
|
}
|
|
291
347
|
}
|
|
@@ -321,7 +377,7 @@ function searchBlobVec(db, queryEmbedding, k) {
|
|
|
321
377
|
}
|
|
322
378
|
catch (err) {
|
|
323
379
|
// MD-5: Log the failure so it's visible in diagnostics
|
|
324
|
-
|
|
380
|
+
warn("[db] searchBlobVec (JS fallback) failed:", err instanceof Error ? err.message : String(err));
|
|
325
381
|
return [];
|
|
326
382
|
}
|
|
327
383
|
}
|
|
@@ -330,13 +386,49 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
330
386
|
const ftsQuery = sanitizeFtsQuery(query);
|
|
331
387
|
if (!ftsQuery)
|
|
332
388
|
return [];
|
|
389
|
+
// Try the exact AND query first
|
|
390
|
+
const exactResults = runFtsQuery(db, ftsQuery, limit, entryType);
|
|
391
|
+
if (exactResults.length > 0)
|
|
392
|
+
return exactResults;
|
|
393
|
+
// Exact match returned zero results — try prefix fallback.
|
|
394
|
+
// Append FTS5 `*` suffix to each token that is >= 3 characters long.
|
|
395
|
+
// Short tokens (1-2 chars) are excluded from prefix expansion because
|
|
396
|
+
// they produce too many false positives.
|
|
397
|
+
const prefixQuery = buildPrefixQuery(ftsQuery);
|
|
398
|
+
if (!prefixQuery)
|
|
399
|
+
return [];
|
|
400
|
+
return runFtsQuery(db, prefixQuery, limit, entryType);
|
|
401
|
+
}
|
|
402
|
+
// Bare upper-case AND / OR / NOT are boolean operators in FTS5 query syntax;
// they must never receive a `*` suffix.
const FTS5_BOOLEAN_OPERATORS = new Set(["AND", "OR", "NOT"]);
/**
 * Build a prefix query from an FTS5 query string by appending `*` to each
 * whitespace-separated token that is 3+ characters long.
 *
 * Tokens are left untouched (no prefix expansion) when they:
 *   - are shorter than 3 characters (expansion would match too broadly),
 *   - already end with `*` (appending again would yield `foo**`, which is
 *     not valid FTS5 syntax), or
 *   - are one of the bare FTS5 boolean operators AND / OR / NOT.
 *
 * @param {string} ftsQuery - Query string to expand.
 * @returns {string|null} The space-joined expanded query, or null if no
 *   token qualified for prefix expansion.
 */
function buildPrefixQuery(ftsQuery) {
    const tokens = ftsQuery.split(/\s+/).filter(Boolean);
    let hasPrefix = false;
    const prefixTokens = tokens.map((token) => {
        const expandable = token.length >= 3 &&
            !token.endsWith("*") &&
            !FTS5_BOOLEAN_OPERATORS.has(token);
        if (!expandable) {
            return token;
        }
        hasPrefix = true;
        return `${token}*`;
    });
    return hasPrefix ? prefixTokens.join(" ") : null;
}
|
|
423
|
+
function runFtsQuery(db, ftsQuery, limit, entryType) {
|
|
333
424
|
let sql;
|
|
334
425
|
let params;
|
|
335
|
-
//
|
|
426
|
+
// Join on integer entry_id directly (no CAST needed; we store integer)
|
|
427
|
+
// Use bm25() with per-column weights: entry_id(0), name(10), description(5), tags(3), hints(2), content(1)
|
|
336
428
|
if (entryType && entryType !== "any") {
|
|
337
429
|
sql = `
|
|
338
430
|
SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
|
|
339
|
-
bm25(entries_fts) AS bm25Score
|
|
431
|
+
bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
|
|
340
432
|
FROM entries_fts f
|
|
341
433
|
JOIN entries e ON e.id = f.entry_id
|
|
342
434
|
WHERE entries_fts MATCH ?
|
|
@@ -349,7 +441,7 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
349
441
|
else {
|
|
350
442
|
sql = `
|
|
351
443
|
SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
|
|
352
|
-
bm25(entries_fts) AS bm25Score
|
|
444
|
+
bm25(entries_fts, 0, 10.0, 5.0, 3.0, 2.0, 1.0) AS bm25Score
|
|
353
445
|
FROM entries_fts f
|
|
354
446
|
JOIN entries e ON e.id = f.entry_id
|
|
355
447
|
WHERE entries_fts MATCH ?
|
|
@@ -360,7 +452,7 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
360
452
|
}
|
|
361
453
|
try {
|
|
362
454
|
const rows = db.prepare(sql).all(...params);
|
|
363
|
-
//
|
|
455
|
+
// Guard against corrupt JSON — skip the row rather than crashing
|
|
364
456
|
const results = [];
|
|
365
457
|
for (const row of rows) {
|
|
366
458
|
let entry;
|
|
@@ -368,7 +460,7 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
368
460
|
entry = JSON.parse(row.entry_json);
|
|
369
461
|
}
|
|
370
462
|
catch {
|
|
371
|
-
|
|
463
|
+
warn(`[db] searchFts: skipping entry id=${row.id} — corrupt entry_json`);
|
|
372
464
|
continue;
|
|
373
465
|
}
|
|
374
466
|
results.push({
|
|
@@ -416,7 +508,7 @@ export function getAllEntries(db, entryType) {
|
|
|
416
508
|
params = [];
|
|
417
509
|
}
|
|
418
510
|
const rows = db.prepare(sql).all(...params);
|
|
419
|
-
//
|
|
511
|
+
// Guard against corrupt JSON — skip the row rather than crashing
|
|
420
512
|
const entries = [];
|
|
421
513
|
for (const row of rows) {
|
|
422
514
|
let entry;
|
|
@@ -424,7 +516,7 @@ export function getAllEntries(db, entryType) {
|
|
|
424
516
|
entry = JSON.parse(row.entry_json);
|
|
425
517
|
}
|
|
426
518
|
catch {
|
|
427
|
-
|
|
519
|
+
warn(`[db] getAllEntries: skipping entry id=${row.id} — corrupt entry_json`);
|
|
428
520
|
continue;
|
|
429
521
|
}
|
|
430
522
|
entries.push({
|
|
@@ -443,17 +535,21 @@ export function getEntryCount(db) {
|
|
|
443
535
|
const row = db.prepare("SELECT COUNT(*) AS cnt FROM entries").get();
|
|
444
536
|
return row.cnt;
|
|
445
537
|
}
|
|
538
|
+
/**
 * Count the rows currently stored in the `embeddings` table.
 *
 * @param db - Database handle exposing `prepare(sql).get()`.
 * @returns The `cnt` column of the COUNT(*) query.
 */
export function getEmbeddingCount(db) {
    const countStmt = db.prepare("SELECT COUNT(*) AS cnt FROM embeddings");
    const { cnt } = countStmt.get();
    return cnt;
}
|
|
446
542
|
export function getEntryById(db, id) {
|
|
447
543
|
const row = db.prepare("SELECT file_path, entry_json FROM entries WHERE id = ?").get(id);
|
|
448
544
|
if (!row)
|
|
449
545
|
return undefined;
|
|
450
|
-
//
|
|
546
|
+
// Guard against corrupt JSON
|
|
451
547
|
let entry;
|
|
452
548
|
try {
|
|
453
549
|
entry = JSON.parse(row.entry_json);
|
|
454
550
|
}
|
|
455
551
|
catch {
|
|
456
|
-
|
|
552
|
+
warn(`[db] getEntryById: skipping entry id=${id} — corrupt entry_json`);
|
|
457
553
|
return undefined;
|
|
458
554
|
}
|
|
459
555
|
return { filePath: row.file_path, entry };
|
|
@@ -462,7 +558,7 @@ export function getEntriesByDir(db, dirPath) {
|
|
|
462
558
|
const rows = db
|
|
463
559
|
.prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
|
|
464
560
|
.all(dirPath);
|
|
465
|
-
//
|
|
561
|
+
// Guard against corrupt JSON — skip the row rather than crashing
|
|
466
562
|
const entries = [];
|
|
467
563
|
for (const row of rows) {
|
|
468
564
|
let entry;
|
|
@@ -470,7 +566,7 @@ export function getEntriesByDir(db, dirPath) {
|
|
|
470
566
|
entry = JSON.parse(row.entry_json);
|
|
471
567
|
}
|
|
472
568
|
catch {
|
|
473
|
-
|
|
569
|
+
warn(`[db] getEntriesByDir: skipping entry id=${row.id} — corrupt entry_json`);
|
|
474
570
|
continue;
|
|
475
571
|
}
|
|
476
572
|
entries.push({
|
|
@@ -485,3 +581,67 @@ export function getEntriesByDir(db, dirPath) {
|
|
|
485
581
|
}
|
|
486
582
|
return entries;
|
|
487
583
|
}
|
|
584
|
+
/**
 * Look up the utility score row for a single entry.
 *
 * @param db - Database handle exposing `prepare(sql).get(...)`.
 * @param entryId - Value matched against `utility_scores.entry_id`.
 * @returns The row mapped to camelCase keys, or undefined when no row exists.
 *   A NULL `last_used_at` is surfaced as `undefined`.
 */
export function getUtilityScore(db, entryId) {
    const lookup = db.prepare("SELECT entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at FROM utility_scores WHERE entry_id = ?");
    const record = lookup.get(entryId);
    if (!record) {
        return undefined;
    }
    const { entry_id: id, utility, show_count: showCount, search_count: searchCount, select_rate: selectRate, last_used_at: lastUsedAt, updated_at: updatedAt, } = record;
    return {
        entryId: id,
        utility,
        showCount,
        searchCount,
        selectRate,
        lastUsedAt: lastUsedAt ?? undefined,
        updatedAt,
    };
}
|
|
603
|
+
/**
 * Load the utility scores for many entry IDs.
 *
 * The IDs are queried in slices of SQLITE_CHUNK_SIZE so a single statement
 * never binds more variables than SQLite allows.
 *
 * @param db - Database handle exposing `prepare(sql).all(...)`.
 * @param ids - Entry IDs to look up.
 * @returns A Map from entry_id to the camelCase score record; IDs without a
 *   stored score are simply absent from the Map.
 */
export function getUtilityScoresByIds(db, ids) {
    const scoresById = new Map();
    if (ids.length === 0) {
        return scoresById;
    }
    let offset = 0;
    while (offset < ids.length) {
        const batch = ids.slice(offset, offset + SQLITE_CHUNK_SIZE);
        offset += SQLITE_CHUNK_SIZE;
        const marks = batch.map(() => "?").join(",");
        const batchRows = db
            .prepare(`SELECT entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at FROM utility_scores WHERE entry_id IN (${marks})`)
            .all(...batch);
        batchRows.forEach((record) => {
            scoresById.set(record.entry_id, {
                entryId: record.entry_id,
                utility: record.utility,
                showCount: record.show_count,
                searchCount: record.search_count,
                selectRate: record.select_rate,
                lastUsedAt: record.last_used_at ?? undefined,
                updatedAt: record.updated_at,
            });
        });
    }
    return scoresById;
}
|
|
632
|
+
/**
 * Write the utility score for an entry, creating the row if it is missing
 * and overwriting every score column if it already exists. `updated_at` is
 * set to the database's current time in both cases.
 *
 * @param db - Database handle exposing `prepare(sql).run(...)`.
 * @param entryId - Primary key of the `utility_scores` row.
 * @param data - Score values: `utility`, `showCount`, `searchCount`,
 *   `selectRate` and optional `lastUsedAt` (stored as NULL when absent).
 */
export function upsertUtilityScore(db, entryId, data) {
    const upsert = db.prepare(`
    INSERT INTO utility_scores (entry_id, utility, show_count, search_count, select_rate, last_used_at, updated_at)
    VALUES (?, ?, ?, ?, ?, ?, datetime('now'))
    ON CONFLICT(entry_id) DO UPDATE SET
      utility = excluded.utility,
      show_count = excluded.show_count,
      search_count = excluded.search_count,
      select_rate = excluded.select_rate,
      last_used_at = excluded.last_used_at,
      updated_at = datetime('now')
  `);
    const { utility, showCount, searchCount, selectRate, lastUsedAt } = data;
    upsert.run(entryId, utility, showCount, searchCount, selectRate, lastUsedAt ?? null);
}
|
package/dist/embedder.js
CHANGED
|
@@ -1,32 +1,61 @@
|
|
|
1
|
-
import { fetchWithTimeout } from "./common";
|
|
1
|
+
import { fetchWithTimeout, isHttpUrl } from "./common";
|
|
2
2
|
import { warn } from "./warn";
|
|
3
|
+
// ── Default local model ─────────────────────────────────────────────────────
|
|
4
|
+
/**
 * Name of the local transformer model used when no override is configured.
 * `bge-small-en-v1.5` scores higher on MTEB benchmarks than the previous
 * `all-MiniLM-L6-v2` while keeping the same 384-dimension footprint.
 */
export const DEFAULT_LOCAL_MODEL = "Xenova/bge-small-en-v1.5";
/**
 * Resolve which local model name to embed with.
 *
 * @param overrideModel - Optional model name; any truthy value wins.
 * @returns `overrideModel` when truthy, otherwise `DEFAULT_LOCAL_MODEL`.
 */
function getLocalModelName(overrideModel) {
    if (overrideModel) {
        return overrideModel;
    }
    return DEFAULT_LOCAL_MODEL;
}
|
|
3
18
|
// Cache the promise itself (not the resolved result) so concurrent calls share
// the same initialisation work and never download the model twice.
// The cache is keyed by model name so switching models gets a fresh pipeline.
let localEmbedderPromise;
let localEmbedderModelName;
/**
 * Return (creating on first use) the feature-extraction pipeline for the
 * requested local model.
 *
 * @param modelName - Optional model override, resolved via getLocalModelName().
 * @returns A promise for the pipeline function. It rejects with an Error when
 *   `@huggingface/transformers` cannot be imported, or with whatever the
 *   pipeline factory rejects with.
 */
async function getLocalEmbedder(modelName) {
    const resolvedModel = getLocalModelName(modelName);
    // If the cached pipeline was created for a different model, discard it.
    if (localEmbedderPromise && localEmbedderModelName !== resolvedModel) {
        localEmbedderPromise = undefined;
        localEmbedderModelName = undefined;
    }
    if (!localEmbedderPromise) {
        const pending = (async () => {
            let pipeline;
            try {
                const mod = await import("@huggingface/transformers");
                pipeline = mod.pipeline;
            }
            catch {
                throw new Error("Semantic search requires @huggingface/transformers. Install it with: npm install @huggingface/transformers");
            }
            const pipelineFn = pipeline;
            return pipelineFn("feature-extraction", resolvedModel);
        })();
        localEmbedderPromise = pending;
        localEmbedderModelName = resolvedModel;
        // HI-13: Clear the cached promise on failure so the next call retries
        // instead of permanently rejecting every subsequent call with the same error.
        // Only clear when the cache still holds THIS attempt: if the model was
        // switched while this one was loading, a late rejection must not wipe
        // out the newer pipeline promise that replaced it.
        pending.catch(() => {
            if (localEmbedderPromise === pending) {
                localEmbedderPromise = undefined;
                localEmbedderModelName = undefined;
            }
        });
    }
    return localEmbedderPromise;
}
|
|
28
|
-
|
|
29
|
-
|
|
53
|
+
/**
 * Forget the cached local pipeline promise and the model name it was
 * created for by resetting both module-level variables to `undefined`.
 */
export function resetLocalEmbedder() {
    localEmbedderModelName = undefined;
    localEmbedderPromise = undefined;
}
|
|
57
|
+
/**
 * Embed one text with the local pipeline.
 *
 * @param text - Text to embed.
 * @param modelName - Optional local model override passed to getLocalEmbedder().
 * @returns A plain array copied from the pipeline output's `data`; the
 *   pipeline is invoked with mean pooling and normalization enabled.
 */
async function embedLocal(text, modelName) {
    const extractor = await getLocalEmbedder(modelName);
    const { data } = await extractor(text, { pooling: "mean", normalize: true });
    return Array.from(data);
}
|
|
@@ -71,17 +100,68 @@ async function embedRemote(text, config) {
|
|
|
71
100
|
}
|
|
72
101
|
return l2Normalize(json.data[0].embedding);
|
|
73
102
|
}
|
|
103
|
+
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
104
|
+
/**
 * Tell whether an embedding connection config points at a remote endpoint,
 * i.e. whether `isHttpUrl()` accepts its `endpoint` value.
 */
function hasRemoteEndpoint(config) {
    const { endpoint } = config;
    return isHttpUrl(endpoint);
}
|
|
108
|
+
// ── LRU embedding cache ─────────────────────────────────────────────────────
|
|
109
|
+
// Caches query embeddings to avoid redundant computation for repeated queries.
|
|
110
|
+
// Uses a simple Map with LRU eviction (delete + re-insert to move to end).
|
|
111
|
+
const EMBED_CACHE_MAX = 100;
|
|
112
|
+
const embedCache = new Map();
|
|
113
|
+
/**
 * Build a cache key from query text and optional config.
 * Different endpoints/models must not share cached embeddings.
 * apiKey is deliberately excluded: the same endpoint+model produce identical
 * embeddings regardless of auth.
 *
 * The parts are serialized as a JSON array rather than joined with ":" —
 * endpoints (`http://…`) and model names (e.g. `name:tag`) can themselves
 * contain ":", so a plain join lets different (model, text) pairs collapse
 * onto the same key and return the wrong cached vector.
 *
 * @param text - Text being embedded.
 * @param config - Optional embedding config (`endpoint`, `model`, `localModel`).
 * @returns An opaque string key; the no-config key (2 elements) can never
 *   equal a configured key (3 elements).
 */
function embedCacheKey(text, config) {
    if (!config) {
        return JSON.stringify(["local", text]);
    }
    const endpoint = config.endpoint || "";
    const model = config.model || config.localModel || "";
    return JSON.stringify([endpoint, model, text]);
}
|
|
125
|
+
/**
 * Empty the embedding cache (`embedCache`). Use it after the embedding
 * model changes, or whenever freshly computed embeddings are wanted.
 */
export function clearEmbeddingCache() {
    embedCache.clear();
}
|
|
74
132
|
// ── Public API ──────────────────────────────────────────────────────────────
|
|
75
133
|
/**
 * Produce the embedding vector for `text`.
 *
 * When `embeddingConfig` has a remote endpoint the configured
 * OpenAI-compatible endpoint is used; otherwise the local
 * @huggingface/transformers pipeline runs with `embeddingConfig.localModel`
 * (or `DEFAULT_LOCAL_MODEL`).
 *
 * Vectors are memoized in an LRU cache of at most EMBED_CACHE_MAX entries,
 * keyed by the text plus the embedding config, so a repeated identical
 * request returns the cached vector.
 */
export async function embed(text, embeddingConfig) {
    const cacheKey = embedCacheKey(text, embeddingConfig);
    const hit = embedCache.get(cacheKey);
    if (hit) {
        // Re-inserting moves the key to the end of the Map's insertion
        // order, marking it as most recently used.
        embedCache.delete(cacheKey);
        embedCache.set(cacheKey, hit);
        return hit;
    }
    let vector;
    if (embeddingConfig && hasRemoteEndpoint(embeddingConfig)) {
        vector = await embedRemote(text, embeddingConfig);
    }
    else {
        vector = await embedLocal(text, embeddingConfig?.localModel);
    }
    // At capacity: drop the first key in insertion order (least recently used).
    if (embedCache.size >= EMBED_CACHE_MAX) {
        const [oldestKey] = embedCache.keys();
        if (oldestKey !== undefined) {
            embedCache.delete(oldestKey);
        }
    }
    embedCache.set(cacheKey, vector);
    return vector;
}
|
|
86
166
|
// ── Batch embedding ─────────────────────────────────────────────────────────
|
|
87
167
|
/**
|
|
@@ -92,13 +172,14 @@ export async function embed(text, embeddingConfig) {
|
|
|
92
172
|
export async function embedBatch(texts, embeddingConfig) {
    // Nothing to embed.
    if (texts.length === 0) {
        return [];
    }
    // A config with a remote endpoint is delegated to the remote batch path.
    const useRemote = embeddingConfig && hasRemoteEndpoint(embeddingConfig);
    if (useRemote) {
        return embedRemoteBatch(texts, embeddingConfig);
    }
    // Local transformer: texts are embedded one after another, each awaited
    // before the next starts.
    const modelOverride = embeddingConfig?.localModel;
    const vectors = [];
    for (const item of texts) {
        const vector = await embedLocal(item, modelOverride);
        vectors.push(vector);
    }
    return vectors;
}
|
|
@@ -163,21 +244,57 @@ export function cosineSimilarity(a, b) {
|
|
|
163
244
|
return denom === 0 ? 0 : dot / denom;
|
|
164
245
|
}
|
|
165
246
|
// ── Availability check ──────────────────────────────────────────────────────
|
|
166
|
-
|
|
167
|
-
|
|
247
|
+
/**
 * Probe whether `@huggingface/transformers` can be dynamically imported.
 *
 * @returns `true` when the import succeeds, `false` when it throws.
 */
export async function isTransformersAvailable() {
    let importable = true;
    try {
        await import("@huggingface/transformers");
    }
    catch {
        importable = false;
    }
    return importable;
}
|
|
260
|
+
/**
 * Report whether embeddings can be produced, with a reason when they cannot.
 *
 * @param embeddingConfig - Optional embedding config.
 * @returns `{ available: true }` on success, otherwise
 *   `{ available: false, reason, message }` where `reason` is one of
 *   "remote-unreachable", "missing-package" or "model-download-failed".
 */
export async function checkEmbeddingAvailability(embeddingConfig) {
    // Shared shape for failures that carry a caught error.
    const failure = (reason, err) => ({
        available: false,
        reason,
        message: err instanceof Error ? err.message : String(err),
    });
    const wantsRemote = embeddingConfig && hasRemoteEndpoint(embeddingConfig);
    if (wantsRemote) {
        // Remote path: a single probe request decides the outcome.
        try {
            await embedRemote("test", embeddingConfig);
        }
        catch (err) {
            return failure("remote-unreachable", err);
        }
        return { available: true };
    }
    // Local path: confirm the package imports before trying to load the model.
    const packagePresent = await isTransformersAvailable();
    if (!packagePresent) {
        return {
            available: false,
            reason: "missing-package",
            message: "@huggingface/transformers is not installed.",
        };
    }
    try {
        await getLocalEmbedder(embeddingConfig?.localModel);
    }
    catch (err) {
        return failure("model-download-failed", err);
    }
    return { available: true };
}
|
|
297
|
+
/**
 * Boolean-only form of checkEmbeddingAvailability().
 *
 * @param embeddingConfig - Optional embedding config, forwarded unchanged.
 * @returns The `available` flag of the detailed availability result.
 */
export async function isEmbeddingAvailable(embeddingConfig) {
    const { available } = await checkEmbeddingAvailability(embeddingConfig);
    return available;
}
|
package/dist/file-context.js
CHANGED
|
@@ -69,6 +69,9 @@ const matchers = [];
|
|
|
69
69
|
/** Renderer lookup by name. */
|
|
70
70
|
const renderers = new Map();
|
|
71
71
|
let builtinsPromise;
|
|
72
|
+
/**
 * Forget the cached `builtinsPromise` by resetting it to `undefined`.
 * NOTE(review): presumably this makes the next lazy registration of the
 * built-in matchers/renderers run again — confirm against the code that
 * reads `builtinsPromise`.
 */
export function resetBuiltinsCache() {
    builtinsPromise = undefined;
}
|
|
72
75
|
/**
|
|
73
76
|
* Ensure that built-in matchers and renderers are registered.
|
|
74
77
|
* Called lazily on first use of runMatchers/getRenderer.
|