akm-cli 0.0.20 → 0.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -5
- package/dist/asset-spec.js +96 -9
- package/dist/cli.js +195 -55
- package/dist/common.js +15 -2
- package/dist/config-cli.js +65 -6
- package/dist/config.js +206 -22
- package/dist/create-provider-registry.js +18 -0
- package/dist/db.js +156 -53
- package/dist/embedder.js +36 -18
- package/dist/errors.js +6 -0
- package/dist/file-context.js +18 -19
- package/dist/frontmatter.js +19 -3
- package/dist/indexer.js +126 -89
- package/dist/{stash-registry.js → installed-kits.js} +16 -24
- package/dist/kit-include.js +108 -0
- package/dist/local-search.js +429 -0
- package/dist/lockfile.js +47 -5
- package/dist/matchers.js +6 -0
- package/dist/metadata.js +22 -16
- package/dist/paths.js +4 -0
- package/dist/providers/skills-sh.js +3 -2
- package/dist/providers/static-index.js +4 -9
- package/dist/registry-build-index.js +356 -0
- package/dist/registry-factory.js +19 -0
- package/dist/registry-install.js +114 -109
- package/dist/registry-resolve.js +44 -9
- package/dist/registry-search.js +14 -9
- package/dist/renderers.js +23 -7
- package/dist/ripgrep-install.js +9 -4
- package/dist/self-update.js +31 -4
- package/dist/stash-add.js +75 -6
- package/dist/stash-clone.js +1 -1
- package/dist/stash-provider-factory.js +52 -0
- package/dist/stash-provider.js +1 -0
- package/dist/stash-providers/filesystem.js +42 -0
- package/dist/stash-providers/index.js +9 -0
- package/dist/stash-providers/openviking.js +337 -0
- package/dist/stash-resolve.js +33 -3
- package/dist/stash-search.js +70 -402
- package/dist/stash-show.js +24 -5
- package/dist/stash-source.js +19 -11
- package/dist/walker.js +15 -10
- package/dist/warn.js +7 -0
- package/package.json +1 -1
- package/dist/provider-registry.js +0 -8
package/dist/db.js
CHANGED
|
@@ -46,7 +46,9 @@ export function isVecAvailable(db) {
|
|
|
46
46
|
}
|
|
47
47
|
const VEC_DOCS_URL = "https://github.com/itlackey/agentikit/blob/main/docs/configuration.md#sqlite-vec-extension";
|
|
48
48
|
const VEC_FALLBACK_THRESHOLD = 10_000;
|
|
49
|
-
|
|
49
|
+
// Per-database warning state: tracks which databases have already emitted the
|
|
50
|
+
// vec-missing warning so we don't spam on every openDatabase() call.
|
|
51
|
+
const vecInitWarnedDbs = new WeakSet();
|
|
50
52
|
/**
|
|
51
53
|
* Warn if sqlite-vec is unavailable and embedding count exceeds threshold.
|
|
52
54
|
* Called from openDatabase (once at init) and from indexer (each run).
|
|
@@ -54,7 +56,7 @@ let vecInitWarned = false;
|
|
|
54
56
|
export function warnIfVecMissing(db, { once } = { once: false }) {
|
|
55
57
|
if (isVecAvailable(db))
|
|
56
58
|
return;
|
|
57
|
-
if (once &&
|
|
59
|
+
if (once && vecInitWarnedDbs.has(db))
|
|
58
60
|
return;
|
|
59
61
|
try {
|
|
60
62
|
const row = db.prepare("SELECT COUNT(*) AS cnt FROM embeddings").get();
|
|
@@ -62,7 +64,7 @@ export function warnIfVecMissing(db, { once } = { once: false }) {
|
|
|
62
64
|
if (count >= VEC_FALLBACK_THRESHOLD) {
|
|
63
65
|
warn("Semantic search is using JS fallback for %d entries. Install sqlite-vec for faster performance.\n See: %s", count, VEC_DOCS_URL);
|
|
64
66
|
if (once)
|
|
65
|
-
|
|
67
|
+
vecInitWarnedDbs.add(db);
|
|
66
68
|
}
|
|
67
69
|
}
|
|
68
70
|
catch {
|
|
@@ -104,6 +106,12 @@ function ensureSchema(db, embeddingDim) {
|
|
|
104
106
|
CREATE INDEX IF NOT EXISTS idx_entries_dir ON entries(dir_path);
|
|
105
107
|
CREATE INDEX IF NOT EXISTS idx_entries_type ON entries(entry_type);
|
|
106
108
|
`);
|
|
109
|
+
// Set version immediately after table creation so a crash before the end of
|
|
110
|
+
// ensureSchema() does not leave the database in a versionless state on next open.
|
|
111
|
+
const versionAfterCreate = getMeta(db, "version");
|
|
112
|
+
if (!versionAfterCreate) {
|
|
113
|
+
setMeta(db, "version", String(DB_VERSION));
|
|
114
|
+
}
|
|
107
115
|
// BLOB-based embedding storage (always available, no sqlite-vec needed)
|
|
108
116
|
db.exec(`
|
|
109
117
|
CREATE TABLE IF NOT EXISTS embeddings (
|
|
@@ -134,9 +142,20 @@ function ensureSchema(db, embeddingDim) {
|
|
|
134
142
|
catch {
|
|
135
143
|
/* ignore */
|
|
136
144
|
}
|
|
145
|
+
// CR-2: Delete stale BLOB embeddings so they don't produce silently wrong
|
|
146
|
+
// similarity scores against the new-dimension vec table.
|
|
147
|
+
try {
|
|
148
|
+
db.exec("DELETE FROM embeddings");
|
|
149
|
+
}
|
|
150
|
+
catch {
|
|
151
|
+
/* ignore */
|
|
152
|
+
}
|
|
137
153
|
}
|
|
138
154
|
const vecExists = db.prepare("SELECT name FROM sqlite_master WHERE type='table' AND name='entries_vec'").get();
|
|
139
155
|
if (!vecExists) {
|
|
156
|
+
if (!Number.isInteger(embeddingDim) || embeddingDim <= 0 || embeddingDim > 4096) {
|
|
157
|
+
throw new Error(`Invalid embedding dimension: ${embeddingDim}`);
|
|
158
|
+
}
|
|
140
159
|
db.exec(`
|
|
141
160
|
CREATE VIRTUAL TABLE entries_vec USING vec0(
|
|
142
161
|
id INTEGER PRIMARY KEY,
|
|
@@ -146,11 +165,6 @@ function ensureSchema(db, embeddingDim) {
|
|
|
146
165
|
}
|
|
147
166
|
setMeta(db, "embeddingDim", String(embeddingDim));
|
|
148
167
|
}
|
|
149
|
-
// Set version if not present
|
|
150
|
-
const version = getMeta(db, "version");
|
|
151
|
-
if (!version) {
|
|
152
|
-
setMeta(db, "version", String(DB_VERSION));
|
|
153
|
-
}
|
|
154
168
|
}
|
|
155
169
|
// ── Meta helpers ────────────────────────────────────────────────────────────
|
|
156
170
|
export function getMeta(db, key) {
|
|
@@ -161,6 +175,13 @@ export function setMeta(db, key, value) {
|
|
|
161
175
|
db.prepare("INSERT OR REPLACE INTO index_meta (key, value) VALUES (?, ?)").run(key, value);
|
|
162
176
|
}
|
|
163
177
|
// ── Entry operations ────────────────────────────────────────────────────────
|
|
178
|
+
/**
|
|
179
|
+
* Insert or update an entry in the `entries` table. Returns the row id.
|
|
180
|
+
*
|
|
181
|
+
* **Important:** This does not update the FTS index. Callers must call
|
|
182
|
+
* `rebuildFts()` after all upserts are complete for full-text search to
|
|
183
|
+
* reflect the changes.
|
|
184
|
+
*/
|
|
164
185
|
export function upsertEntry(db, entryKey, dirPath, filePath, stashDir, entry, searchText) {
|
|
165
186
|
const stmt = db.prepare(`
|
|
166
187
|
INSERT INTO entries (entry_key, dir_path, file_path, stash_dir, entry_json, search_text, entry_type)
|
|
@@ -176,29 +197,50 @@ export function upsertEntry(db, entryKey, dirPath, filePath, stashDir, entry, se
|
|
|
176
197
|
stmt.run(entryKey, dirPath, filePath, stashDir, JSON.stringify(entry), searchText, entry.type);
|
|
177
198
|
// Fetch the row id explicitly since last_insert_rowid() is unreliable for ON CONFLICT DO UPDATE
|
|
178
199
|
const row = db.prepare("SELECT id FROM entries WHERE entry_key = ?").get(entryKey);
|
|
200
|
+
if (!row)
|
|
201
|
+
throw new Error("upsertEntry: entry_key not found after upsert");
|
|
179
202
|
return row.id;
|
|
180
203
|
}
|
|
181
204
|
export function deleteEntriesByDir(db, dirPath) {
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
205
|
+
db.transaction(() => {
|
|
206
|
+
const ids = db.prepare("SELECT id FROM entries WHERE dir_path = ?").all(dirPath);
|
|
207
|
+
deleteRelatedRows(db, ids);
|
|
208
|
+
db.prepare("DELETE FROM entries WHERE dir_path = ?").run(dirPath);
|
|
209
|
+
})();
|
|
185
210
|
}
|
|
186
211
|
export function deleteEntriesByStashDir(db, stashDir) {
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
212
|
+
db.transaction(() => {
|
|
213
|
+
const ids = db.prepare("SELECT id FROM entries WHERE stash_dir = ?").all(stashDir);
|
|
214
|
+
deleteRelatedRows(db, ids);
|
|
215
|
+
db.prepare("DELETE FROM entries WHERE stash_dir = ?").run(stashDir);
|
|
216
|
+
})();
|
|
190
217
|
}
|
|
218
|
+
const SQLITE_CHUNK_SIZE = 500;
|
|
191
219
|
function deleteRelatedRows(db, ids) {
|
|
192
|
-
|
|
220
|
+
if (ids.length === 0)
|
|
221
|
+
return;
|
|
222
|
+
const numericIds = ids.map((r) => r.id);
|
|
223
|
+
const vecAvail = isVecAvailable(db);
|
|
224
|
+
// Process in chunks to stay within SQLITE_MAX_VARIABLE_NUMBER
|
|
225
|
+
for (let i = 0; i < numericIds.length; i += SQLITE_CHUNK_SIZE) {
|
|
226
|
+
const chunk = numericIds.slice(i, i + SQLITE_CHUNK_SIZE);
|
|
227
|
+
const placeholders = chunk.map(() => "?").join(",");
|
|
193
228
|
try {
|
|
194
|
-
db.prepare(
|
|
229
|
+
db.prepare(`DELETE FROM embeddings WHERE id IN (${placeholders})`).run(...chunk);
|
|
195
230
|
}
|
|
196
231
|
catch {
|
|
197
232
|
/* ignore */
|
|
198
233
|
}
|
|
199
|
-
|
|
234
|
+
// HI-1: Also delete from FTS table so orphaned FTS rows don't remain
|
|
235
|
+
try {
|
|
236
|
+
db.prepare(`DELETE FROM entries_fts WHERE entry_id IN (${placeholders})`).run(...chunk);
|
|
237
|
+
}
|
|
238
|
+
catch {
|
|
239
|
+
/* ignore */
|
|
240
|
+
}
|
|
241
|
+
if (vecAvail) {
|
|
200
242
|
try {
|
|
201
|
-
db.prepare(
|
|
243
|
+
db.prepare(`DELETE FROM entries_vec WHERE id IN (${placeholders})`).run(...chunk);
|
|
202
244
|
}
|
|
203
245
|
catch {
|
|
204
246
|
/* ignore */
|
|
@@ -207,8 +249,14 @@ function deleteRelatedRows(db, ids) {
|
|
|
207
249
|
}
|
|
208
250
|
}
|
|
209
251
|
export function rebuildFts(db) {
|
|
210
|
-
|
|
211
|
-
|
|
252
|
+
// CR-1: Wrap DELETE + INSERT in a single transaction so the FTS table is
|
|
253
|
+
// never left empty between the two statements if a crash occurs.
|
|
254
|
+
// HI-14: Store the integer id directly (FTS5 stores all content as text
|
|
255
|
+
// internally; the join in searchFts compares numerically without CAST).
|
|
256
|
+
db.transaction(() => {
|
|
257
|
+
db.exec("DELETE FROM entries_fts");
|
|
258
|
+
db.exec("INSERT INTO entries_fts (entry_id, search_text) SELECT id, search_text FROM entries");
|
|
259
|
+
})();
|
|
212
260
|
}
|
|
213
261
|
// ── Vector operations ───────────────────────────────────────────────────────
|
|
214
262
|
export function upsertEmbedding(db, entryId, embedding) {
|
|
@@ -235,7 +283,9 @@ export function searchVec(db, queryEmbedding, k) {
|
|
|
235
283
|
.prepare("SELECT id, distance FROM entries_vec WHERE embedding MATCH ? AND k = ?")
|
|
236
284
|
.all(buf, k);
|
|
237
285
|
}
|
|
238
|
-
catch {
|
|
286
|
+
catch (err) {
|
|
287
|
+
// MD-5: Log the failure so it's visible in diagnostics
|
|
288
|
+
console.warn("[db] searchVec (sqlite-vec path) failed:", err instanceof Error ? err.message : String(err));
|
|
239
289
|
return [];
|
|
240
290
|
}
|
|
241
291
|
}
|
|
@@ -269,7 +319,9 @@ function searchBlobVec(db, queryEmbedding, k) {
|
|
|
269
319
|
distance: Math.sqrt(2 * Math.max(0, 1 - similarity)),
|
|
270
320
|
}));
|
|
271
321
|
}
|
|
272
|
-
catch {
|
|
322
|
+
catch (err) {
|
|
323
|
+
// MD-5: Log the failure so it's visible in diagnostics
|
|
324
|
+
console.warn("[db] searchBlobVec (JS fallback) failed:", err instanceof Error ? err.message : String(err));
|
|
273
325
|
return [];
|
|
274
326
|
}
|
|
275
327
|
}
|
|
@@ -280,12 +332,13 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
280
332
|
return [];
|
|
281
333
|
let sql;
|
|
282
334
|
let params;
|
|
335
|
+
// HI-14: Join on integer entry_id directly (no CAST needed; we store integer)
|
|
283
336
|
if (entryType && entryType !== "any") {
|
|
284
337
|
sql = `
|
|
285
338
|
SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
|
|
286
339
|
bm25(entries_fts) AS bm25Score
|
|
287
340
|
FROM entries_fts f
|
|
288
|
-
JOIN entries e ON e.id =
|
|
341
|
+
JOIN entries e ON e.id = f.entry_id
|
|
289
342
|
WHERE entries_fts MATCH ?
|
|
290
343
|
AND e.entry_type = ?
|
|
291
344
|
ORDER BY bm25Score
|
|
@@ -298,7 +351,7 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
298
351
|
SELECT e.id, e.file_path AS filePath, e.entry_json, e.search_text AS searchText,
|
|
299
352
|
bm25(entries_fts) AS bm25Score
|
|
300
353
|
FROM entries_fts f
|
|
301
|
-
JOIN entries e ON e.id =
|
|
354
|
+
JOIN entries e ON e.id = f.entry_id
|
|
302
355
|
WHERE entries_fts MATCH ?
|
|
303
356
|
ORDER BY bm25Score
|
|
304
357
|
LIMIT ?
|
|
@@ -307,13 +360,26 @@ export function searchFts(db, query, limit, entryType) {
|
|
|
307
360
|
}
|
|
308
361
|
try {
|
|
309
362
|
const rows = db.prepare(sql).all(...params);
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
entry
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
363
|
+
// CR-6: Guard against corrupt JSON — skip the row rather than crashing
|
|
364
|
+
const results = [];
|
|
365
|
+
for (const row of rows) {
|
|
366
|
+
let entry;
|
|
367
|
+
try {
|
|
368
|
+
entry = JSON.parse(row.entry_json);
|
|
369
|
+
}
|
|
370
|
+
catch {
|
|
371
|
+
console.warn(`[db] searchFts: skipping entry id=${row.id} — corrupt entry_json`);
|
|
372
|
+
continue;
|
|
373
|
+
}
|
|
374
|
+
results.push({
|
|
375
|
+
id: row.id,
|
|
376
|
+
filePath: row.filePath,
|
|
377
|
+
entry,
|
|
378
|
+
searchText: row.searchText,
|
|
379
|
+
bm25Score: row.bm25Score,
|
|
380
|
+
});
|
|
381
|
+
}
|
|
382
|
+
return results;
|
|
317
383
|
}
|
|
318
384
|
catch {
|
|
319
385
|
return [];
|
|
@@ -323,11 +389,13 @@ function sanitizeFtsQuery(query) {
|
|
|
323
389
|
const tokens = query
|
|
324
390
|
.replace(/[^a-zA-Z0-9\s]/g, " ")
|
|
325
391
|
.split(/\s+/)
|
|
326
|
-
.filter((t) => t.length >=
|
|
392
|
+
.filter((t) => t.length >= 2);
|
|
327
393
|
if (tokens.length === 0)
|
|
328
394
|
return "";
|
|
329
|
-
// Use
|
|
330
|
-
|
|
395
|
+
// MD-1: Use OR so that any matching token returns results (better recall for
|
|
396
|
+
// exploratory search). Use unquoted tokens so the porter stemmer can
|
|
397
|
+
// normalize word forms.
|
|
398
|
+
return tokens.join(" OR ");
|
|
331
399
|
}
|
|
332
400
|
// ── All entries ─────────────────────────────────────────────────────────────
|
|
333
401
|
export function getAllEntries(db, entryType) {
|
|
@@ -343,15 +411,28 @@ export function getAllEntries(db, entryType) {
|
|
|
343
411
|
params = [];
|
|
344
412
|
}
|
|
345
413
|
const rows = db.prepare(sql).all(...params);
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
414
|
+
// CR-6: Guard against corrupt JSON — skip the row rather than crashing
|
|
415
|
+
const entries = [];
|
|
416
|
+
for (const row of rows) {
|
|
417
|
+
let entry;
|
|
418
|
+
try {
|
|
419
|
+
entry = JSON.parse(row.entry_json);
|
|
420
|
+
}
|
|
421
|
+
catch {
|
|
422
|
+
console.warn(`[db] getAllEntries: skipping entry id=${row.id} — corrupt entry_json`);
|
|
423
|
+
continue;
|
|
424
|
+
}
|
|
425
|
+
entries.push({
|
|
426
|
+
id: row.id,
|
|
427
|
+
entryKey: row.entry_key,
|
|
428
|
+
dirPath: row.dir_path,
|
|
429
|
+
filePath: row.file_path,
|
|
430
|
+
stashDir: row.stash_dir,
|
|
431
|
+
entry,
|
|
432
|
+
searchText: row.search_text,
|
|
433
|
+
});
|
|
434
|
+
}
|
|
435
|
+
return entries;
|
|
355
436
|
}
|
|
356
437
|
export function getEntryCount(db) {
|
|
357
438
|
const row = db.prepare("SELECT COUNT(*) AS cnt FROM entries").get();
|
|
@@ -361,19 +442,41 @@ export function getEntryById(db, id) {
|
|
|
361
442
|
const row = db.prepare("SELECT file_path, entry_json FROM entries WHERE id = ?").get(id);
|
|
362
443
|
if (!row)
|
|
363
444
|
return undefined;
|
|
364
|
-
|
|
445
|
+
// CR-6: Guard against corrupt JSON
|
|
446
|
+
let entry;
|
|
447
|
+
try {
|
|
448
|
+
entry = JSON.parse(row.entry_json);
|
|
449
|
+
}
|
|
450
|
+
catch {
|
|
451
|
+
console.warn(`[db] getEntryById: skipping entry id=${id} — corrupt entry_json`);
|
|
452
|
+
return undefined;
|
|
453
|
+
}
|
|
454
|
+
return { filePath: row.file_path, entry };
|
|
365
455
|
}
|
|
366
456
|
export function getEntriesByDir(db, dirPath) {
|
|
367
457
|
const rows = db
|
|
368
458
|
.prepare("SELECT id, entry_key, dir_path, file_path, stash_dir, entry_json, search_text FROM entries WHERE dir_path = ?")
|
|
369
459
|
.all(dirPath);
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
460
|
+
// CR-6: Guard against corrupt JSON — skip the row rather than crashing
|
|
461
|
+
const entries = [];
|
|
462
|
+
for (const row of rows) {
|
|
463
|
+
let entry;
|
|
464
|
+
try {
|
|
465
|
+
entry = JSON.parse(row.entry_json);
|
|
466
|
+
}
|
|
467
|
+
catch {
|
|
468
|
+
console.warn(`[db] getEntriesByDir: skipping entry id=${row.id} — corrupt entry_json`);
|
|
469
|
+
continue;
|
|
470
|
+
}
|
|
471
|
+
entries.push({
|
|
472
|
+
id: row.id,
|
|
473
|
+
entryKey: row.entry_key,
|
|
474
|
+
dirPath: row.dir_path,
|
|
475
|
+
filePath: row.file_path,
|
|
476
|
+
stashDir: row.stash_dir,
|
|
477
|
+
entry,
|
|
478
|
+
searchText: row.search_text,
|
|
479
|
+
});
|
|
480
|
+
}
|
|
481
|
+
return entries;
|
|
379
482
|
}
|
package/dist/embedder.js
CHANGED
|
@@ -1,22 +1,29 @@
|
|
|
1
1
|
import { fetchWithTimeout } from "./common";
|
|
2
|
-
|
|
2
|
+
import { warn } from "./warn";
|
|
3
|
+
// Cache the promise itself (not the resolved result) so concurrent calls share
|
|
4
|
+
// the same initialisation work and never download the model twice.
|
|
5
|
+
let localEmbedderPromise;
|
|
3
6
|
async function getLocalEmbedder() {
|
|
4
|
-
if (!
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
7
|
+
if (!localEmbedderPromise) {
|
|
8
|
+
localEmbedderPromise = (async () => {
|
|
9
|
+
let pipeline;
|
|
10
|
+
try {
|
|
11
|
+
const mod = await import("@xenova/transformers");
|
|
12
|
+
pipeline = mod.pipeline;
|
|
13
|
+
}
|
|
14
|
+
catch {
|
|
15
|
+
throw new Error("Semantic search requires @xenova/transformers. Install it with: npm install @xenova/transformers");
|
|
16
|
+
}
|
|
17
|
+
const pipelineFn = pipeline;
|
|
18
|
+
return pipelineFn("feature-extraction", "Xenova/all-MiniLM-L6-v2");
|
|
19
|
+
})();
|
|
20
|
+
// HI-13: Clear the cached promise on failure so the next call retries
|
|
21
|
+
// instead of permanently rejecting every subsequent call with the same error.
|
|
22
|
+
localEmbedderPromise.catch(() => {
|
|
23
|
+
localEmbedderPromise = undefined;
|
|
24
|
+
});
|
|
18
25
|
}
|
|
19
|
-
return
|
|
26
|
+
return localEmbedderPromise;
|
|
20
27
|
}
|
|
21
28
|
async function embedLocal(text) {
|
|
22
29
|
const model = await getLocalEmbedder();
|
|
@@ -111,13 +118,24 @@ async function embedRemoteBatch(texts, config) {
|
|
|
111
118
|
if (!json.data || json.data.length !== batch.length) {
|
|
112
119
|
throw new Error(`Unexpected embedding batch response: expected ${batch.length} embeddings, got ${json.data?.length ?? 0}`);
|
|
113
120
|
}
|
|
114
|
-
|
|
121
|
+
for (const [idx, d] of json.data.entries()) {
|
|
122
|
+
if (!Array.isArray(d.embedding)) {
|
|
123
|
+
throw new Error(`Unexpected embedding at batch index ${idx}: missing or invalid`);
|
|
124
|
+
}
|
|
125
|
+
results.push(d.embedding);
|
|
126
|
+
}
|
|
115
127
|
}
|
|
116
128
|
return results;
|
|
117
129
|
}
|
|
118
130
|
// ── Similarity ──────────────────────────────────────────────────────────────
|
|
119
131
|
export function cosineSimilarity(a, b) {
|
|
120
|
-
|
|
132
|
+
if (a.length !== b.length) {
|
|
133
|
+
// MD-4: Return 0 on dimension mismatch rather than silently computing on a
|
|
134
|
+
// truncated view, which would produce meaningless similarity scores.
|
|
135
|
+
warn("cosineSimilarity: vector dimension mismatch (%d vs %d) — re-index recommended", a.length, b.length);
|
|
136
|
+
return 0;
|
|
137
|
+
}
|
|
138
|
+
const len = a.length;
|
|
121
139
|
if (len === 0)
|
|
122
140
|
return 0;
|
|
123
141
|
let dot = 0, magA = 0, magB = 0;
|
package/dist/errors.js
CHANGED
|
@@ -10,6 +10,8 @@ export class ConfigError extends Error {
|
|
|
10
10
|
constructor(msg) {
|
|
11
11
|
super(msg);
|
|
12
12
|
this.name = "ConfigError";
|
|
13
|
+
// Fixes `instanceof` checks under ES5 transpilation targets.
|
|
14
|
+
Object.setPrototypeOf(this, new.target.prototype);
|
|
13
15
|
}
|
|
14
16
|
}
|
|
15
17
|
/** Raised when the user supplies invalid arguments or input. */
|
|
@@ -17,6 +19,8 @@ export class UsageError extends Error {
|
|
|
17
19
|
constructor(msg) {
|
|
18
20
|
super(msg);
|
|
19
21
|
this.name = "UsageError";
|
|
22
|
+
// Fixes `instanceof` checks under ES5 transpilation targets.
|
|
23
|
+
Object.setPrototypeOf(this, new.target.prototype);
|
|
20
24
|
}
|
|
21
25
|
}
|
|
22
26
|
/** Raised when a requested resource (asset, entry, file) is not found. */
|
|
@@ -24,5 +28,7 @@ export class NotFoundError extends Error {
|
|
|
24
28
|
constructor(msg) {
|
|
25
29
|
super(msg);
|
|
26
30
|
this.name = "NotFoundError";
|
|
31
|
+
// Fixes `instanceof` checks under ES5 transpilation targets.
|
|
32
|
+
Object.setPrototypeOf(this, new.target.prototype);
|
|
27
33
|
}
|
|
28
34
|
}
|
package/dist/file-context.js
CHANGED
|
@@ -68,23 +68,22 @@ export function buildFileContext(stashRoot, absPath) {
|
|
|
68
68
|
const matchers = [];
|
|
69
69
|
/** Renderer lookup by name. */
|
|
70
70
|
const renderers = new Map();
|
|
71
|
-
let
|
|
71
|
+
let builtinsPromise;
|
|
72
72
|
/**
|
|
73
73
|
* Ensure that built-in matchers and renderers are registered.
|
|
74
74
|
* Called lazily on first use of runMatchers/getRenderer.
|
|
75
|
+
* Stores the in-progress promise so parallel callers don't double-register.
|
|
75
76
|
*/
|
|
76
|
-
function ensureBuiltinsRegistered() {
|
|
77
|
-
if (
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
registerBuiltinMatchers();
|
|
87
|
-
registerBuiltinRenderers();
|
|
77
|
+
async function ensureBuiltinsRegistered() {
|
|
78
|
+
if (!builtinsPromise) {
|
|
79
|
+
builtinsPromise = (async () => {
|
|
80
|
+
const { registerBuiltinMatchers } = await import("./matchers.js");
|
|
81
|
+
const { registerBuiltinRenderers } = await import("./renderers.js");
|
|
82
|
+
registerBuiltinMatchers();
|
|
83
|
+
registerBuiltinRenderers();
|
|
84
|
+
})();
|
|
85
|
+
}
|
|
86
|
+
return builtinsPromise;
|
|
88
87
|
}
|
|
89
88
|
/**
|
|
90
89
|
* Register an AssetMatcher.
|
|
@@ -106,15 +105,15 @@ export function registerRenderer(renderer) {
|
|
|
106
105
|
/**
|
|
107
106
|
* Look up a renderer by name.
|
|
108
107
|
*/
|
|
109
|
-
export function getRenderer(name) {
|
|
110
|
-
ensureBuiltinsRegistered();
|
|
108
|
+
export async function getRenderer(name) {
|
|
109
|
+
await ensureBuiltinsRegistered();
|
|
111
110
|
return renderers.get(name);
|
|
112
111
|
}
|
|
113
112
|
/**
|
|
114
113
|
* Return all registered renderers (snapshot, safe to iterate).
|
|
115
114
|
*/
|
|
116
|
-
export function getAllRenderers() {
|
|
117
|
-
ensureBuiltinsRegistered();
|
|
115
|
+
export async function getAllRenderers() {
|
|
116
|
+
await ensureBuiltinsRegistered();
|
|
118
117
|
return Array.from(renderers.values());
|
|
119
118
|
}
|
|
120
119
|
/**
|
|
@@ -128,8 +127,8 @@ export function getAllRenderers() {
|
|
|
128
127
|
* (this lets user-registered matchers override built-in ones).
|
|
129
128
|
* 4. Returns null when no matcher claims the file.
|
|
130
129
|
*/
|
|
131
|
-
export function runMatchers(ctx) {
|
|
132
|
-
ensureBuiltinsRegistered();
|
|
130
|
+
export async function runMatchers(ctx) {
|
|
131
|
+
await ensureBuiltinsRegistered();
|
|
133
132
|
// Collect (result, registrationIndex) pairs from all matchers.
|
|
134
133
|
const hits = [];
|
|
135
134
|
for (let i = 0; i < matchers.length; i++) {
|
package/dist/frontmatter.js
CHANGED
|
@@ -8,6 +8,16 @@
|
|
|
8
8
|
* Parse YAML-subset frontmatter from a Markdown (or similar) string.
|
|
9
9
|
*
|
|
10
10
|
* Returns the parsed key-value data and the remaining body content.
|
|
11
|
+
*
|
|
12
|
+
* **Limitations**: This is a hand-rolled YAML-subset parser with intentional
|
|
13
|
+
* constraints for simplicity and safety:
|
|
14
|
+
* - **No list support**: YAML block sequences (`- item`) and flow arrays
|
|
15
|
+
* (`[a, b, c]`) are silently ignored. List-valued frontmatter keys will
|
|
16
|
+
* produce an empty string or be skipped. Callers must NOT rely on list-
|
|
17
|
+
* valued frontmatter.
|
|
18
|
+
* - **No nested objects beyond one level**: Only a single level of indented
|
|
19
|
+
* key-value pairs is supported.
|
|
20
|
+
* - **Scalar values only**: string, boolean, and number scalars are supported.
|
|
11
21
|
*/
|
|
12
22
|
export function parseFrontmatter(raw) {
|
|
13
23
|
const parsedBlock = parseFrontmatterBlock(raw);
|
|
@@ -46,12 +56,18 @@ export function parseFrontmatter(raw) {
|
|
|
46
56
|
};
|
|
47
57
|
}
|
|
48
58
|
export function parseFrontmatterBlock(raw) {
|
|
49
|
-
|
|
59
|
+
// Handle both LF and CRLF line endings throughout.
|
|
60
|
+
// The closing --- may be preceded by \r\n; capture and strip trailing \r
|
|
61
|
+
// from the frontmatter block so key parsing sees clean LF-terminated lines.
|
|
62
|
+
const match = raw.match(/^---\r?\n([\s\S]*?)\r?\n---(?:\r\n|\r|\n|$)([\s\S]*)$/);
|
|
50
63
|
if (!match)
|
|
51
64
|
return null;
|
|
65
|
+
// Strip any \r characters from the frontmatter block to normalise CRLF → LF
|
|
66
|
+
const frontmatter = match[1].replace(/\r/g, "");
|
|
67
|
+
const content = match[2];
|
|
52
68
|
return {
|
|
53
|
-
frontmatter
|
|
54
|
-
content
|
|
69
|
+
frontmatter,
|
|
70
|
+
content,
|
|
55
71
|
bodyStartLine: countLines(raw.slice(0, match[0].length - match[2].length)) + 1,
|
|
56
72
|
};
|
|
57
73
|
}
|