@mirk/store 0.4.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/sqlite.d.ts +3 -1
- package/dist/adapters/sqlite.js +100 -54
- package/dist/chunk-6YJ66JFO.js +15 -0
- package/dist/chunk-DP4D7CJY.js +59 -0
- package/dist/chunk-KPMRRYTL.js +14 -0
- package/dist/{chunk-EDVHBRXG.js → chunk-OUY3D4RT.js} +45 -1
- package/dist/chunk-TY472NYD.js +91 -0
- package/dist/filter-B9hP-TKF.d.ts +8 -0
- package/dist/graph.d.ts +71 -0
- package/dist/graph.js +97 -0
- package/dist/index.d.ts +5 -2
- package/dist/index.js +18 -2
- package/dist/search.d.ts +25 -0
- package/dist/search.js +16 -0
- package/dist/sql.d.ts +27 -0
- package/dist/sql.js +14 -0
- package/dist/types-0B0Tw1fz.d.ts +49 -0
- package/dist/{types-BqSZEMAB.d.ts → types-B0XrD10b.d.ts} +28 -2
- package/dist/vector.d.ts +8 -2
- package/dist/vector.js +8 -2
- package/package.json +22 -10
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import Database from 'better-sqlite3';
|
|
2
2
|
import { b as SyncStore } from '../types-DyQLNtxa.js';
|
|
3
|
-
import { d as VectorStore } from '../types-
|
|
3
|
+
import { d as VectorStore } from '../types-B0XrD10b.js';
|
|
4
|
+
import { c as SearchStore } from '../types-0B0Tw1fz.js';
|
|
4
5
|
|
|
5
6
|
interface SqliteAdapterOptions {
|
|
6
7
|
/** Path to the SQLite database file. Use ":memory:" for in-memory. */
|
|
@@ -19,6 +20,7 @@ declare class SqliteAdapter {
|
|
|
19
20
|
private readonly db;
|
|
20
21
|
readonly kv: SyncStore;
|
|
21
22
|
readonly vector: VectorStore;
|
|
23
|
+
readonly search: SearchStore;
|
|
22
24
|
constructor(opts: SqliteAdapterOptions);
|
|
23
25
|
/** Close the underlying connection (shared by both facets). */
|
|
24
26
|
close(): void;
|
package/dist/adapters/sqlite.js
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
import {
|
|
2
|
+
sanitizeFtsQuery
|
|
3
|
+
} from "../chunk-6YJ66JFO.js";
|
|
4
|
+
import {
|
|
5
|
+
buildLimitOffset,
|
|
6
|
+
buildOrderBy,
|
|
7
|
+
buildWhereClause,
|
|
8
|
+
hashName
|
|
9
|
+
} from "../chunk-DP4D7CJY.js";
|
|
1
10
|
import {
|
|
2
11
|
assertDimensions,
|
|
3
12
|
bufferToVector,
|
|
@@ -5,6 +14,9 @@ import {
|
|
|
5
14
|
isUsableVector,
|
|
6
15
|
vectorToBuffer
|
|
7
16
|
} from "../chunk-BXM3YDOC.js";
|
|
17
|
+
import {
|
|
18
|
+
matchesWhere
|
|
19
|
+
} from "../chunk-KPMRRYTL.js";
|
|
8
20
|
|
|
9
21
|
// src/adapters/sqlite.ts
|
|
10
22
|
import Database from "better-sqlite3";
|
|
@@ -30,6 +42,7 @@ var SqliteAdapter = class {
|
|
|
30
42
|
db;
|
|
31
43
|
kv;
|
|
32
44
|
vector;
|
|
45
|
+
search;
|
|
33
46
|
constructor(opts) {
|
|
34
47
|
const ownsDb = opts.db === void 0;
|
|
35
48
|
this.db = opts.db ?? new Database(opts.path);
|
|
@@ -37,6 +50,7 @@ var SqliteAdapter = class {
|
|
|
37
50
|
this.db.pragma("journal_mode = WAL");
|
|
38
51
|
this.kv = new SqliteKvFacet(this.db);
|
|
39
52
|
this.vector = new SqliteVectorFacet(this.db, opts.path, opts.dimensions, opts.forceJsCosine);
|
|
53
|
+
this.search = new SqliteSearchFacet(this.db);
|
|
40
54
|
} catch (err) {
|
|
41
55
|
if (ownsDb) {
|
|
42
56
|
try {
|
|
@@ -263,6 +277,9 @@ var SqliteVectorFacet = class {
|
|
|
263
277
|
metadata: row.metadata === null ? void 0 : JSON.parse(row.metadata)
|
|
264
278
|
};
|
|
265
279
|
}
|
|
280
|
+
has(collection, id) {
|
|
281
|
+
return this.db.prepare(`SELECT 1 FROM vectors WHERE collection = ? AND id = ?`).get(collection, id) !== void 0;
|
|
282
|
+
}
|
|
266
283
|
remove(collection, id) {
|
|
267
284
|
if (this.accelerated) {
|
|
268
285
|
const row = this.db.prepare(`SELECT rowid FROM vectors WHERE collection = ? AND id = ?`).get(collection, id);
|
|
@@ -280,13 +297,14 @@ var SqliteVectorFacet = class {
|
|
|
280
297
|
this.requireDims(query);
|
|
281
298
|
const topK = opts?.topK ?? 10;
|
|
282
299
|
const minScore = opts?.minScore;
|
|
283
|
-
|
|
300
|
+
const hasFilters = !!(opts?.where || opts?.whereNot);
|
|
301
|
+
if (this.accelerated && isUsableVector(query) && !hasFilters) {
|
|
284
302
|
try {
|
|
285
303
|
return this.searchVec(collection, query, topK, minScore);
|
|
286
304
|
} catch {
|
|
287
305
|
}
|
|
288
306
|
}
|
|
289
|
-
return this.searchJs(collection, query, topK, minScore);
|
|
307
|
+
return this.searchJs(collection, query, topK, minScore, opts?.where, opts?.whereNot);
|
|
290
308
|
}
|
|
291
309
|
searchVec(collection, query, topK, minScore) {
|
|
292
310
|
const table = this.ensureVecTable(collection);
|
|
@@ -310,75 +328,103 @@ var SqliteVectorFacet = class {
|
|
|
310
328
|
out.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
|
|
311
329
|
return out;
|
|
312
330
|
}
|
|
313
|
-
searchJs(collection, query, topK, minScore) {
|
|
331
|
+
searchJs(collection, query, topK, minScore, where, whereNot) {
|
|
314
332
|
const rows = this.db.prepare(`SELECT id, vec, metadata FROM vectors WHERE collection = ?`).all(collection);
|
|
315
333
|
const scored = [];
|
|
316
334
|
for (const row of rows) {
|
|
335
|
+
const meta = row.metadata === null ? void 0 : JSON.parse(row.metadata);
|
|
336
|
+
if (where && !matchesWhere(meta, where)) continue;
|
|
337
|
+
if (whereNot && matchesWhere(meta, whereNot)) continue;
|
|
317
338
|
const vec = bufferToVector(row.vec);
|
|
318
339
|
if (!isUsableVector(vec)) continue;
|
|
319
340
|
const score = cosineSimilarity(query, vec);
|
|
320
341
|
if (!Number.isFinite(score)) continue;
|
|
321
342
|
if (minScore !== void 0 && score < minScore) continue;
|
|
322
|
-
scored.push({
|
|
323
|
-
id: row.id,
|
|
324
|
-
score,
|
|
325
|
-
metadata: row.metadata === null ? void 0 : JSON.parse(row.metadata)
|
|
326
|
-
});
|
|
343
|
+
scored.push({ id: row.id, score, metadata: meta });
|
|
327
344
|
}
|
|
328
345
|
scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
|
|
329
346
|
return scored.slice(0, topK);
|
|
330
347
|
}
|
|
331
348
|
};
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
function buildWhereClause(filter) {
|
|
336
|
-
if (!filter?.where || Object.keys(filter.where).length === 0) {
|
|
337
|
-
return { clause: "", params: [] };
|
|
338
|
-
}
|
|
339
|
-
const conditions = [];
|
|
340
|
-
const params = [];
|
|
341
|
-
for (const [key, value] of Object.entries(filter.where)) {
|
|
342
|
-
const path = jsonPath(key);
|
|
343
|
-
if (value === null) {
|
|
344
|
-
conditions.push(`json_type(data, ?) = 'null'`);
|
|
345
|
-
params.push(path);
|
|
346
|
-
} else {
|
|
347
|
-
const bound = typeof value === "boolean" ? value ? 1 : 0 : value;
|
|
348
|
-
conditions.push(`json_extract(data, ?) = ?`);
|
|
349
|
-
params.push(path, bound);
|
|
350
|
-
}
|
|
349
|
+
/**
 * Full-text search facet backed by SQLite FTS5.
 *
 * Every collection gets its own document table plus an external-content FTS5
 * table; triggers on the document table keep the FTS index in step with
 * inserts, updates and deletes.
 */
var SqliteSearchFacet = class {
  constructor(db) {
    this.db = db;
  }
  db;
  // Document-table names whose schema has already been created by this instance.
  ensured = /* @__PURE__ */ new Set();
  /** Name of the document table for `collection` (sanitized name + hash suffix). */
  baseTable(collection) {
    return `search_docs_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
  }
  /** Name of the FTS5 table for `collection` (sanitized name + hash suffix). */
  ftsTable(collection) {
    return `search_fts_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
  }
  /**
   * Create the tables and triggers for `collection` if this instance has not
   * done so yet, and return both table names.
   */
  ensure(collection) {
    const docs = this.baseTable(collection);
    const fts = this.ftsTable(collection);
    if (this.ensured.has(docs)) return { docs, fts };
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS ${docs} (
        id TEXT PRIMARY KEY,
        text TEXT NOT NULL,
        meta_json TEXT
      );
      CREATE VIRTUAL TABLE IF NOT EXISTS ${fts} USING fts5(
        text, content='${docs}', content_rowid='rowid', tokenize='unicode61'
      );
      CREATE TRIGGER IF NOT EXISTS ${docs}_ai AFTER INSERT ON ${docs} BEGIN
        INSERT INTO ${fts}(rowid, text) VALUES (new.rowid, new.text);
      END;
      CREATE TRIGGER IF NOT EXISTS ${docs}_ad AFTER DELETE ON ${docs} BEGIN
        INSERT INTO ${fts}(${fts}, rowid, text) VALUES('delete', old.rowid, old.text);
      END;
      CREATE TRIGGER IF NOT EXISTS ${docs}_au AFTER UPDATE ON ${docs} BEGIN
        INSERT INTO ${fts}(${fts}, rowid, text) VALUES('delete', old.rowid, old.text);
        INSERT INTO ${fts}(rowid, text) VALUES (new.rowid, new.text);
      END;
    `);
    this.ensured.add(docs);
    return { docs, fts };
  }
  /** Insert `doc`, or overwrite the text and metadata of an existing row with the same id. */
  index(collection, doc) {
    const { docs } = this.ensure(collection);
    // Absent metadata is stored as SQL NULL rather than the string "undefined".
    const metaJson = doc.meta === void 0 ? null : JSON.stringify(doc.meta);
    this.db.prepare(
      `INSERT INTO ${docs}(id, text, meta_json) VALUES (?, ?, ?)
       ON CONFLICT(id) DO UPDATE SET text = excluded.text, meta_json = excluded.meta_json`
    ).run(doc.id, doc.text, metaJson);
  }
  /** Index every document of `docs` inside a single transaction. */
  indexMany(collection, docs) {
    const tx = this.db.transaction((items) => {
      for (const doc of items) this.index(collection, doc);
    });
    tx(docs);
  }
  /** Delete the document with `id`; returns true when a row was removed. */
  remove(collection, id) {
    const { docs } = this.ensure(collection);
    return this.db.prepare(`DELETE FROM ${docs} WHERE id = ?`).run(id).changes > 0;
  }
  /**
   * Run a sanitized FTS5 MATCH query against `collection`.
   *
   * Rows come back ordered by `bm25()` ascending, then id; the reported
   * `score` is the negated bm25 value. `opts.filter.where`, when given, is
   * applied in JS against each row's parsed metadata. At most `opts.limit`
   * hits (default 10) are returned.
   *
   * The limit is normalized before use: a negative, zero or NaN limit yields
   * no results. Previously a negative limit reached `slice(0, limit)` and
   * returned all but the last |limit| hits.
   */
  search(collection, query, opts) {
    const { docs, fts } = this.ensure(collection);
    const sanitized = sanitizeFtsQuery(query);
    if (sanitized.length === 0) return [];
    const requested = opts?.limit ?? 10;
    // `requested > 0` is false for NaN and negatives; Infinity stays unbounded.
    const limit = requested > 0 ? Math.floor(requested) : 0;
    if (limit === 0) return [];
    const rows = this.db.prepare(
      `SELECT d.id AS id, d.meta_json AS meta_json, bm25(${fts}) AS bm
       FROM ${fts}
       JOIN ${docs} d ON d.rowid = ${fts}.rowid
       WHERE ${fts} MATCH ?
       ORDER BY bm, d.id`
    ).all(sanitized);
    const where = opts?.filter?.where;
    const out = [];
    for (const r of rows) {
      const meta = r.meta_json === null ? {} : JSON.parse(r.meta_json);
      if (where && !matchesWhere(meta, where)) continue;
      out.push({ id: r.id, score: -r.bm, meta });
      // Rows are already in final order, so stop once enough hits are collected.
      if (out.length >= limit) break;
    }
    return out;
  }
};
|
|
382
428
|
export {
|
|
383
429
|
SqliteAdapter
|
|
384
430
|
};
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
// src/search/tokenize.ts
|
|
2
|
+
/**
 * Split `text` into lowercase word tokens.
 * A token is a maximal run of Unicode letters and digits; everything else
 * separates tokens. Empty or missing input yields an empty array.
 */
function tokenize(text) {
  if (!text) return [];
  const words = text.toLowerCase().match(/[\p{L}\p{N}]+/gu);
  // `match` returns null when nothing matched.
  return words === null ? [] : words;
}
|
|
6
|
+
/**
 * Turn a raw query string into an FTS5 MATCH expression.
 * The query is tokenized, each token is wrapped in double quotes (with any
 * embedded quote doubled), and the quoted tokens are joined with OR.
 * Returns "" when the query contains no tokens.
 */
function sanitizeFtsQuery(q) {
  const quoted = [];
  for (const term of tokenize(q)) {
    quoted.push(`"${term.replace(/"/g, '""')}"`);
  }
  // Joining an empty list already produces "".
  return quoted.join(" OR ");
}
|
|
11
|
+
|
|
12
|
+
export {
|
|
13
|
+
tokenize,
|
|
14
|
+
sanitizeFtsQuery
|
|
15
|
+
};
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
// src/sql.ts
|
|
2
|
+
/**
 * Build the JSON path `$."field"` for one top-level key.
 * Every double quote inside `field` is doubled before it is wrapped in quotes.
 */
function jsonPath(field) {
  const escaped = field.replace(/"/g, '""');
  return '$."' + escaped + '"';
}
|
|
5
|
+
/**
 * Translate `filter.where` into a parameterized SQL WHERE clause over the
 * JSON `data` column.
 *
 * Returns `{ clause, params }`. `clause` is "" when there is nothing to
 * filter on, otherwise it starts with " WHERE " and ANDs one condition per
 * key. A `null` value is matched with `json_type(...) = 'null'`; booleans are
 * bound as 1/0; every other value is bound as-is.
 */
function buildWhereClause(filter) {
  const entries = filter?.where ? Object.entries(filter.where) : [];
  if (entries.length === 0) {
    return { clause: "", params: [] };
  }
  const params = [];
  const conditions = entries.map(([field, expected]) => {
    const path = jsonPath(field);
    if (expected === null) {
      params.push(path);
      return `json_type(data, ?) = 'null'`;
    }
    const bound = typeof expected === "boolean" ? Number(expected) : expected;
    params.push(path, bound);
    return `json_extract(data, ?) = ?`;
  });
  return { clause: ` WHERE ${conditions.join(" AND ")}`, params };
}
|
|
24
|
+
/**
 * Translate `filter.sortBy` / `filter.sortDir` into a parameterized ORDER BY
 * clause over the JSON `data` column.
 *
 * Returns `{ clause: "", params: [] }` when no sort field is given. Otherwise
 * rows are ordered by the `IS NULL` flag of the field first and by the field
 * value second, descending only when `sortDir` is exactly "desc". The JSON
 * path is bound twice, once per placeholder.
 */
function buildOrderBy(filter) {
  const sortField = filter?.sortBy;
  if (!sortField) return { clause: "", params: [] };
  const path = jsonPath(sortField);
  const direction = filter.sortDir === "desc" ? "DESC" : "ASC";
  const clause = " ORDER BY json_extract(data, ?) IS NULL, json_extract(data, ?) " + direction;
  return { clause, params: [path, path] };
}
|
|
33
|
+
/**
 * Render `filter.limit` / `filter.offset` as a SQL ` LIMIT n OFFSET m` suffix.
 *
 * Both values are floored and clamped into [0, Number.MAX_SAFE_INTEGER], so
 * the interpolated text is always a plain non-negative integer literal.
 * Previously `Infinity`, `NaN` or values >= 1e21 were rendered as
 * `LIMIT Infinity`, `LIMIT NaN` or `OFFSET 1e+21`, which is not a valid
 * integer LIMIT/OFFSET expression.
 *
 * - A limit that floors to NaN is ignored, the same way a NaN offset already
 *   is (it fails the `offset > 0` check).
 * - An offset without a limit emits ` LIMIT -1` first, because OFFSET must
 *   follow a LIMIT.
 *
 * @param filter Optional object with numeric `limit` and/or `offset`.
 * @returns The SQL suffix, or "" when neither value applies.
 */
function buildLimitOffset(filter) {
  // Floor and clamp to a safe integer; undefined signals "not a number".
  const toRowCount = (value) => {
    const whole = Math.floor(value);
    if (Number.isNaN(whole)) return void 0;
    return Math.min(Number.MAX_SAFE_INTEGER, Math.max(0, whole));
  };
  let sql = "";
  let hasLimit = false;
  if (filter?.limit !== void 0) {
    const limit = toRowCount(filter.limit);
    if (limit !== void 0) {
      sql += ` LIMIT ${limit}`;
      hasLimit = true;
    }
  }
  if (filter?.offset !== void 0 && filter.offset > 0) {
    const offset = toRowCount(filter.offset);
    if (offset !== void 0) {
      if (!hasLimit) sql += " LIMIT -1";
      sql += ` OFFSET ${offset}`;
    }
  }
  return sql;
}
|
|
44
|
+
/**
 * 32-bit FNV-1a hash of `s`, computed over its UTF-16 code units and
 * rendered as an unsigned base-36 string.
 */
function hashName(s) {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;
  let hash = FNV_OFFSET_BASIS;
  for (let idx = 0, len = s.length; idx < len; idx += 1) {
    // XOR the code unit in, then multiply with 32-bit wrap-around.
    hash = Math.imul(hash ^ s.charCodeAt(idx), FNV_PRIME);
  }
  // `>>> 0` reinterprets the signed 32-bit result as unsigned.
  const unsigned = hash >>> 0;
  return unsigned.toString(36);
}
|
|
52
|
+
|
|
53
|
+
export {
|
|
54
|
+
jsonPath,
|
|
55
|
+
buildWhereClause,
|
|
56
|
+
buildOrderBy,
|
|
57
|
+
buildLimitOffset,
|
|
58
|
+
hashName
|
|
59
|
+
};
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// src/vector/filter.ts
|
|
2
|
+
/**
 * Serialize `value` to JSON with the keys of every non-array object sorted,
 * so two structurally equal values produce the same string regardless of
 * key insertion order.
 */
function canonicalJson(value) {
  return JSON.stringify(value, (_key, val) => {
    if (val === null || typeof val !== "object" || Array.isArray(val)) return val;
    // Null-prototype target so a "__proto__" key becomes an ordinary own property.
    const sorted = Object.create(null);
    for (const k of Object.keys(val).sort()) sorted[k] = val[k];
    return sorted;
  });
}
/**
 * Return true when `metadata` satisfies every exact-match condition in `filter`.
 *
 * - Missing metadata never matches; false is returned before `filter` is read.
 * - Each filter key is compared against `metadata[key]`, first by identity
 *   and then by JSON serialization, so values compare the way they would
 *   after being stored and parsed back.
 * - The serialization is key-order-insensitive: `{a: 1, b: 2}` matches
 *   `{b: 2, a: 1}`. Plain `JSON.stringify` comparison rejected such pairs.
 *   Array element order still matters.
 *
 * @param metadata Record to test, or undefined.
 * @param filter Map of field name to expected value.
 * @returns Whether all conditions hold.
 */
function matchesWhere(metadata, filter) {
  if (!metadata) return false;
  for (const [key, expected] of Object.entries(filter)) {
    const actual = metadata[key];
    // Fast path: identical primitives or the very same object.
    if (actual === expected) continue;
    if (canonicalJson(actual) !== canonicalJson(expected)) return false;
  }
  return true;
}
|
|
11
|
+
|
|
12
|
+
export {
|
|
13
|
+
matchesWhere
|
|
14
|
+
};
|
|
@@ -3,6 +3,9 @@ import {
|
|
|
3
3
|
cosineSimilarity,
|
|
4
4
|
isUsableVector
|
|
5
5
|
} from "./chunk-BXM3YDOC.js";
|
|
6
|
+
import {
|
|
7
|
+
matchesWhere
|
|
8
|
+
} from "./chunk-KPMRRYTL.js";
|
|
6
9
|
|
|
7
10
|
// src/vector/memory.ts
|
|
8
11
|
var InMemoryVectorStore = class {
|
|
@@ -26,6 +29,9 @@ var InMemoryVectorStore = class {
|
|
|
26
29
|
const doc = this.collections.get(collection)?.get(id);
|
|
27
30
|
return doc ?? null;
|
|
28
31
|
}
|
|
32
|
+
has(collection, id) {
|
|
33
|
+
return this.collections.get(collection)?.has(id) ?? false;
|
|
34
|
+
}
|
|
29
35
|
remove(collection, id) {
|
|
30
36
|
return this.collections.get(collection)?.delete(id) ?? false;
|
|
31
37
|
}
|
|
@@ -40,6 +46,8 @@ var InMemoryVectorStore = class {
|
|
|
40
46
|
if (!coll) return [];
|
|
41
47
|
const scored = [];
|
|
42
48
|
for (const doc of coll.values()) {
|
|
49
|
+
if (opts?.where && !matchesWhere(doc.metadata, opts.where)) continue;
|
|
50
|
+
if (opts?.whereNot && matchesWhere(doc.metadata, opts.whereNot)) continue;
|
|
43
51
|
if (!isUsableVector(doc.vector)) continue;
|
|
44
52
|
const score = cosineSimilarity(query, doc.vector);
|
|
45
53
|
if (!Number.isFinite(score)) continue;
|
|
@@ -59,6 +67,42 @@ var InMemoryVectorStore = class {
|
|
|
59
67
|
}
|
|
60
68
|
};
|
|
61
69
|
|
|
70
|
+
// src/vector/to-async-vector.ts
|
|
71
|
+
var AsyncVectorStoreAdapter = class {
|
|
72
|
+
constructor(sync) {
|
|
73
|
+
this.sync = sync;
|
|
74
|
+
}
|
|
75
|
+
sync;
|
|
76
|
+
get meta() {
|
|
77
|
+
return this.sync.meta;
|
|
78
|
+
}
|
|
79
|
+
async upsert(collection, doc) {
|
|
80
|
+
this.sync.upsert(collection, doc);
|
|
81
|
+
}
|
|
82
|
+
async upsertMany(collection, docs) {
|
|
83
|
+
this.sync.upsertMany(collection, docs);
|
|
84
|
+
}
|
|
85
|
+
async get(collection, id) {
|
|
86
|
+
return this.sync.get(collection, id);
|
|
87
|
+
}
|
|
88
|
+
async has(collection, id) {
|
|
89
|
+
return this.sync.has(collection, id);
|
|
90
|
+
}
|
|
91
|
+
async remove(collection, id) {
|
|
92
|
+
return this.sync.remove(collection, id);
|
|
93
|
+
}
|
|
94
|
+
async count(collection) {
|
|
95
|
+
return this.sync.count(collection);
|
|
96
|
+
}
|
|
97
|
+
async search(collection, query, opts) {
|
|
98
|
+
return this.sync.search(collection, query, opts);
|
|
99
|
+
}
|
|
100
|
+
};
|
|
101
|
+
/** Wrap a synchronous vector store in an `AsyncVectorStoreAdapter`. */
function toAsyncVector(store) {
  const adapter = new AsyncVectorStoreAdapter(store);
  return adapter;
}
|
|
104
|
+
|
|
62
105
|
export {
|
|
63
|
-
InMemoryVectorStore
|
|
106
|
+
InMemoryVectorStore,
|
|
107
|
+
toAsyncVector
|
|
64
108
|
};
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
import {
|
|
2
|
+
tokenize
|
|
3
|
+
} from "./chunk-6YJ66JFO.js";
|
|
4
|
+
import {
|
|
5
|
+
matchesWhere
|
|
6
|
+
} from "./chunk-KPMRRYTL.js";
|
|
7
|
+
|
|
8
|
+
// src/search/memory.ts
|
|
9
|
+
// BM25 term-frequency constant: used as (K1 + 1) in the numerator and as the
// multiplier of the length-normalization term in the denominator of each term score.
var K1 = 1.2;
|
|
10
|
+
// BM25 length-normalization constant: weights the doc-length / average-length
// ratio inside the score denominator (1 - B + B * dl / avgdl).
var B = 0.75;
|
|
11
|
+
/**
 * In-memory full-text search store with BM25-style scoring.
 *
 * Each collection keeps its indexed documents, a document-frequency table
 * (`df`: term -> number of documents containing it) and the summed token
 * length of all documents (`totalLen`), from which the average document
 * length is derived at query time.
 */
var InMemorySearchStore = class {
  collections = /* @__PURE__ */ new Map();
  /**
   * Index `doc`, replacing any previous document with the same id.
   * The old entry is removed first so `df` and `totalLen` stay consistent.
   */
  index(collection, doc) {
    const coll = this.collectionFor(collection);
    this.removeFromColl(coll, doc.id);
    const tokens = tokenize(doc.text);
    // Per-document term frequencies.
    const tf = /* @__PURE__ */ new Map();
    for (const t of tokens) tf.set(t, (tf.get(t) ?? 0) + 1);
    const indexed = {
      id: doc.id,
      meta: doc.meta ?? {},
      tokens,
      tf,
      dl: tokens.length
    };
    coll.docs.set(doc.id, indexed);
    coll.totalLen += indexed.dl;
    // Each distinct term counts once per document.
    for (const term of tf.keys()) coll.df.set(term, (coll.df.get(term) ?? 0) + 1);
  }
  /** Index every document of `docs`, in order. */
  indexMany(collection, docs) {
    for (const doc of docs) this.index(collection, doc);
  }
  /** Remove the document with `id`; returns false when the collection or document is unknown. */
  remove(collection, id) {
    const coll = this.collections.get(collection);
    if (!coll) return false;
    return this.removeFromColl(coll, id);
  }
  /**
   * Score every document of `collection` against the tokens of `query`.
   *
   * A document is a hit when it contains at least one query token. Terms
   * whose idf is not positive contribute nothing to the score but still count
   * as a match. `opts.filter.where`, when given, is checked against the
   * document metadata. Hits are ordered by descending score, then id, and cut
   * to `opts.limit` (default 10).
   *
   * The limit is normalized before use: a negative, zero or NaN limit yields
   * no results. Previously a negative limit reached `slice(0, limit)` and
   * returned all but the last |limit| hits.
   */
  search(collection, query, opts) {
    const coll = this.collections.get(collection);
    const qTokens = tokenize(query);
    if (!coll || qTokens.length === 0) return [];
    const requested = opts?.limit ?? 10;
    // `requested > 0` is false for NaN and negatives; Infinity stays unbounded.
    const limit = requested > 0 ? Math.floor(requested) : 0;
    if (limit === 0) return [];
    const where = opts?.filter?.where;
    const n = coll.docs.size;
    const avgdl = n > 0 ? coll.totalLen / n : 0;
    const scored = [];
    for (const doc of coll.docs.values()) {
      if (where && !matchesWhere(doc.meta, where)) continue;
      let matched = false;
      let score = 0;
      for (const qt of qTokens) {
        const tf = doc.tf.get(qt) ?? 0;
        if (tf === 0) continue;
        matched = true;
        const df = coll.df.get(qt) ?? 0;
        const idf = Math.log((n - df + 0.5) / (df + 0.5));
        // Terms present in at least half of the documents add no score.
        if (idf <= 0) continue;
        const denom = tf + K1 * (1 - B + B * (avgdl > 0 ? doc.dl / avgdl : 0));
        score += idf * (tf * (K1 + 1)) / denom;
      }
      if (!matched) continue;
      scored.push({ id: doc.id, score, meta: doc.meta });
    }
    scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
    return scored.slice(0, limit);
  }
  /** Remove `id` from `coll` and roll back its contribution to `totalLen` and `df`. */
  removeFromColl(coll, id) {
    const doc = coll.docs.get(id);
    if (!doc) return false;
    coll.docs.delete(id);
    coll.totalLen -= doc.dl;
    for (const term of doc.tf.keys()) {
      const next = (coll.df.get(term) ?? 0) - 1;
      // Drop terms no remaining document contains.
      if (next <= 0) coll.df.delete(term);
      else coll.df.set(term, next);
    }
    return true;
  }
  /** Return the state of collection `name`, creating an empty one on first use. */
  collectionFor(name) {
    let coll = this.collections.get(name);
    if (!coll) {
      coll = { docs: /* @__PURE__ */ new Map(), df: /* @__PURE__ */ new Map(), totalLen: 0 };
      this.collections.set(name, coll);
    }
    return coll;
  }
};
|
|
88
|
+
|
|
89
|
+
export {
|
|
90
|
+
InMemorySearchStore
|
|
91
|
+
};
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/** Returns true when `metadata` satisfies ALL exact-match conditions in `filter`.
|
|
2
|
+
* A missing metadata object never satisfies a non-empty filter (no field can match).
|
|
3
|
+
* Comparison is deep-equal via JSON round-trip — the same fidelity as metadata
|
|
4
|
+
* persisted and re-parsed from disk; avoids reference-equality surprises for
|
|
5
|
+
* objects/arrays in the filter. */
|
|
6
|
+
declare function matchesWhere(metadata: Record<string, unknown> | undefined, filter: Record<string, unknown>): boolean;
|
|
7
|
+
|
|
8
|
+
export { matchesWhere as m };
|
package/dist/graph.d.ts
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import { A as AsyncStore, S as StoreFilter } from './types-DyQLNtxa.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* A directed edge stored as a flat collection record. `from`/`to` are node ids,
|
|
5
|
+
* `type` is the relation kind. Arbitrary metadata fields (e.g. `published`,
|
|
6
|
+
* `weight`) live alongside and are matchable via StoreFilter.where / edgeFilter.
|
|
7
|
+
*/
|
|
8
|
+
interface Edge {
|
|
9
|
+
id: string;
|
|
10
|
+
from: string;
|
|
11
|
+
to: string;
|
|
12
|
+
type: string;
|
|
13
|
+
[field: string]: unknown;
|
|
14
|
+
}
|
|
15
|
+
/** Which way to walk an edge. "both" treats every edge as bidirectional. */
|
|
16
|
+
type Direction = "out" | "in" | "both";
|
|
17
|
+
/**
|
|
18
|
+
* Single-hop adjacent edges of `opts.from`.
|
|
19
|
+
*
|
|
20
|
+
* - "out": edges where `from === opts.from`.
|
|
21
|
+
* - "in": edges where `to === opts.from`.
|
|
22
|
+
* - "both": the union of the two (deduped by edge id).
|
|
23
|
+
*
|
|
24
|
+
* `opts.edgeFilter` (a StoreFilter on edge fields) is applied at the store level;
|
|
25
|
+
* `opts.edgeTypes`, if given, is applied in-memory afterward (the port's `where`
|
|
26
|
+
* is exact-match only and cannot express "type IN [...]").
|
|
27
|
+
*
|
|
28
|
+
* Returns the full stored edge record untouched — no projection. All extra fields
|
|
29
|
+
* (e.g. `from_type`, `to_type`, `weight`) are present on every returned edge.
|
|
30
|
+
*/
|
|
31
|
+
declare function neighbors(store: AsyncStore, collection: string, opts: {
|
|
32
|
+
from: string;
|
|
33
|
+
direction?: Direction;
|
|
34
|
+
edgeTypes?: string[];
|
|
35
|
+
edgeFilter?: StoreFilter;
|
|
36
|
+
}): Promise<Edge[]>;
|
|
37
|
+
/**
|
|
38
|
+
* Batched, fanout-free BFS to `depth` hops from `start`.
|
|
39
|
+
*
|
|
40
|
+
* Load-once strategy (the fanout avoidance): the candidate edge set is loaded
|
|
41
|
+
* with ONE store call — `store.list(collection, edgeFilter)` — applying the
|
|
42
|
+
* caller's policy `edgeFilter` at load. Everything after is in-memory: filter by
|
|
43
|
+
* `edgeTypes`, build an adjacency index, then BFS level-by-level. No per-node
|
|
44
|
+
* query fan-out, regardless of graph size.
|
|
45
|
+
*
|
|
46
|
+
* Return shape:
|
|
47
|
+
* - `nodes`: reached node ids, EXCLUDING `start`. `start` is the origin, not a
|
|
48
|
+
* discovered neighbor — depth 0 therefore yields `[]`, depth 1 yields the
|
|
49
|
+
* direct neighbors, etc. Cycles terminate: a node is expanded at most once.
|
|
50
|
+
* - `edges`: every edge actually traversed, deduped by id.
|
|
51
|
+
*
|
|
52
|
+
* Both arrays are sorted by id so results are deterministic across backings
|
|
53
|
+
* (the parity test relies on this). depth ≤ 0 → `{ nodes: [], edges: [] }`.
|
|
54
|
+
*
|
|
55
|
+
* "both" adjacency: an edge {from:a,to:b} makes a→b AND b→a reachable.
|
|
56
|
+
*
|
|
57
|
+
* Returns the full stored edge record untouched — no projection. All extra fields
|
|
58
|
+
* (e.g. `from_type`, `to_type`, `weight`) are present on every returned edge.
|
|
59
|
+
*/
|
|
60
|
+
declare function traverse(store: AsyncStore, collection: string, opts: {
|
|
61
|
+
start: string;
|
|
62
|
+
depth: number;
|
|
63
|
+
direction?: Direction;
|
|
64
|
+
edgeTypes?: string[];
|
|
65
|
+
edgeFilter?: StoreFilter;
|
|
66
|
+
}): Promise<{
|
|
67
|
+
nodes: string[];
|
|
68
|
+
edges: Edge[];
|
|
69
|
+
}>;
|
|
70
|
+
|
|
71
|
+
export { type Direction, type Edge, neighbors, traverse };
|
package/dist/graph.js
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
// src/graph.ts
|
|
2
|
+
/**
 * Keep only the edges whose `type` is listed in `edgeTypes`.
 * When `edgeTypes` is missing or empty the input array itself is returned.
 */
function filterByTypes(edges, edgeTypes) {
  const unrestricted = !edgeTypes || edgeTypes.length === 0;
  if (unrestricted) return edges;
  const allowed = new Set(edgeTypes);
  const kept = [];
  for (const edge of edges) {
    if (allowed.has(edge.type)) kept.push(edge);
  }
  return kept;
}
|
|
7
|
+
/**
 * Drop edges whose `id` was already seen, keeping the first occurrence of
 * each id and preserving first-seen order.
 */
function dedupById(edges) {
  // A Map iterates in insertion order, which is first-seen order here.
  const firstById = /* @__PURE__ */ new Map();
  for (const edge of edges) {
    if (!firstById.has(edge.id)) firstById.set(edge.id, edge);
  }
  return [...firstById.values()];
}
|
|
17
|
+
/**
 * Return a copy of `filter` whose `where` is the original `where` merged
 * with `override`; keys in `override` win. `filter` may be undefined.
 */
function withWhere(filter, override) {
  const mergedWhere = { ...filter?.where, ...override };
  return { ...filter, where: mergedWhere };
}
|
|
20
|
+
/**
 * List the edges adjacent to `opts.from` in a single hop.
 *
 * - "out" (the default) queries edges with `from === opts.from`.
 * - "in" queries edges with `to === opts.from`.
 * - Any other direction runs both queries concurrently and dedups by id.
 *
 * `opts.edgeFilter` is passed to the store with the `from`/`to` condition
 * merged into its `where`; `opts.edgeTypes` is applied to the result afterward.
 */
async function neighbors(store, collection, opts) {
  const direction = opts.direction ?? "out";
  const listOutgoing = () => store.list(collection, withWhere(opts.edgeFilter, { from: opts.from }));
  const listIncoming = () => store.list(collection, withWhere(opts.edgeFilter, { to: opts.from }));
  let edges;
  switch (direction) {
    case "out":
      edges = await listOutgoing();
      break;
    case "in":
      edges = await listIncoming();
      break;
    default: {
      // Start both queries before awaiting either one.
      const [outgoing, incoming] = await Promise.all([listOutgoing(), listIncoming()]);
      edges = dedupById([...outgoing, ...incoming]);
    }
  }
  return filterByTypes(edges, opts.edgeTypes);
}
|
|
42
|
+
/**
 * Breadth-first walk of up to `opts.depth` hops starting at `opts.start`.
 *
 * The candidate edges are loaded with one `store.list` call (filtered by
 * `opts.edgeFilter`), narrowed by `opts.edgeTypes`, and indexed in memory:
 * by `from` for "out", by `to` for "in", and by both endpoints otherwise.
 *
 * Returns `{ nodes, edges }`: `nodes` are the reached node ids excluding the
 * start, `edges` every edge encountered while expanding a node (each id once).
 * Both arrays are sorted by id. A non-finite or non-positive depth returns
 * empty arrays without touching the store.
 */
async function traverse(store, collection, opts) {
  const direction = opts.direction ?? "out";
  const maxHops = opts.depth;
  if (!(Number.isFinite(maxHops) && maxHops > 0)) {
    return { nodes: [], edges: [] };
  }
  const candidates = filterByTypes(await store.list(collection, opts.edgeFilter), opts.edgeTypes);
  // node id -> edges that can be followed from that node
  const adjacency = /* @__PURE__ */ new Map();
  const link = (node, edge) => {
    const bucket = adjacency.get(node);
    if (bucket) bucket.push(edge);
    else adjacency.set(node, [edge]);
  };
  const indexBySource = direction !== "in";
  const indexByTarget = direction !== "out";
  for (const edge of candidates) {
    if (indexBySource) link(edge.from, edge);
    if (indexByTarget) link(edge.to, edge);
  }
  const visited = /* @__PURE__ */ new Set([opts.start]);
  const seenEdgeIds = /* @__PURE__ */ new Set();
  const nodes = [];
  const edges = [];
  let frontier = [opts.start];
  let hop = 0;
  while (hop < maxHops && frontier.length > 0) {
    const upcoming = [];
    for (const node of frontier) {
      for (const edge of adjacency.get(node) ?? []) {
        if (!seenEdgeIds.has(edge.id)) {
          seenEdgeIds.add(edge.id);
          edges.push(edge);
        }
        // The far end is whichever endpoint is not the node being expanded.
        const neighbor = edge.from === node ? edge.to : edge.from;
        if (visited.has(neighbor)) continue;
        visited.add(neighbor);
        nodes.push(neighbor);
        upcoming.push(neighbor);
      }
    }
    frontier = upcoming;
    hop += 1;
  }
  const ascending = (a, b) => a < b ? -1 : a > b ? 1 : 0;
  nodes.sort(ascending);
  edges.sort((a, b) => ascending(a.id, b.id));
  return { nodes, edges };
}
|
|
94
|
+
export {
|
|
95
|
+
neighbors,
|
|
96
|
+
traverse
|
|
97
|
+
};
|
package/dist/index.d.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
export { A as AsyncStore, S as StoreFilter, a as StoreMeta, b as SyncStore } from './types-DyQLNtxa.js';
|
|
2
2
|
export { InMemoryKv, toAsync } from './kv.js';
|
|
3
|
-
export { V as Vector, a as VectorDocument, b as VectorSearchOptions, c as VectorSearchResult, d as VectorStore, e as VectorStoreMeta } from './types-
|
|
4
|
-
export { InMemoryVectorStore, assertDimensions, bufferToVector, cosineSimilarity, isUsableVector, vectorToBuffer } from './vector.js';
|
|
3
|
+
export { A as AsyncVectorStore, V as Vector, a as VectorDocument, b as VectorSearchOptions, c as VectorSearchResult, d as VectorStore, e as VectorStoreMeta } from './types-B0XrD10b.js';
|
|
4
|
+
export { InMemoryVectorStore, assertDimensions, bufferToVector, cosineSimilarity, isUsableVector, toAsyncVector, vectorToBuffer } from './vector.js';
|
|
5
|
+
export { m as matchesWhere } from './filter-B9hP-TKF.js';
|
|
6
|
+
export { S as SearchDocument, a as SearchOptions, b as SearchResult, c as SearchStore } from './types-0B0Tw1fz.js';
|
|
7
|
+
export { InMemorySearchStore, sanitizeFtsQuery, tokenize } from './search.js';
|
package/dist/index.js
CHANGED
|
@@ -3,8 +3,16 @@ import {
|
|
|
3
3
|
toAsync
|
|
4
4
|
} from "./chunk-ZI4JA6IU.js";
|
|
5
5
|
import {
|
|
6
|
-
|
|
7
|
-
} from "./chunk-
|
|
6
|
+
InMemorySearchStore
|
|
7
|
+
} from "./chunk-TY472NYD.js";
|
|
8
|
+
import {
|
|
9
|
+
InMemoryVectorStore,
|
|
10
|
+
toAsyncVector
|
|
11
|
+
} from "./chunk-OUY3D4RT.js";
|
|
12
|
+
import {
|
|
13
|
+
sanitizeFtsQuery,
|
|
14
|
+
tokenize
|
|
15
|
+
} from "./chunk-6YJ66JFO.js";
|
|
8
16
|
import {
|
|
9
17
|
assertDimensions,
|
|
10
18
|
bufferToVector,
|
|
@@ -12,13 +20,21 @@ import {
|
|
|
12
20
|
isUsableVector,
|
|
13
21
|
vectorToBuffer
|
|
14
22
|
} from "./chunk-BXM3YDOC.js";
|
|
23
|
+
import {
|
|
24
|
+
matchesWhere
|
|
25
|
+
} from "./chunk-KPMRRYTL.js";
|
|
15
26
|
export {
|
|
16
27
|
InMemoryStore as InMemoryKv,
|
|
28
|
+
InMemorySearchStore,
|
|
17
29
|
InMemoryVectorStore,
|
|
18
30
|
assertDimensions,
|
|
19
31
|
bufferToVector,
|
|
20
32
|
cosineSimilarity,
|
|
21
33
|
isUsableVector,
|
|
34
|
+
matchesWhere,
|
|
35
|
+
sanitizeFtsQuery,
|
|
22
36
|
toAsync,
|
|
37
|
+
toAsyncVector,
|
|
38
|
+
tokenize,
|
|
23
39
|
vectorToBuffer
|
|
24
40
|
};
|
package/dist/search.d.ts
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { c as SearchStore, S as SearchDocument, a as SearchOptions, b as SearchResult } from './types-0B0Tw1fz.js';
|
|
2
|
+
export { m as matchesWhere } from './filter-B9hP-TKF.js';
|
|
3
|
+
import './types-DyQLNtxa.js';
|
|
4
|
+
|
|
5
|
+
declare class InMemorySearchStore implements SearchStore {
|
|
6
|
+
private readonly collections;
|
|
7
|
+
index<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, doc: SearchDocument<M>): void;
|
|
8
|
+
indexMany<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, docs: ReadonlyArray<SearchDocument<M>>): void;
|
|
9
|
+
remove(collection: string, id: string): boolean;
|
|
10
|
+
search<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, query: string, opts?: SearchOptions): SearchResult<M>[];
|
|
11
|
+
private removeFromColl;
|
|
12
|
+
private collectionFor;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
/** Tokenize `text` into lowercase unicode word tokens. A reasonable FTS5
|
|
16
|
+
* `unicode61`-ish split: letters and digits are token characters, everything
|
|
17
|
+
* else is a separator. */
|
|
18
|
+
declare function tokenize(text: string): string[];
|
|
19
|
+
/** Sanitize a raw query into an FTS5 MATCH expression: tokenize, double-quote
|
|
20
|
+
* each token (doubling embedded quotes), OR them. Mirrors the knowledge/session
|
|
21
|
+
* index sanitizers so user punctuation / operators never throw. Returns "" for
|
|
22
|
+
* an empty/whitespace query — callers treat that as "no results". */
|
|
23
|
+
declare function sanitizeFtsQuery(q: string): string;
|
|
24
|
+
|
|
25
|
+
export { InMemorySearchStore, SearchDocument, SearchOptions, SearchResult, SearchStore, sanitizeFtsQuery, tokenize };
|
package/dist/search.js
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import {
|
|
2
|
+
InMemorySearchStore
|
|
3
|
+
} from "./chunk-TY472NYD.js";
|
|
4
|
+
import {
|
|
5
|
+
sanitizeFtsQuery,
|
|
6
|
+
tokenize
|
|
7
|
+
} from "./chunk-6YJ66JFO.js";
|
|
8
|
+
import {
|
|
9
|
+
matchesWhere
|
|
10
|
+
} from "./chunk-KPMRRYTL.js";
|
|
11
|
+
export {
|
|
12
|
+
InMemorySearchStore,
|
|
13
|
+
matchesWhere,
|
|
14
|
+
sanitizeFtsQuery,
|
|
15
|
+
tokenize
|
|
16
|
+
};
|
package/dist/sql.d.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { S as StoreFilter } from './types-DyQLNtxa.js';
|
|
2
|
+
|
|
3
|
+
/** A bound SQL parameter — the common subset both better-sqlite3 and @libsql/client
|
|
4
|
+
* accept. Booleans are pre-converted to 0/1 by the builders (better-sqlite3 rejects
|
|
5
|
+
* a raw boolean), so they never appear here. */
|
|
6
|
+
type SqlParam = string | number | bigint | null;
|
|
7
|
+
/** A field name is ONE top-level JSON key, never a nested path. Build the JSON path
|
|
8
|
+
* `$."field"` (with `"` in the field doubled per SQLite JSON-path quoting) so a
|
|
9
|
+
* dotted name (`"a.b"`) resolves to the single top-level key `a.b`, matching the
|
|
10
|
+
* in-memory reference's `record[key]` lookup — not the nested path `$.a.b`. Returned
|
|
11
|
+
* as a value to BIND, never interpolated into SQL: field names are caller-supplied,
|
|
12
|
+
* so inlining them would be a SQL-injection vector. */
|
|
13
|
+
declare function jsonPath(field: string): string;
|
|
14
|
+
declare function buildWhereClause(filter?: StoreFilter): {
|
|
15
|
+
clause: string;
|
|
16
|
+
params: SqlParam[];
|
|
17
|
+
};
|
|
18
|
+
declare function buildOrderBy(filter?: StoreFilter): {
|
|
19
|
+
clause: string;
|
|
20
|
+
params: SqlParam[];
|
|
21
|
+
};
|
|
22
|
+
declare function buildLimitOffset(filter?: StoreFilter): string;
|
|
23
|
+
/** Deterministic 32-bit FNV-1a hash → base36. Used to make collection table names
|
|
24
|
+
* injective (distinct collection names never alias to one physical table). */
|
|
25
|
+
declare function hashName(s: string): string;
|
|
26
|
+
|
|
27
|
+
export { type SqlParam, buildLimitOffset, buildOrderBy, buildWhereClause, hashName, jsonPath };
|
package/dist/sql.js
ADDED
|
package/dist/types-0B0Tw1fz.d.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { S as StoreFilter } from './types-DyQLNtxa.js';
|
|
2
|
+
|
|
3
|
+
/** A document to be full-text indexed. `meta` is an exact-match-filterable
|
|
4
|
+
* payload persisted as JSON by disk-backed backends, so it must be JSON-
|
|
5
|
+
* serializable; values JSON can't represent (`undefined`, functions) are dropped
|
|
6
|
+
* on persistence — don't rely on them. */
|
|
7
|
+
interface SearchDocument<M = Record<string, unknown>> {
|
|
8
|
+
/** Unique id within the collection. */
|
|
9
|
+
id: string;
|
|
10
|
+
/** The text to tokenize and index. */
|
|
11
|
+
text: string;
|
|
12
|
+
/** Typed context stored alongside the text. */
|
|
13
|
+
meta?: M;
|
|
14
|
+
}
|
|
15
|
+
interface SearchOptions {
|
|
16
|
+
/** Maximum results to return. Default: 10. */
|
|
17
|
+
limit?: number;
|
|
18
|
+
/** Exact-match filter on document `meta` (uses `filter.where`, evaluated with
|
|
19
|
+
* the shared `matchesWhere`). Applied before limit, after the FTS MATCH. */
|
|
20
|
+
filter?: StoreFilter;
|
|
21
|
+
}
|
|
22
|
+
/** A ranked search hit. `score` is the bm25 relevance — higher is more relevant. */
|
|
23
|
+
interface SearchResult<M = Record<string, unknown>> {
|
|
24
|
+
id: string;
|
|
25
|
+
/** bm25 relevance. Higher = more relevant. (sqlite exposes `-bm25(fts)`; the
|
|
26
|
+
* in-memory reference computes the same quantity directly.) */
|
|
27
|
+
score: number;
|
|
28
|
+
/** The document's `meta`, or `{}` when none was indexed. */
|
|
29
|
+
meta: M;
|
|
30
|
+
}
|
|
31
|
+
/** A synchronous full-text search store with per-collection documents.
|
|
32
|
+
*
|
|
33
|
+
* Ranking is bm25 with the FTS5 default parameters (k1 = 1.2, b = 0.75). The
|
|
34
|
+
* in-memory reference and the sqlite `.search` facet produce the same RANKING
|
|
35
|
+
* on clear relevance differences; exact float scores need not match across
|
|
36
|
+
* backends (the same parity caveat as /vector). */
|
|
37
|
+
interface SearchStore {
|
|
38
|
+
/** Insert or replace a document by (collection, id). */
|
|
39
|
+
index<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, doc: SearchDocument<M>): void;
|
|
40
|
+
/** Insert or replace many documents in one collection. */
|
|
41
|
+
indexMany<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, docs: ReadonlyArray<SearchDocument<M>>): void;
|
|
42
|
+
/** Remove a document by id. Returns true if it existed. */
|
|
43
|
+
remove(collection: string, id: string): boolean;
|
|
44
|
+
/** bm25-ranked keyword search, highest score first. Supports an exact-match
|
|
45
|
+
* meta `filter`. An empty/whitespace query returns `[]`. */
|
|
46
|
+
search<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, query: string, opts?: SearchOptions): SearchResult<M>[];
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export type { SearchDocument as S, SearchOptions as a, SearchResult as b, SearchStore as c };
|
package/dist/{types-BqSZEMAB.d.ts → types-B0XrD10b.d.ts}
RENAMED
|
@@ -32,6 +32,13 @@ interface VectorSearchOptions {
|
|
|
32
32
|
topK?: number;
|
|
33
33
|
/** Minimum cosine similarity; results below are excluded. Default: no floor. */
|
|
34
34
|
minScore?: number;
|
|
35
|
+
/** Pre-KNN filter: only consider documents whose metadata satisfies ALL of these
|
|
36
|
+
* exact-match conditions. Applied before scoring, so topK is out of the passing
|
|
37
|
+
* set only. `where` and `whereNot` are independent — both may be provided. */
|
|
38
|
+
where?: Record<string, unknown>;
|
|
39
|
+
/** Pre-KNN filter: exclude documents whose metadata satisfies ANY of these
|
|
40
|
+
* exact-match conditions. Applied before scoring. */
|
|
41
|
+
whereNot?: Record<string, unknown>;
|
|
35
42
|
}
|
|
36
43
|
/** A synchronous vector similarity store with per-collection documents.
|
|
37
44
|
*
|
|
@@ -46,12 +53,31 @@ interface VectorStore {
|
|
|
46
53
|
upsertMany<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, docs: ReadonlyArray<VectorDocument<M>>): void;
|
|
47
54
|
/** Fetch a document by id, or null. */
|
|
48
55
|
get<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, id: string): VectorDocument<M> | null;
|
|
56
|
+
/** Check whether a document exists by (collection, id). */
|
|
57
|
+
has(collection: string, id: string): boolean;
|
|
49
58
|
/** Remove a document by id. Returns true if it existed. */
|
|
50
59
|
remove(collection: string, id: string): boolean;
|
|
51
60
|
/** Number of documents in a collection. */
|
|
52
61
|
count(collection: string): number;
|
|
53
|
-
/** k-nearest-neighbour search by cosine similarity, highest score first.
|
|
62
|
+
/** k-nearest-neighbour search by cosine similarity, highest score first.
|
|
63
|
+
* Supports pre-KNN filtering via opts.where / opts.whereNot. */
|
|
54
64
|
search<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, query: Vector, opts?: VectorSearchOptions): VectorSearchResult<M>[];
|
|
55
65
|
}
|
|
66
|
+
/** The async face of the same vector store. Remote backends (e.g. Qdrant) implement
|
|
67
|
+
* this natively; sync backends reach it via {@link toAsyncVector}.
|
|
68
|
+
*
|
|
69
|
+
* Hand-written rather than derived from VectorStore via a mapped type: per-method
|
|
70
|
+
* generics (`get<M>`, `search<M>`, …) collapse to `unknown` under inference — same
|
|
71
|
+
* constraint as the KV AsyncStore twin in src/types.ts. */
|
|
72
|
+
interface AsyncVectorStore {
|
|
73
|
+
readonly meta: VectorStoreMeta;
|
|
74
|
+
upsert<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, doc: VectorDocument<M>): Promise<void>;
|
|
75
|
+
upsertMany<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, docs: ReadonlyArray<VectorDocument<M>>): Promise<void>;
|
|
76
|
+
get<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, id: string): Promise<VectorDocument<M> | null>;
|
|
77
|
+
has(collection: string, id: string): Promise<boolean>;
|
|
78
|
+
remove(collection: string, id: string): Promise<boolean>;
|
|
79
|
+
count(collection: string): Promise<number>;
|
|
80
|
+
search<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, query: Vector, opts?: VectorSearchOptions): Promise<VectorSearchResult<M>[]>;
|
|
81
|
+
}
|
|
56
82
|
|
|
57
|
-
export type { Vector as V, VectorDocument as a, VectorSearchOptions as b, VectorSearchResult as c, VectorStore as d, VectorStoreMeta as e };
|
|
83
|
+
export type { AsyncVectorStore as A, Vector as V, VectorDocument as a, VectorSearchOptions as b, VectorSearchResult as c, VectorStore as d, VectorStoreMeta as e };
|
package/dist/vector.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { V as Vector, d as VectorStore, e as VectorStoreMeta, a as VectorDocument, b as VectorSearchOptions, c as VectorSearchResult } from './types-BqSZEMAB.js';
|
|
1
|
+
import { V as Vector, d as VectorStore, e as VectorStoreMeta, a as VectorDocument, b as VectorSearchOptions, c as VectorSearchResult, A as AsyncVectorStore } from './types-B0XrD10b.js';
|
|
2
|
+
export { m as matchesWhere } from './filter-B9hP-TKF.js';
|
|
2
3
|
|
|
3
4
|
/** Cosine similarity in [-1, 1]. Exact. Shared by the in-memory backend and the
|
|
4
5
|
* sqlite backend's search path. Returns 0 for length mismatch or a zero vector
|
|
@@ -32,10 +33,15 @@ declare class InMemoryVectorStore implements VectorStore {
|
|
|
32
33
|
upsert<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, doc: VectorDocument<M>): void;
|
|
33
34
|
upsertMany<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, docs: ReadonlyArray<VectorDocument<M>>): void;
|
|
34
35
|
get<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, id: string): VectorDocument<M> | null;
|
|
36
|
+
has(collection: string, id: string): boolean;
|
|
35
37
|
remove(collection: string, id: string): boolean;
|
|
36
38
|
count(collection: string): number;
|
|
37
39
|
search<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, query: Vector, opts?: VectorSearchOptions): VectorSearchResult<M>[];
|
|
38
40
|
private collectionFor;
|
|
39
41
|
}
|
|
40
42
|
|
|
41
|
-
|
|
43
|
+
/** Lift a synchronous VectorStore to the {@link AsyncVectorStore} interface. The
|
|
44
|
+
* bridge only goes this direction — sync ⊂ async. */
|
|
45
|
+
declare function toAsyncVector(store: VectorStore): AsyncVectorStore;
|
|
46
|
+
|
|
47
|
+
export { AsyncVectorStore, InMemoryVectorStore, Vector, VectorDocument, VectorSearchOptions, VectorSearchResult, VectorStore, VectorStoreMeta, assertDimensions, bufferToVector, cosineSimilarity, isUsableVector, toAsyncVector, vectorToBuffer };
|
package/dist/vector.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import {
|
|
2
|
-
InMemoryVectorStore
|
|
3
|
-
} from "./chunk-EDVHBRXG.js";
|
|
2
|
+
InMemoryVectorStore,
|
|
3
|
+
toAsyncVector
|
|
4
|
+
} from "./chunk-OUY3D4RT.js";
|
|
4
5
|
import {
|
|
5
6
|
assertDimensions,
|
|
6
7
|
bufferToVector,
|
|
@@ -8,11 +9,16 @@ import {
|
|
|
8
9
|
isUsableVector,
|
|
9
10
|
vectorToBuffer
|
|
10
11
|
} from "./chunk-BXM3YDOC.js";
|
|
12
|
+
import {
|
|
13
|
+
matchesWhere
|
|
14
|
+
} from "./chunk-KPMRRYTL.js";
|
|
11
15
|
export {
|
|
12
16
|
InMemoryVectorStore,
|
|
13
17
|
assertDimensions,
|
|
14
18
|
bufferToVector,
|
|
15
19
|
cosineSimilarity,
|
|
16
20
|
isUsableVector,
|
|
21
|
+
matchesWhere,
|
|
22
|
+
toAsyncVector,
|
|
17
23
|
vectorToBuffer
|
|
18
24
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mirk/store",
|
|
3
|
-
"version": "0.4.2",
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Code-split storage ports + source adapters under one namespace. KV + collection store and vector similarity store as interface subpaths; the sqlite source-adapter implements both over one connection.",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -25,6 +25,7 @@
|
|
|
25
25
|
"homepage": "https://github.com/nightwork-dev/mirk/tree/main/packages/store#readme",
|
|
26
26
|
"bugs": "https://github.com/nightwork-dev/mirk/issues",
|
|
27
27
|
"main": "./dist/index.js",
|
|
28
|
+
"types": "./dist/index.d.ts",
|
|
28
29
|
"exports": {
|
|
29
30
|
".": {
|
|
30
31
|
"import": "./dist/index.js",
|
|
@@ -38,6 +39,18 @@
|
|
|
38
39
|
"import": "./dist/vector.js",
|
|
39
40
|
"types": "./dist/vector.d.ts"
|
|
40
41
|
},
|
|
42
|
+
"./search": {
|
|
43
|
+
"import": "./dist/search.js",
|
|
44
|
+
"types": "./dist/search.d.ts"
|
|
45
|
+
},
|
|
46
|
+
"./graph": {
|
|
47
|
+
"import": "./dist/graph.js",
|
|
48
|
+
"types": "./dist/graph.d.ts"
|
|
49
|
+
},
|
|
50
|
+
"./sql": {
|
|
51
|
+
"import": "./dist/sql.js",
|
|
52
|
+
"types": "./dist/sql.d.ts"
|
|
53
|
+
},
|
|
41
54
|
"./sqlite": {
|
|
42
55
|
"import": "./dist/adapters/sqlite.js",
|
|
43
56
|
"types": "./dist/adapters/sqlite.d.ts"
|
|
@@ -46,6 +59,12 @@
|
|
|
46
59
|
"files": [
|
|
47
60
|
"dist"
|
|
48
61
|
],
|
|
62
|
+
"scripts": {
|
|
63
|
+
"test": "vitest run",
|
|
64
|
+
"test:watch": "vitest",
|
|
65
|
+
"typecheck": "tsc --noEmit",
|
|
66
|
+
"build": "rm -rf dist && tsup src/index.ts src/kv.ts src/vector.ts src/search.ts src/graph.ts src/sql.ts src/adapters/sqlite.ts --format esm --dts --external better-sqlite3 --external sqlite-vec"
|
|
67
|
+
},
|
|
49
68
|
"peerDependencies": {
|
|
50
69
|
"better-sqlite3": "^11.0.0 || ^12.0.0",
|
|
51
70
|
"sqlite-vec": "^0.1.0"
|
|
@@ -69,12 +88,5 @@
|
|
|
69
88
|
"publishConfig": {
|
|
70
89
|
"registry": "https://registry.npmjs.org",
|
|
71
90
|
"access": "public"
|
|
72
|
-
},
|
|
73
|
-
"scripts": {
|
|
74
|
-
"test": "vitest run",
|
|
75
|
-
"test:watch": "vitest",
|
|
76
|
-
"typecheck": "tsc --noEmit",
|
|
77
|
-
"build": "rm -rf dist && tsup src/index.ts src/kv.ts src/vector.ts src/adapters/sqlite.ts --format esm --dts --external better-sqlite3 --external sqlite-vec"
|
|
78
|
-
},
|
|
79
|
-
"types": "./dist/index.d.ts"
|
|
80
|
-
}
|
|
91
|
+
}
|
|
92
|
+
}
|