@mirk/store 0.4.2 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -8
- package/dist/adapters/sqlite.d.ts +6 -3
- package/dist/adapters/sqlite.js +265 -69
- package/dist/chunk-77IIKHQW.js +120 -0
- package/dist/{chunk-ZI4JA6IU.js → chunk-DKCPULXT.js} +17 -0
- package/dist/chunk-DP4D7CJY.js +59 -0
- package/dist/chunk-KPMRRYTL.js +14 -0
- package/dist/chunk-N27Y55CA.js +67 -0
- package/dist/{chunk-EDVHBRXG.js → chunk-OUY3D4RT.js} +45 -1
- package/dist/filter-B9hP-TKF.d.ts +8 -0
- package/dist/graph.d.ts +98 -0
- package/dist/graph.js +161 -0
- package/dist/index.d.ts +6 -3
- package/dist/index.js +19 -3
- package/dist/kv.d.ts +6 -4
- package/dist/kv.js +1 -1
- package/dist/search.d.ts +26 -0
- package/dist/search.js +16 -0
- package/dist/sql.d.ts +27 -0
- package/dist/sql.js +14 -0
- package/dist/{types-BqSZEMAB.d.ts → types-B0XrD10b.d.ts} +28 -2
- package/dist/types-BLZTMsQO.d.ts +69 -0
- package/dist/{types-DyQLNtxa.d.ts → types-DHLdLZ_W.d.ts} +12 -1
- package/dist/vector.d.ts +8 -2
- package/dist/vector.js +8 -2
- package/package.json +16 -4
package/README.md
CHANGED
|
@@ -24,11 +24,13 @@ npm install sqlite-vec
|
|
|
24
24
|
| `@mirk/store` | the ports + their in-memory references + `toAsync` + cosine helpers | none |
|
|
25
25
|
| `@mirk/store/kv` | `SyncStore` port (key-value + collections), `InMemoryKv`, `toAsync` | none |
|
|
26
26
|
| `@mirk/store/vector` | `VectorStore` port, `InMemoryVectorStore`, cosine helpers | none |
|
|
27
|
-
| `@mirk/store/
|
|
27
|
+
| `@mirk/store/search` | `SearchStore` port, `InMemorySearchStore`, BM25-style keyword search | none |
|
|
28
|
+
| `@mirk/store/graph` | graph helpers over the collection port (`neighbors`, `traverse`, `traverseFrontierBatched`) | none |
|
|
29
|
+
| `@mirk/store/sqlite` | the SQLite **source adapter** — one connection, `.kv` + `.vector` + `.search` facets | `better-sqlite3` (peer), `sqlite-vec` (optional peer) |
|
|
28
30
|
|
|
29
31
|
Source adapters are reached **only** through their own subpath (e.g. `/sqlite`) — the root and the
|
|
30
|
-
port subpaths never re-export them, so importing `@mirk/store`, `/kv`,
|
|
31
|
-
native binding into a consumer bundle.
|
|
32
|
+
port subpaths never re-export them, so importing `@mirk/store`, `/kv`, `/vector`, `/search`, or
|
|
33
|
+
`/graph` never drags a native binding into a consumer bundle.
|
|
32
34
|
|
|
33
35
|
## Quickstart — zero native deps
|
|
34
36
|
|
|
@@ -60,19 +62,51 @@ kv.count("posts"); // 1
|
|
|
60
62
|
kv.remove("posts", "p1");
|
|
61
63
|
```
|
|
62
64
|
|
|
65
|
+
## Full-text search
|
|
66
|
+
|
|
67
|
+
`SearchStore` indexes documents by id and returns BM25-ranked keyword matches. Use `text` for the
|
|
68
|
+
single-field shorthand or `fields` for named columns with query-time weighting:
|
|
69
|
+
|
|
70
|
+
```ts
|
|
71
|
+
import { InMemorySearchStore } from "@mirk/store/search";
|
|
72
|
+
|
|
73
|
+
const search = new InMemorySearchStore();
|
|
74
|
+
search.index("pages", { id: "a", fields: { title: "Opal guide", body: "plain body" } });
|
|
75
|
+
search.index("pages", { id: "b", fields: { title: "plain title", body: "Opal guide" } });
|
|
76
|
+
search.search("pages", "opal", { fieldWeights: { title: 4, body: 1 } }); // [a, b]
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
The first indexed document fixes a collection's field schema; later documents must use the same
|
|
80
|
+
field names. `text` and `fields: { text }` are the same single-field schema for backwards
|
|
81
|
+
compatibility.
|
|
82
|
+
|
|
83
|
+
## Graph helpers
|
|
84
|
+
|
|
85
|
+
`@mirk/store/graph` stores edges as ordinary collection records and traverses them through the
|
|
86
|
+
existing collection port. Policy stays caller-owned through `StoreFilter`.
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
import { traverse } from "@mirk/store/graph";
|
|
90
|
+
|
|
91
|
+
const hits = traverse(kv, { start: "node:a", depth: 2, direction: "out" });
|
|
92
|
+
```
|
|
93
|
+
|
|
63
94
|
## SQLite adapter — one connection, many capabilities
|
|
64
95
|
|
|
65
|
-
`SqliteAdapter` opens a single `better-sqlite3` database and exposes
|
|
66
|
-
and
|
|
96
|
+
`SqliteAdapter` opens a single `better-sqlite3` database and exposes `.kv` (`SyncStore`), `.vector`
|
|
97
|
+
(`VectorStore`), and `.search` (`SearchStore`) facets over it:
|
|
67
98
|
|
|
68
99
|
```ts
|
|
69
100
|
import { SqliteAdapter } from "@mirk/store/sqlite";
|
|
70
101
|
|
|
71
|
-
//
|
|
72
|
-
const db = new SqliteAdapter({ path: "data.db"
|
|
102
|
+
// .kv and .search work immediately; vector dimensions are inferred on the first write.
|
|
103
|
+
const db = new SqliteAdapter({ path: "data.db" });
|
|
73
104
|
|
|
74
105
|
db.kv.set("user:1", { name: "Ada" });
|
|
75
106
|
|
|
107
|
+
db.search.index("pages", { id: "intro", fields: { title: "Intro", body: "hello world" } });
|
|
108
|
+
db.search.search("pages", "hello", { fieldWeights: { title: 4, body: 1 } });
|
|
109
|
+
|
|
76
110
|
const embedding = new Float32Array(768); // your real embedding here
|
|
77
111
|
const query = new Float32Array(768);
|
|
78
112
|
db.vector.upsert("docs", { id: "a", vector: embedding });
|
|
@@ -87,7 +121,7 @@ db.close();
|
|
|
87
121
|
|---|---|---|
|
|
88
122
|
| `path` | `string` | DB file path, or `":memory:"`. |
|
|
89
123
|
| `db` | `Database` | Reuse an existing `better-sqlite3` connection instead of opening one. |
|
|
90
|
-
| `dimensions` | `number` |
|
|
124
|
+
| `dimensions` | `number` | Optional embedding dimensionality. If omitted, inferred and persisted from the first vector `upsert` / `upsertMany`; `search` still requires known dimensions. |
|
|
91
125
|
| `forceJsCosine` | `boolean` | Pin the exact JS-cosine path even when `sqlite-vec` is installed (mainly for tests). |
|
|
92
126
|
|
|
93
127
|
Vectors (`Vector` is a `Float32Array`) are stored as little-endian float32 BLOBs and ranked by
|
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import Database from 'better-sqlite3';
|
|
2
|
-
import {
|
|
3
|
-
import { d as VectorStore } from '../types-
|
|
2
|
+
import { c as SyncStore } from '../types-DHLdLZ_W.js';
|
|
3
|
+
import { d as VectorStore } from '../types-B0XrD10b.js';
|
|
4
|
+
import { c as SearchStore } from '../types-BLZTMsQO.js';
|
|
4
5
|
|
|
5
6
|
interface SqliteAdapterOptions {
|
|
6
7
|
/** Path to the SQLite database file. Use ":memory:" for in-memory. */
|
|
7
8
|
path: string;
|
|
8
9
|
/** Existing better-sqlite3 instance to reuse (shares one connection). */
|
|
9
10
|
db?: Database.Database;
|
|
10
|
-
/** Embedding dimensions.
|
|
11
|
+
/** Embedding dimensions. Optional: when omitted, `.vector` persists the
|
|
12
|
+
* dimensions from the first upsert/upsertMany call. KV/search work without it. */
|
|
11
13
|
dimensions?: number;
|
|
12
14
|
/** Force the exact JS-cosine search path even when sqlite-vec is installed.
|
|
13
15
|
* Mainly for parity testing; production should leave this off. */
|
|
@@ -19,6 +21,7 @@ declare class SqliteAdapter {
|
|
|
19
21
|
private readonly db;
|
|
20
22
|
readonly kv: SyncStore;
|
|
21
23
|
readonly vector: VectorStore;
|
|
24
|
+
readonly search: SearchStore;
|
|
22
25
|
constructor(opts: SqliteAdapterOptions);
|
|
23
26
|
/** Close the underlying connection (shared by all facets). */
|
|
24
27
|
close(): void;
|
package/dist/adapters/sqlite.js
CHANGED
|
@@ -1,3 +1,18 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_SEARCH_FIELD,
|
|
3
|
+
assertSameSearchFields,
|
|
4
|
+
assertValidFieldWeightValues,
|
|
5
|
+
fieldWeightsFor,
|
|
6
|
+
normalizeSearchDocument,
|
|
7
|
+
sanitizeFtsQuery
|
|
8
|
+
} from "../chunk-N27Y55CA.js";
|
|
9
|
+
import {
|
|
10
|
+
buildLimitOffset,
|
|
11
|
+
buildOrderBy,
|
|
12
|
+
buildWhereClause,
|
|
13
|
+
hashName,
|
|
14
|
+
jsonPath
|
|
15
|
+
} from "../chunk-DP4D7CJY.js";
|
|
1
16
|
import {
|
|
2
17
|
assertDimensions,
|
|
3
18
|
bufferToVector,
|
|
@@ -5,6 +20,9 @@ import {
|
|
|
5
20
|
isUsableVector,
|
|
6
21
|
vectorToBuffer
|
|
7
22
|
} from "../chunk-BXM3YDOC.js";
|
|
23
|
+
import {
|
|
24
|
+
matchesWhere
|
|
25
|
+
} from "../chunk-KPMRRYTL.js";
|
|
8
26
|
|
|
9
27
|
// src/adapters/sqlite.ts
|
|
10
28
|
import Database from "better-sqlite3";
|
|
@@ -26,10 +44,43 @@ function tryLoadSqliteVec(db) {
|
|
|
26
44
|
return false;
|
|
27
45
|
}
|
|
28
46
|
}
|
|
47
|
+
/**
 * Guard that a vector dimensionality is usable.
 *
 * @param {unknown} dimensions - Candidate dimension count.
 * @throws {Error} When the value is not an integer greater than zero.
 */
function assertPositiveDimensions(dimensions) {
  const isPositiveInteger = Number.isInteger(dimensions) && dimensions > 0;
  if (isPositiveInteger) return;
  throw new Error(`Vector dimensions must be a positive integer; got ${dimensions}.`);
}
|
|
52
|
+
/**
 * Convert one value of an IN-list into something bindable as a SQL parameter.
 *
 * `null` is passed through, booleans become `1` / `0`, and strings, bigints
 * and finite numbers are returned unchanged.
 *
 * @param {unknown} value - Candidate IN-list value.
 * @returns {null | string | number | bigint} The bindable parameter.
 * @throws {Error} When the value is not a JSON scalar (objects, arrays,
 *   `undefined`, functions, symbols, NaN, or ±Infinity).
 */
function sqlParam(value) {
  if (value === null) return null;
  switch (typeof value) {
    case "boolean":
      return value ? 1 : 0;
    case "number":
      // NaN / ±Infinity are not JSON scalars; binding them would make the
      // query silently match nothing, so reject them like other bad input.
      if (Number.isFinite(value)) return value;
      break;
    case "string":
    case "bigint":
      return value;
    default:
      break;
  }
  throw new Error("Store IN queries only support JSON scalar values.");
}
|
|
60
|
+
/**
 * Build the SQL fragment that restricts rows to those whose JSON `field`
 * equals one of `values`.
 *
 * Non-null values are matched with `json_extract(data, ?) IN (...)`; a `null`
 * in the list is matched separately with `json_type(data, ?) = 'null'`.
 *
 * @param {string} field - Field name, converted to a JSON path via `jsonPath`.
 * @param {Array<unknown>} values - Accepted values; each non-null entry is
 *   validated and converted by `sqlParam`.
 * @param {boolean} hasPriorWhere - Whether a WHERE clause already precedes
 *   this fragment (selects ` AND` instead of ` WHERE`).
 * @returns {{ clause: string, params: Array<unknown> }} SQL fragment and its
 *   positional parameters, in placeholder order.
 * @throws {Error} Propagated from `sqlParam` for unsupported values.
 */
function buildJsonInWhere(field, values, hasPriorWhere) {
  const path = jsonPath(field);
  const keyword = hasPriorWhere ? " AND" : " WHERE";

  // Single pass: separate the null marker from the bindable values.
  const nonNull = [];
  let hasNull = false;
  for (const value of values) {
    if (value === null) hasNull = true;
    else nonNull.push(sqlParam(value));
  }

  const parts = [];
  const params = [];
  if (nonNull.length > 0) {
    parts.push(`json_extract(data, ?) IN (${nonNull.map(() => "?").join(", ")})`);
    params.push(path, ...nonNull);
  }
  if (hasNull) {
    parts.push(`json_type(data, ?) = 'null'`);
    params.push(path);
  }

  // An empty value list must match nothing; "()" would be invalid SQL.
  const predicate = parts.length > 0 ? parts.join(" OR ") : "0";
  return { clause: `${keyword} (${predicate})`, params };
}
|
|
29
79
|
var SqliteAdapter = class {
|
|
30
80
|
db;
|
|
31
81
|
kv;
|
|
32
82
|
vector;
|
|
83
|
+
search;
|
|
33
84
|
constructor(opts) {
|
|
34
85
|
const ownsDb = opts.db === void 0;
|
|
35
86
|
this.db = opts.db ?? new Database(opts.path);
|
|
@@ -37,6 +88,7 @@ var SqliteAdapter = class {
|
|
|
37
88
|
this.db.pragma("journal_mode = WAL");
|
|
38
89
|
this.kv = new SqliteKvFacet(this.db);
|
|
39
90
|
this.vector = new SqliteVectorFacet(this.db, opts.path, opts.dimensions, opts.forceJsCosine);
|
|
91
|
+
this.search = new SqliteSearchFacet(this.db);
|
|
40
92
|
} catch (err) {
|
|
41
93
|
if (ownsDb) {
|
|
42
94
|
try {
|
|
@@ -122,6 +174,17 @@ var SqliteKvFacet = class {
|
|
|
122
174
|
const rows = this.db.prepare(sql).all(...where.params, ...orderBy.params);
|
|
123
175
|
return rows.map((r) => JSON.parse(r.data));
|
|
124
176
|
}
|
|
177
|
+
listWhereIn(collection, field, values, filter) {
|
|
178
|
+
if (values.length === 0) return [];
|
|
179
|
+
const table = this.ensureTable(collection);
|
|
180
|
+
const where = buildWhereClause(filter);
|
|
181
|
+
const inWhere = buildJsonInWhere(field, values, where.clause.length > 0);
|
|
182
|
+
const orderBy = buildOrderBy(filter);
|
|
183
|
+
const limitOffset = buildLimitOffset(filter);
|
|
184
|
+
const sql = `SELECT data FROM ${table}${where.clause}${inWhere.clause}${orderBy.clause}${limitOffset}`;
|
|
185
|
+
const rows = this.db.prepare(sql).all(...where.params, ...inWhere.params, ...orderBy.params);
|
|
186
|
+
return rows.map((r) => JSON.parse(r.data));
|
|
187
|
+
}
|
|
125
188
|
getById(collection, id) {
|
|
126
189
|
const table = this.ensureTable(collection);
|
|
127
190
|
const row = this.db.prepare(`SELECT data FROM ${table} WHERE id = ?`).get(id);
|
|
@@ -148,8 +211,10 @@ var SqliteKvFacet = class {
|
|
|
148
211
|
}
|
|
149
212
|
};
|
|
150
213
|
var SqliteVectorFacet = class {
|
|
151
|
-
constructor(db, path, dimensions, forceJsCosine) {
|
|
214
|
+
constructor(db, path, dimensions, forceJsCosine = false) {
|
|
152
215
|
this.db = db;
|
|
216
|
+
this.path = path;
|
|
217
|
+
this.forceJsCosine = forceJsCosine;
|
|
153
218
|
this.db.exec(
|
|
154
219
|
`CREATE TABLE IF NOT EXISTS vectors (
|
|
155
220
|
collection TEXT NOT NULL,
|
|
@@ -169,31 +234,51 @@ var SqliteVectorFacet = class {
|
|
|
169
234
|
`Vector store at ${path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
|
|
170
235
|
);
|
|
171
236
|
}
|
|
237
|
+
this.refreshVectorMeta();
|
|
172
238
|
} else if (dimensions !== void 0) {
|
|
173
|
-
this.dimensions
|
|
174
|
-
this.db.prepare(`INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)`).run(String(dimensions));
|
|
175
|
-
} else {
|
|
176
|
-
this.dimensions = -1;
|
|
239
|
+
this.initializeDims(dimensions);
|
|
177
240
|
}
|
|
178
|
-
this.accelerated = !forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
|
|
179
|
-
this.meta = {
|
|
180
|
-
backend: "sqlite",
|
|
181
|
-
dimensions: Math.max(this.dimensions, 0),
|
|
182
|
-
accelerated: this.accelerated
|
|
183
|
-
};
|
|
184
241
|
}
|
|
185
242
|
db;
|
|
186
|
-
|
|
187
|
-
|
|
243
|
+
path;
|
|
244
|
+
forceJsCosine;
|
|
245
|
+
meta = { backend: "sqlite", dimensions: 0, accelerated: false };
|
|
246
|
+
dimensions = -1;
|
|
188
247
|
/** True when sqlite-vec loaded and the vec0 acceleration path is live. */
|
|
189
|
-
accelerated;
|
|
248
|
+
accelerated = false;
|
|
190
249
|
vecTablesEnsured = /* @__PURE__ */ new Set();
|
|
191
|
-
|
|
250
|
+
initializeDims(dimensions) {
|
|
251
|
+
assertPositiveDimensions(dimensions);
|
|
252
|
+
if (this.dimensions >= 0) {
|
|
253
|
+
if (dimensions !== this.dimensions) {
|
|
254
|
+
throw new Error(
|
|
255
|
+
`Vector store at ${this.path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
|
|
256
|
+
);
|
|
257
|
+
}
|
|
258
|
+
return;
|
|
259
|
+
}
|
|
260
|
+
this.dimensions = dimensions;
|
|
261
|
+
this.db.prepare(
|
|
262
|
+
`INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)
|
|
263
|
+
ON CONFLICT(key) DO UPDATE SET value = excluded.value`
|
|
264
|
+
).run(String(dimensions));
|
|
265
|
+
this.refreshVectorMeta();
|
|
266
|
+
}
|
|
267
|
+
refreshVectorMeta() {
|
|
268
|
+
this.accelerated = !this.forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
|
|
269
|
+
this.meta.dimensions = Math.max(this.dimensions, 0);
|
|
270
|
+
this.meta.accelerated = this.accelerated;
|
|
271
|
+
}
|
|
272
|
+
requireKnownDims(v) {
|
|
192
273
|
if (this.dimensions < 0) {
|
|
193
|
-
throw new Error("SqliteAdapter.vector
|
|
274
|
+
throw new Error("SqliteAdapter.vector has no dimensions yet \u2014 pass { dimensions } when opening or upsert a vector first.");
|
|
194
275
|
}
|
|
195
276
|
assertDimensions(v, this.dimensions);
|
|
196
277
|
}
|
|
278
|
+
ensureDimsForWrite(v) {
|
|
279
|
+
if (this.dimensions < 0) this.initializeDims(v.length);
|
|
280
|
+
assertDimensions(v, this.dimensions);
|
|
281
|
+
}
|
|
197
282
|
// ── vec0 acceleration helpers ───────────────────────────────────────────
|
|
198
283
|
vecTableName(collection) {
|
|
199
284
|
return `vectors_vec_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
|
|
@@ -233,7 +318,7 @@ var SqliteVectorFacet = class {
|
|
|
233
318
|
}
|
|
234
319
|
}
|
|
235
320
|
upsert(collection, doc) {
|
|
236
|
-
this.
|
|
321
|
+
this.ensureDimsForWrite(doc.vector);
|
|
237
322
|
const write = this.db.transaction(() => {
|
|
238
323
|
this.db.prepare(
|
|
239
324
|
`INSERT INTO vectors(collection, id, vec, metadata) VALUES (?, ?, ?, ?)
|
|
@@ -249,6 +334,14 @@ var SqliteVectorFacet = class {
|
|
|
249
334
|
write();
|
|
250
335
|
}
|
|
251
336
|
upsertMany(collection, docs) {
|
|
337
|
+
const first = docs[0];
|
|
338
|
+
if (!first) return;
|
|
339
|
+
const dimensions = this.dimensions >= 0 ? this.dimensions : first.vector.length;
|
|
340
|
+
assertPositiveDimensions(dimensions);
|
|
341
|
+
for (const doc of docs) {
|
|
342
|
+
assertDimensions(doc.vector, dimensions);
|
|
343
|
+
}
|
|
344
|
+
if (this.dimensions < 0) this.initializeDims(dimensions);
|
|
252
345
|
const tx = this.db.transaction((items) => {
|
|
253
346
|
for (const doc of items) this.upsert(collection, doc);
|
|
254
347
|
});
|
|
@@ -263,6 +356,9 @@ var SqliteVectorFacet = class {
|
|
|
263
356
|
metadata: row.metadata === null ? void 0 : JSON.parse(row.metadata)
|
|
264
357
|
};
|
|
265
358
|
}
|
|
359
|
+
has(collection, id) {
|
|
360
|
+
return this.db.prepare(`SELECT 1 FROM vectors WHERE collection = ? AND id = ?`).get(collection, id) !== void 0;
|
|
361
|
+
}
|
|
266
362
|
remove(collection, id) {
|
|
267
363
|
if (this.accelerated) {
|
|
268
364
|
const row = this.db.prepare(`SELECT rowid FROM vectors WHERE collection = ? AND id = ?`).get(collection, id);
|
|
@@ -277,16 +373,17 @@ var SqliteVectorFacet = class {
|
|
|
277
373
|
return row.n;
|
|
278
374
|
}
|
|
279
375
|
search(collection, query, opts) {
|
|
280
|
-
this.
|
|
376
|
+
this.requireKnownDims(query);
|
|
281
377
|
const topK = opts?.topK ?? 10;
|
|
282
378
|
const minScore = opts?.minScore;
|
|
283
|
-
|
|
379
|
+
const hasFilters = !!(opts?.where || opts?.whereNot);
|
|
380
|
+
if (this.accelerated && isUsableVector(query) && !hasFilters) {
|
|
284
381
|
try {
|
|
285
382
|
return this.searchVec(collection, query, topK, minScore);
|
|
286
383
|
} catch {
|
|
287
384
|
}
|
|
288
385
|
}
|
|
289
|
-
return this.searchJs(collection, query, topK, minScore);
|
|
386
|
+
return this.searchJs(collection, query, topK, minScore, opts?.where, opts?.whereNot);
|
|
290
387
|
}
|
|
291
388
|
searchVec(collection, query, topK, minScore) {
|
|
292
389
|
const table = this.ensureVecTable(collection);
|
|
@@ -310,75 +407,174 @@ var SqliteVectorFacet = class {
|
|
|
310
407
|
out.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
|
|
311
408
|
return out;
|
|
312
409
|
}
|
|
313
|
-
searchJs(collection, query, topK, minScore) {
|
|
410
|
+
searchJs(collection, query, topK, minScore, where, whereNot) {
|
|
314
411
|
const rows = this.db.prepare(`SELECT id, vec, metadata FROM vectors WHERE collection = ?`).all(collection);
|
|
315
412
|
const scored = [];
|
|
316
413
|
for (const row of rows) {
|
|
414
|
+
const meta = row.metadata === null ? void 0 : JSON.parse(row.metadata);
|
|
415
|
+
if (where && !matchesWhere(meta, where)) continue;
|
|
416
|
+
if (whereNot && matchesWhere(meta, whereNot)) continue;
|
|
317
417
|
const vec = bufferToVector(row.vec);
|
|
318
418
|
if (!isUsableVector(vec)) continue;
|
|
319
419
|
const score = cosineSimilarity(query, vec);
|
|
320
420
|
if (!Number.isFinite(score)) continue;
|
|
321
421
|
if (minScore !== void 0 && score < minScore) continue;
|
|
322
|
-
scored.push({
|
|
323
|
-
id: row.id,
|
|
324
|
-
score,
|
|
325
|
-
metadata: row.metadata === null ? void 0 : JSON.parse(row.metadata)
|
|
326
|
-
});
|
|
422
|
+
scored.push({ id: row.id, score, metadata: meta });
|
|
327
423
|
}
|
|
328
424
|
scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
|
|
329
425
|
return scored.slice(0, topK);
|
|
330
426
|
}
|
|
331
427
|
};
|
|
332
|
-
function
|
|
333
|
-
|
|
428
|
+
/**
 * Derive the SQL column name for a search field. The default field keeps its
 * own name; any other field becomes `f<index>_<hashName(field)>`.
 *
 * @param {string} field - Logical field name.
 * @param {number} index - Position of the field in the schema.
 * @returns {string} Column name.
 */
function searchColumnName(field, index) {
  return field === DEFAULT_SEARCH_FIELD
    ? DEFAULT_SEARCH_FIELD
    : `f${index}_${hashName(field)}`;
}
|
|
432
|
+
/**
 * Build a schema descriptor from an ordered list of field names.
 *
 * @param {string[]} fields - Logical field names, in schema order.
 * @returns {{ fields: string[], columns: string[] }} A copy of the field
 *   names plus the column name derived for each position.
 */
function searchSchema(fields) {
  const fieldNames = Array.from(fields);
  const columns = fields.map((name, position, all) => searchColumnName(name, position, all));
  return { fields: fieldNames, columns };
}
|
|
335
|
-
function
|
|
336
|
-
|
|
337
|
-
|
|
435
|
+
/**
 * Wrap an identifier in double quotes for use in SQL, doubling any embedded
 * double-quote characters.
 *
 * @param {string} identifier - Raw table, column or trigger name.
 * @returns {string} The quoted identifier.
 */
function quoteSqlIdent(identifier) {
  const escaped = identifier.split('"').join('""');
  return `"${escaped}"`;
}
|
|
438
|
+
/**
 * Render the column definitions (`"<column>" TEXT NOT NULL`) for every column
 * of a search schema, separated by a comma and a line break.
 *
 * @param {{ columns: string[] }} schema - Search schema descriptor.
 * @returns {string} Column definition list for a CREATE TABLE statement.
 */
function searchFieldDefs(schema) {
  const definitions = [];
  for (const column of schema.columns) {
    definitions.push(`${quoteSqlIdent(column)} TEXT NOT NULL`);
  }
  return definitions.join(",\n ");
}
|
|
441
|
+
/**
 * Render `<prefix>."<column>"` references for every schema column, joined by
 * `, `. In this file the prefixes used are the trigger row aliases `new` and
 * `old`.
 *
 * @param {string} prefix - Row alias placed before each column.
 * @param {{ columns: string[] }} schema - Search schema descriptor.
 * @returns {string} Comma-separated column references.
 */
function rowColumnRefs(prefix, schema) {
  const refs = [];
  for (const column of schema.columns) {
    refs.push(`${prefix}.${quoteSqlIdent(column)}`);
  }
  return refs.join(", ");
}
|
|
444
|
+
/**
 * Full-text search facet backed by SQLite FTS5.
 *
 * Each collection gets a base table (`search_docs_*`) holding the document
 * fields plus `meta_json`, and an external-content FTS5 table (`search_fts_*`)
 * kept in sync by insert/update/delete triggers. The field list of every
 * collection is recorded in `_mirk_search_schema`.
 */
var SqliteSearchFacet = class {
  /**
   * @param {object} db - Database handle; `exec`, `prepare` and `transaction`
   *   are the only members used here.
   */
  constructor(db) {
    this.db = db;
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS ${this.schemaTable} (
        collection TEXT PRIMARY KEY,
        fields_json TEXT NOT NULL
      );
    `);
  }
  db;
  /** Cache of `<docs table>:<field list>` keys whose tables/triggers were already created. */
  ensured = /* @__PURE__ */ new Set();
  schemaTable = "_mirk_search_schema";

  /** Name of the base (content) table for `collection`. */
  baseTable(collection) {
    return `search_docs_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
  }

  /** Name of the FTS5 virtual table for `collection`. */
  ftsTable(collection) {
    return `search_fts_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
  }

  /** Whether a table or view called `table` exists in `sqlite_master`. */
  tableExists(table) {
    return this.db.prepare("SELECT 1 AS ok FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?").get(table) !== void 0;
  }

  /**
   * Load the recorded schema for `collection`.
   *
   * When no schema row exists but a base table with the default search column
   * does, the single-field schema is registered and returned.
   *
   * @returns {{ fields: string[], columns: string[] } | undefined}
   */
  loadSchema(collection) {
    const row = this.db.prepare(`SELECT fields_json FROM ${this.schemaTable} WHERE collection = ?`).get(collection);
    if (row) {
      return searchSchema(JSON.parse(row.fields_json));
    }
    const docs = this.baseTable(collection);
    if (!this.tableExists(docs)) return void 0;
    const pragma = this.db.prepare(`PRAGMA table_info(${quoteSqlIdent(docs)})`).all();
    if (!pragma.some((col) => col.name === DEFAULT_SEARCH_FIELD)) return void 0;
    const fields = [DEFAULT_SEARCH_FIELD];
    this.db.prepare(`INSERT OR IGNORE INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
    return searchSchema(fields);
  }

  /**
   * Resolve the schema to index a document with: the existing one (after
   * checking the document's field names against it) or a newly registered one
   * taken from the document itself.
   */
  schemaForIndex(collection, normalized) {
    const existing = this.loadSchema(collection);
    if (existing) {
      assertSameSearchFields(existing.fields, normalized.names, collection);
      return existing;
    }
    const fields = [...normalized.names];
    this.db.prepare(`INSERT INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
    return searchSchema(fields);
  }

  /**
   * Create the base table, FTS5 table and sync triggers for `collection` if
   * this instance has not done so yet.
   *
   * @returns {{ docs: string, fts: string }} Unquoted table names.
   */
  ensure(collection, schema) {
    const docs = this.baseTable(collection);
    const fts = this.ftsTable(collection);
    const key = `${docs}:${schema.fields.join("\0")}`;
    if (this.ensured.has(key)) return { docs, fts };
    const qDocs = quoteSqlIdent(docs);
    const qFts = quoteSqlIdent(fts);
    const qColumns = schema.columns.map(quoteSqlIdent).join(", ");
    const newColumns = rowColumnRefs("new", schema);
    const oldColumns = rowColumnRefs("old", schema);
    const fieldDefs = searchFieldDefs(schema);
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS ${qDocs} (
        id TEXT PRIMARY KEY,
        ${fieldDefs},
        meta_json TEXT
      );
      CREATE VIRTUAL TABLE IF NOT EXISTS ${qFts} USING fts5(
        ${qColumns}, content='${docs}', content_rowid='rowid', tokenize='unicode61'
      );
      CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ai`)} AFTER INSERT ON ${qDocs} BEGIN
        INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
      END;
      CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ad`)} AFTER DELETE ON ${qDocs} BEGIN
        INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
      END;
      CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_au`)} AFTER UPDATE ON ${qDocs} BEGIN
        INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
        INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
      END;
    `);
    this.ensured.add(key);
    return { docs, fts };
  }

  /**
   * Insert or replace one document. Fields missing from the normalized
   * document are stored as empty strings; `meta` is stored as JSON (or NULL
   * when undefined).
   */
  index(collection, doc) {
    const normalized = normalizeSearchDocument(doc);
    const schema = this.schemaForIndex(collection, normalized);
    const { docs } = this.ensure(collection, schema);
    const qDocs = quoteSqlIdent(docs);
    const qColumns = schema.columns.map(quoteSqlIdent);
    const insertColumns = ["id", ...schema.columns, "meta_json"].map(quoteSqlIdent).join(", ");
    const placeholders = Array.from({ length: schema.columns.length + 2 }, () => "?").join(", ");
    const updateSet = [...qColumns.map((col) => `${col} = excluded.${col}`), "meta_json = excluded.meta_json"].join(", ");
    const metaJson = doc.meta === void 0 ? null : JSON.stringify(doc.meta);
    this.db.prepare(
      `INSERT INTO ${qDocs}(${insertColumns}) VALUES (${placeholders})
       ON CONFLICT(id) DO UPDATE SET ${updateSet}`
    ).run(doc.id, ...schema.fields.map((field) => normalized.values[field] ?? ""), metaJson);
  }

  /**
   * Index several documents inside one transaction.
   *
   * @throws Rethrows whatever `index` throws, after resetting the `ensured`
   *   cache.
   */
  indexMany(collection, docs) {
    const tx = this.db.transaction((items) => {
      for (const doc of items) this.index(collection, doc);
    });
    try {
      tx(docs);
    } catch (err) {
      // A rolled-back transaction also undoes any CREATE TABLE / TRIGGER that
      // `ensure` ran inside it, so the cache may name tables that no longer
      // exist. Dropping it is safe: `ensure` uses IF NOT EXISTS throughout.
      this.ensured.clear();
      throw err;
    }
  }

  /**
   * Delete one document.
   *
   * @returns {boolean} `true` when a row was deleted; `false` when the
   *   collection has no schema or the id was not present.
   */
  remove(collection, id) {
    const schema = this.loadSchema(collection);
    if (!schema) return false;
    const { docs } = this.ensure(collection, schema);
    return this.db.prepare(`DELETE FROM ${quoteSqlIdent(docs)} WHERE id = ?`).run(id).changes > 0;
  }

  /**
   * Keyword search ordered by `bm25()` (best first), ties broken by id.
   *
   * @param {string} collection - Collection to search.
   * @param {string} query - Raw query; passed through `sanitizeFtsQuery`.
   * @param {{ limit?: number, fieldWeights?: object, filter?: { where?: object } }} [opts]
   * @returns {Array<{ id: string, score: number, meta: object }>} At most
   *   `limit` (default 10) hits; `score` is the negated bm25 value.
   */
  search(collection, query, opts) {
    const sanitized = sanitizeFtsQuery(query);
    assertValidFieldWeightValues(opts?.fieldWeights);
    if (sanitized.length === 0) return [];
    const schema = this.loadSchema(collection);
    if (!schema) return [];
    const { docs, fts } = this.ensure(collection, schema);
    const weights = fieldWeightsFor(schema.fields, opts?.fieldWeights);
    const weightArgs = weights.length > 0 ? `, ${weights.map((weight) => String(weight)).join(", ")}` : "";
    const limit = opts?.limit ?? 10;
    const where = opts?.filter?.where;
    // Without a metadata filter the first `limit` ordered rows are exactly the
    // result, so let SQLite stop early instead of materialising every match.
    // Unusual limits (negative, fractional, huge) keep the JS-side slice so
    // their behaviour is unchanged.
    const pushLimit = !where && Number.isSafeInteger(limit) && limit >= 0;
    const statement = this.db.prepare(
      `SELECT d.id AS id, d.meta_json AS meta_json, bm25(${fts}${weightArgs}) AS bm
       FROM ${quoteSqlIdent(fts)}
       JOIN ${quoteSqlIdent(docs)} d ON d.rowid = ${fts}.rowid
       WHERE ${fts} MATCH ?
       ORDER BY bm, d.id${pushLimit ? " LIMIT ?" : ""}`
    );
    const rows = pushLimit ? statement.all(sanitized, limit) : statement.all(sanitized);
    const out = [];
    for (const r of rows) {
      const meta = r.meta_json === null ? {} : JSON.parse(r.meta_json);
      if (where && !matchesWhere(meta, where)) continue;
      out.push({ id: r.id, score: -r.bm, meta });
    }
    return out.slice(0, limit);
  }
};
|
|
382
578
|
export {
|
|
383
579
|
SqliteAdapter
|
|
384
580
|
};
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
import {
|
|
2
|
+
assertSameSearchFields,
|
|
3
|
+
assertValidFieldWeightValues,
|
|
4
|
+
fieldWeightsFor,
|
|
5
|
+
normalizeSearchDocument,
|
|
6
|
+
tokenize
|
|
7
|
+
} from "./chunk-N27Y55CA.js";
|
|
8
|
+
import {
|
|
9
|
+
matchesWhere
|
|
10
|
+
} from "./chunk-KPMRRYTL.js";
|
|
11
|
+
|
|
12
|
+
// src/search/memory.ts
|
|
13
|
+
// BM25 term-frequency saturation parameter.
var K1 = 1.2;
// BM25 document-length normalisation parameter.
var B = 0.75;
// Floor applied when the raw idf is not positive (terms found in at least half
// of the documents). SQLite FTS5's bm25() uses the same floor, so common terms
// still contribute a tiny, weight-sensitive score.
var MIN_IDF = 1e-6;

/**
 * In-memory keyword search store with BM25-style ranking.
 *
 * Each collection keeps its field list (fixed by the first indexed document),
 * per-document term frequencies by field, per-term document frequencies, and
 * the running total of document lengths.
 */
var InMemorySearchStore = class {
  collections = /* @__PURE__ */ new Map();

  /**
   * Index (or re-index) one document. An existing document with the same id
   * is removed first so the collection statistics stay consistent.
   *
   * @throws Whatever `normalizeSearchDocument` / `assertSameSearchFields` throw.
   */
  index(collection, doc) {
    const normalized = normalizeSearchDocument(doc);
    const coll = this.collectionFor(collection, normalized.names);
    assertSameSearchFields(coll.fields, normalized.names, collection);
    this.removeFromColl(coll, doc.id);
    const tfByField = /* @__PURE__ */ new Map();
    const terms = /* @__PURE__ */ new Set();
    let dl = 0;
    for (const field of coll.fields) {
      const tokens = tokenize(normalized.values[field] ?? "");
      dl += tokens.length;
      const tf = /* @__PURE__ */ new Map();
      for (const token of tokens) {
        tf.set(token, (tf.get(token) ?? 0) + 1);
        terms.add(token);
      }
      tfByField.set(field, tf);
    }
    coll.totalLen += dl;
    coll.docs.set(doc.id, {
      id: doc.id,
      meta: doc.meta ?? {},
      tfByField,
      dl,
      terms
    });
    for (const term of terms) coll.df.set(term, (coll.df.get(term) ?? 0) + 1);
  }

  /** Index every document of `docs`, in order. */
  indexMany(collection, docs) {
    for (const doc of docs) this.index(collection, doc);
  }

  /**
   * Remove one document.
   *
   * @returns {boolean} `true` when the document existed and was removed.
   */
  remove(collection, id) {
    const coll = this.collections.get(collection);
    if (!coll) return false;
    return this.removeFromColl(coll, id);
  }

  /**
   * Rank the documents of `collection` against `query`.
   *
   * A document is a hit when at least one query token has a non-zero weighted
   * term frequency in it. Hits are sorted by score (descending), ties broken
   * by id, and truncated to `limit` (default 10).
   *
   * @param {string} collection - Collection name.
   * @param {string} query - Raw query text; tokenized with `tokenize`.
   * @param {{ limit?: number, fieldWeights?: object, filter?: { where?: object } }} [opts]
   * @returns {Array<{ id: string, score: number, meta: object }>}
   */
  search(collection, query, opts) {
    const coll = this.collections.get(collection);
    const qTokens = tokenize(query);
    assertValidFieldWeightValues(opts?.fieldWeights);
    if (!coll || qTokens.length === 0) return [];
    const limit = opts?.limit ?? 10;
    const where = opts?.filter?.where;
    const fieldWeights = fieldWeightsFor(coll.fields, opts?.fieldWeights);
    const n = coll.docs.size;
    // Collection-wide average length; invariant across the document loop.
    const avgdl = n > 0 ? coll.totalLen / n : 0;
    const scored = [];
    for (const doc of coll.docs.values()) {
      if (where && !matchesWhere(doc.meta, where)) continue;
      const lengthNorm = K1 * (1 - B + B * (avgdl > 0 ? doc.dl / avgdl : 0));
      let matched = false;
      let score = 0;
      for (const qt of qTokens) {
        const df = coll.df.get(qt) ?? 0;
        if (df === 0) continue;
        let weightedTf = 0;
        for (let i = 0; i < coll.fields.length; i++) {
          const tf = doc.tfByField.get(coll.fields[i])?.get(qt) ?? 0;
          weightedTf += fieldWeights[i] * tf;
        }
        if (weightedTf === 0) continue;
        matched = true;
        const rawIdf = Math.log((n - df + 0.5) / (df + 0.5));
        // Clamp rather than skip: skipping made every common-term hit score 0,
        // which discarded the field weights and left only the id tie-break.
        const idf = rawIdf > 0 ? rawIdf : MIN_IDF;
        score += idf * (weightedTf * (K1 + 1)) / (weightedTf + lengthNorm);
      }
      if (!matched) continue;
      scored.push({ id: doc.id, score, meta: doc.meta });
    }
    scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
    return scored.slice(0, limit);
  }

  /**
   * Remove `id` from an already-resolved collection and roll its contribution
   * out of `totalLen` and the document-frequency table.
   *
   * @returns {boolean} `true` when the document existed.
   */
  removeFromColl(coll, id) {
    const doc = coll.docs.get(id);
    if (!doc) return false;
    coll.docs.delete(id);
    coll.totalLen -= doc.dl;
    for (const term of doc.terms) {
      const next = (coll.df.get(term) ?? 0) - 1;
      if (next <= 0) coll.df.delete(term);
      else coll.df.set(term, next);
    }
    return true;
  }

  /**
   * Fetch the collection called `name`, creating it with a copy of `fields`
   * as its field list when it does not exist yet.
   */
  collectionFor(name, fields) {
    let coll = this.collections.get(name);
    if (!coll) {
      coll = {
        fields: [...fields],
        docs: /* @__PURE__ */ new Map(),
        df: /* @__PURE__ */ new Map(),
        totalLen: 0
      };
      this.collections.set(name, coll);
    }
    return coll;
  }
};
|
|
117
|
+
|
|
118
|
+
export {
|
|
119
|
+
InMemorySearchStore
|
|
120
|
+
};
|