@mirk/store 0.4.2 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,11 +24,13 @@ npm install sqlite-vec
24
24
  | `@mirk/store` | the ports + their in-memory references + `toAsync` + cosine helpers | none |
25
25
  | `@mirk/store/kv` | `SyncStore` port (key-value + collections), `InMemoryKv`, `toAsync` | none |
26
26
  | `@mirk/store/vector` | `VectorStore` port, `InMemoryVectorStore`, cosine helpers | none |
27
- | `@mirk/store/sqlite` | the SQLite **source adapter** — one connection, `.kv` + `.vector` facets | `better-sqlite3` (peer), `sqlite-vec` (optional peer) |
27
+ | `@mirk/store/search` | `SearchStore` port, `InMemorySearchStore`, BM25-style keyword search | none |
28
+ | `@mirk/store/graph` | graph helpers over the collection port (`neighbors`, `traverse`, `traverseFrontierBatched`) | none |
29
+ | `@mirk/store/sqlite` | the SQLite **source adapter** — one connection, `.kv` + `.vector` + `.search` facets | `better-sqlite3` (peer), `sqlite-vec` (optional peer) |
28
30
 
29
31
  Source adapters are reached **only** through their own subpath (e.g. `/sqlite`) — the root and the
30
- port subpaths never re-export them, so importing `@mirk/store`, `/kv`, or `/vector` never drags a
31
- native binding into a consumer bundle.
32
+ port subpaths never re-export them, so importing `@mirk/store`, `/kv`, `/vector`, `/search`, or
33
+ `/graph` never drags a native binding into a consumer bundle.
32
34
 
33
35
  ## Quickstart — zero native deps
34
36
 
@@ -60,19 +62,51 @@ kv.count("posts"); // 1
60
62
  kv.remove("posts", "p1");
61
63
  ```
62
64
 
65
+ ## Full-text search
66
+
67
+ `SearchStore` indexes documents by id and returns BM25-ranked keyword matches. Use `text` for the
68
+ single-field shorthand or `fields` for named columns with query-time weighting:
69
+
70
+ ```ts
71
+ import { InMemorySearchStore } from "@mirk/store/search";
72
+
73
+ const search = new InMemorySearchStore();
74
+ search.index("pages", { id: "a", fields: { title: "Opal guide", body: "plain body" } });
75
+ search.index("pages", { id: "b", fields: { title: "plain title", body: "Opal guide" } });
76
+ search.search("pages", "opal", { fieldWeights: { title: 4, body: 1 } }); // [a, b]
77
+ ```
78
+
79
+ The first indexed document fixes a collection's field schema; later documents must use the same
80
+ field names. `text` and `fields: { text }` are the same single-field schema for backwards
81
+ compatibility.
82
+
83
+ ## Graph helpers
84
+
85
+ `@mirk/store/graph` stores edges as ordinary collection records and traverses them through the
86
+ existing collection port. Policy stays caller-owned through `StoreFilter`.
87
+
88
+ ```ts
89
+ import { traverse } from "@mirk/store/graph";
90
+
91
+ const hits = traverse(kv, { start: "node:a", depth: 2, direction: "out" });
92
+ ```
93
+
63
94
  ## SQLite adapter — one connection, many capabilities
64
95
 
65
- `SqliteAdapter` opens a single `better-sqlite3` database and exposes a `.kv` facet (a `SyncStore`)
66
- and a `.vector` facet (a `VectorStore`) over it:
96
+ `SqliteAdapter` opens a single `better-sqlite3` database and exposes `.kv` (`SyncStore`), `.vector`
97
+ (`VectorStore`), and `.search` (`SearchStore`) facets over it:
67
98
 
68
99
  ```ts
69
100
  import { SqliteAdapter } from "@mirk/store/sqlite";
70
101
 
71
- // `dimensions` is required to use the .vector facet; .kv works without it.
72
- const db = new SqliteAdapter({ path: "data.db", dimensions: 768 });
102
+ // .kv and .search work immediately; vector dimensions are inferred on the first write.
103
+ const db = new SqliteAdapter({ path: "data.db" });
73
104
 
74
105
  db.kv.set("user:1", { name: "Ada" });
75
106
 
107
+ db.search.index("pages", { id: "intro", fields: { title: "Intro", body: "hello world" } });
108
+ db.search.search("pages", "hello", { fieldWeights: { title: 4, body: 1 } });
109
+
76
110
  const embedding = new Float32Array(768); // your real embedding here
77
111
  const query = new Float32Array(768);
78
112
  db.vector.upsert("docs", { id: "a", vector: embedding });
@@ -87,7 +121,7 @@ db.close();
87
121
  |---|---|---|
88
122
  | `path` | `string` | DB file path, or `":memory:"`. |
89
123
  | `db` | `Database` | Reuse an existing `better-sqlite3` connection instead of opening one. |
90
- | `dimensions` | `number` | Embedding dimensionality. Required before using `.vector`; persisted and enforced on reopen. |
124
+ | `dimensions` | `number` | Optional embedding dimensionality. If omitted, inferred and persisted from the first vector `upsert` / `upsertMany`; `search` still requires known dimensions. |
91
125
  | `forceJsCosine` | `boolean` | Pin the exact JS-cosine path even when `sqlite-vec` is installed (mainly for tests). |
92
126
 
93
127
  Vectors (`Vector` is a `Float32Array`) are stored as little-endian float32 BLOBs and ranked by
@@ -1,13 +1,15 @@
1
1
  import Database from 'better-sqlite3';
2
- import { b as SyncStore } from '../types-DyQLNtxa.js';
3
- import { d as VectorStore } from '../types-BqSZEMAB.js';
2
+ import { c as SyncStore } from '../types-DHLdLZ_W.js';
3
+ import { d as VectorStore } from '../types-B0XrD10b.js';
4
+ import { c as SearchStore } from '../types-BLZTMsQO.js';
4
5
 
5
6
  interface SqliteAdapterOptions {
6
7
  /** Path to the SQLite database file. Use ":memory:" for in-memory. */
7
8
  path: string;
8
9
  /** Existing better-sqlite3 instance to reuse (shares one connection). */
9
10
  db?: Database.Database;
10
- /** Embedding dimensions. Required to use the `.vector` facet; KV works without it. */
11
+ /** Embedding dimensions. Optional: when omitted, `.vector` persists the
12
+ * dimensions from the first upsert/upsertMany call. KV/search work without it. */
11
13
  dimensions?: number;
12
14
  /** Force the exact JS-cosine search path even when sqlite-vec is installed.
13
15
  * Mainly for parity testing; production should leave this off. */
@@ -19,6 +21,7 @@ declare class SqliteAdapter {
19
21
  private readonly db;
20
22
  readonly kv: SyncStore;
21
23
  readonly vector: VectorStore;
24
+ readonly search: SearchStore;
22
25
  constructor(opts: SqliteAdapterOptions);
23
26
  /** Close the underlying connection (shared by all facets: kv, vector, and search). */
24
27
  close(): void;
@@ -1,3 +1,18 @@
1
+ import {
2
+ DEFAULT_SEARCH_FIELD,
3
+ assertSameSearchFields,
4
+ assertValidFieldWeightValues,
5
+ fieldWeightsFor,
6
+ normalizeSearchDocument,
7
+ sanitizeFtsQuery
8
+ } from "../chunk-N27Y55CA.js";
9
+ import {
10
+ buildLimitOffset,
11
+ buildOrderBy,
12
+ buildWhereClause,
13
+ hashName,
14
+ jsonPath
15
+ } from "../chunk-DP4D7CJY.js";
1
16
  import {
2
17
  assertDimensions,
3
18
  bufferToVector,
@@ -5,6 +20,9 @@ import {
5
20
  isUsableVector,
6
21
  vectorToBuffer
7
22
  } from "../chunk-BXM3YDOC.js";
23
+ import {
24
+ matchesWhere
25
+ } from "../chunk-KPMRRYTL.js";
8
26
 
9
27
  // src/adapters/sqlite.ts
10
28
  import Database from "better-sqlite3";
@@ -26,10 +44,43 @@ function tryLoadSqliteVec(db) {
26
44
  return false;
27
45
  }
28
46
  }
47
/**
 * Guard for embedding dimensionality.
 *
 * @param dimensions Candidate dimension count.
 * @throws Error when the value is not an integer greater than zero.
 */
function assertPositiveDimensions(dimensions) {
  const isPositiveInteger = Number.isInteger(dimensions) && dimensions > 0;
  if (isPositiveInteger) return;
  throw new Error(`Vector dimensions must be a positive integer; got ${dimensions}.`);
}
52
/**
 * Convert one `IN`-list value into a form that can be bound as a SQL parameter.
 *
 * `null` passes through, booleans become `1` / `0`, and strings, numbers and
 * bigints are returned unchanged.
 *
 * @param value Candidate scalar.
 * @returns The bindable value.
 * @throws Error for any other type (objects, arrays, `undefined`, ...).
 */
function sqlParam(value) {
  if (value === null) return null;
  switch (typeof value) {
    case "boolean":
      return value ? 1 : 0;
    case "string":
    case "number":
    case "bigint":
      return value;
    default:
      throw new Error("Store IN queries only support JSON scalar values.");
  }
}
60
/**
 * Build a membership predicate over a field of the JSON `data` column.
 *
 * Non-null values are matched with `json_extract(data, ?) IN (...)`. A `null`
 * among the values is matched separately through `json_type(data, ?) = 'null'`,
 * because SQL `IN` never matches NULL.
 *
 * @param field Field name; turned into a JSON path by `jsonPath`.
 * @param values Scalars to match; each non-null value goes through `sqlParam`.
 * @param hasPriorWhere True when a WHERE clause has already been emitted, so
 *   the predicate is appended with `AND` rather than introduced with `WHERE`.
 * @returns `{ clause, params }`, where `params` follow the order of the `?`
 *   placeholders in `clause`.
 * @throws Error (from `sqlParam`) when a value is not a JSON scalar.
 */
function buildJsonInWhere(field, values, hasPriorWhere) {
  const keyword = hasPriorWhere ? " AND" : " WHERE";
  const nonNull = values.filter((value) => value !== null).map(sqlParam);
  const hasNull = values.some((value) => value === null);
  if (nonNull.length === 0 && !hasNull) {
    // Nothing to match: emit a never-true predicate instead of the invalid
    // SQL `()` that an empty OR-list would produce.
    return { clause: `${keyword} (0)`, params: [] };
  }
  const path = jsonPath(field);
  const parts = [];
  const params = [];
  if (nonNull.length > 0) {
    parts.push(`json_extract(data, ?) IN (${nonNull.map(() => "?").join(", ")})`);
    params.push(path, ...nonNull);
  }
  if (hasNull) {
    parts.push(`json_type(data, ?) = 'null'`);
    params.push(path);
  }
  return {
    clause: `${keyword} (${parts.join(" OR ")})`,
    params
  };
}
29
79
  var SqliteAdapter = class {
30
80
  db;
31
81
  kv;
32
82
  vector;
83
+ search;
33
84
  constructor(opts) {
34
85
  const ownsDb = opts.db === void 0;
35
86
  this.db = opts.db ?? new Database(opts.path);
@@ -37,6 +88,7 @@ var SqliteAdapter = class {
37
88
  this.db.pragma("journal_mode = WAL");
38
89
  this.kv = new SqliteKvFacet(this.db);
39
90
  this.vector = new SqliteVectorFacet(this.db, opts.path, opts.dimensions, opts.forceJsCosine);
91
+ this.search = new SqliteSearchFacet(this.db);
40
92
  } catch (err) {
41
93
  if (ownsDb) {
42
94
  try {
@@ -122,6 +174,17 @@ var SqliteKvFacet = class {
122
174
  const rows = this.db.prepare(sql).all(...where.params, ...orderBy.params);
123
175
  return rows.map((r) => JSON.parse(r.data));
124
176
  }
177
+ listWhereIn(collection, field, values, filter) {
178
+ if (values.length === 0) return [];
179
+ const table = this.ensureTable(collection);
180
+ const where = buildWhereClause(filter);
181
+ const inWhere = buildJsonInWhere(field, values, where.clause.length > 0);
182
+ const orderBy = buildOrderBy(filter);
183
+ const limitOffset = buildLimitOffset(filter);
184
+ const sql = `SELECT data FROM ${table}${where.clause}${inWhere.clause}${orderBy.clause}${limitOffset}`;
185
+ const rows = this.db.prepare(sql).all(...where.params, ...inWhere.params, ...orderBy.params);
186
+ return rows.map((r) => JSON.parse(r.data));
187
+ }
125
188
  getById(collection, id) {
126
189
  const table = this.ensureTable(collection);
127
190
  const row = this.db.prepare(`SELECT data FROM ${table} WHERE id = ?`).get(id);
@@ -148,8 +211,10 @@ var SqliteKvFacet = class {
148
211
  }
149
212
  };
150
213
  var SqliteVectorFacet = class {
151
- constructor(db, path, dimensions, forceJsCosine) {
214
+ constructor(db, path, dimensions, forceJsCosine = false) {
152
215
  this.db = db;
216
+ this.path = path;
217
+ this.forceJsCosine = forceJsCosine;
153
218
  this.db.exec(
154
219
  `CREATE TABLE IF NOT EXISTS vectors (
155
220
  collection TEXT NOT NULL,
@@ -169,31 +234,51 @@ var SqliteVectorFacet = class {
169
234
  `Vector store at ${path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
170
235
  );
171
236
  }
237
+ this.refreshVectorMeta();
172
238
  } else if (dimensions !== void 0) {
173
- this.dimensions = dimensions;
174
- this.db.prepare(`INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)`).run(String(dimensions));
175
- } else {
176
- this.dimensions = -1;
239
+ this.initializeDims(dimensions);
177
240
  }
178
- this.accelerated = !forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
179
- this.meta = {
180
- backend: "sqlite",
181
- dimensions: Math.max(this.dimensions, 0),
182
- accelerated: this.accelerated
183
- };
184
241
  }
185
242
  db;
186
- meta;
187
- dimensions;
243
+ path;
244
+ forceJsCosine;
245
+ meta = { backend: "sqlite", dimensions: 0, accelerated: false };
246
+ dimensions = -1;
188
247
  /** True when sqlite-vec loaded and the vec0 acceleration path is live. */
189
- accelerated;
248
+ accelerated = false;
190
249
  vecTablesEnsured = /* @__PURE__ */ new Set();
191
- requireDims(v) {
250
+ initializeDims(dimensions) {
251
+ assertPositiveDimensions(dimensions);
252
+ if (this.dimensions >= 0) {
253
+ if (dimensions !== this.dimensions) {
254
+ throw new Error(
255
+ `Vector store at ${this.path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
256
+ );
257
+ }
258
+ return;
259
+ }
260
+ this.dimensions = dimensions;
261
+ this.db.prepare(
262
+ `INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)
263
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value`
264
+ ).run(String(dimensions));
265
+ this.refreshVectorMeta();
266
+ }
267
+ refreshVectorMeta() {
268
+ this.accelerated = !this.forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
269
+ this.meta.dimensions = Math.max(this.dimensions, 0);
270
+ this.meta.accelerated = this.accelerated;
271
+ }
272
+ requireKnownDims(v) {
192
273
  if (this.dimensions < 0) {
193
- throw new Error("SqliteAdapter.vector requires `dimensions` \u2014 pass { dimensions } when opening.");
274
+ throw new Error("SqliteAdapter.vector has no dimensions yet \u2014 pass { dimensions } when opening or upsert a vector first.");
194
275
  }
195
276
  assertDimensions(v, this.dimensions);
196
277
  }
278
+ ensureDimsForWrite(v) {
279
+ if (this.dimensions < 0) this.initializeDims(v.length);
280
+ assertDimensions(v, this.dimensions);
281
+ }
197
282
  // ── vec0 acceleration helpers ───────────────────────────────────────────
198
283
  vecTableName(collection) {
199
284
  return `vectors_vec_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
@@ -233,7 +318,7 @@ var SqliteVectorFacet = class {
233
318
  }
234
319
  }
235
320
  upsert(collection, doc) {
236
- this.requireDims(doc.vector);
321
+ this.ensureDimsForWrite(doc.vector);
237
322
  const write = this.db.transaction(() => {
238
323
  this.db.prepare(
239
324
  `INSERT INTO vectors(collection, id, vec, metadata) VALUES (?, ?, ?, ?)
@@ -249,6 +334,14 @@ var SqliteVectorFacet = class {
249
334
  write();
250
335
  }
251
336
  upsertMany(collection, docs) {
337
+ const first = docs[0];
338
+ if (!first) return;
339
+ const dimensions = this.dimensions >= 0 ? this.dimensions : first.vector.length;
340
+ assertPositiveDimensions(dimensions);
341
+ for (const doc of docs) {
342
+ assertDimensions(doc.vector, dimensions);
343
+ }
344
+ if (this.dimensions < 0) this.initializeDims(dimensions);
252
345
  const tx = this.db.transaction((items) => {
253
346
  for (const doc of items) this.upsert(collection, doc);
254
347
  });
@@ -263,6 +356,9 @@ var SqliteVectorFacet = class {
263
356
  metadata: row.metadata === null ? void 0 : JSON.parse(row.metadata)
264
357
  };
265
358
  }
359
+ has(collection, id) {
360
+ return this.db.prepare(`SELECT 1 FROM vectors WHERE collection = ? AND id = ?`).get(collection, id) !== void 0;
361
+ }
266
362
  remove(collection, id) {
267
363
  if (this.accelerated) {
268
364
  const row = this.db.prepare(`SELECT rowid FROM vectors WHERE collection = ? AND id = ?`).get(collection, id);
@@ -277,16 +373,17 @@ var SqliteVectorFacet = class {
277
373
  return row.n;
278
374
  }
279
375
  search(collection, query, opts) {
280
- this.requireDims(query);
376
+ this.requireKnownDims(query);
281
377
  const topK = opts?.topK ?? 10;
282
378
  const minScore = opts?.minScore;
283
- if (this.accelerated && isUsableVector(query)) {
379
+ const hasFilters = !!(opts?.where || opts?.whereNot);
380
+ if (this.accelerated && isUsableVector(query) && !hasFilters) {
284
381
  try {
285
382
  return this.searchVec(collection, query, topK, minScore);
286
383
  } catch {
287
384
  }
288
385
  }
289
- return this.searchJs(collection, query, topK, minScore);
386
+ return this.searchJs(collection, query, topK, minScore, opts?.where, opts?.whereNot);
290
387
  }
291
388
  searchVec(collection, query, topK, minScore) {
292
389
  const table = this.ensureVecTable(collection);
@@ -310,75 +407,174 @@ var SqliteVectorFacet = class {
310
407
  out.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
311
408
  return out;
312
409
  }
313
- searchJs(collection, query, topK, minScore) {
410
+ searchJs(collection, query, topK, minScore, where, whereNot) {
314
411
  const rows = this.db.prepare(`SELECT id, vec, metadata FROM vectors WHERE collection = ?`).all(collection);
315
412
  const scored = [];
316
413
  for (const row of rows) {
414
+ const meta = row.metadata === null ? void 0 : JSON.parse(row.metadata);
415
+ if (where && !matchesWhere(meta, where)) continue;
416
+ if (whereNot && matchesWhere(meta, whereNot)) continue;
317
417
  const vec = bufferToVector(row.vec);
318
418
  if (!isUsableVector(vec)) continue;
319
419
  const score = cosineSimilarity(query, vec);
320
420
  if (!Number.isFinite(score)) continue;
321
421
  if (minScore !== void 0 && score < minScore) continue;
322
- scored.push({
323
- id: row.id,
324
- score,
325
- metadata: row.metadata === null ? void 0 : JSON.parse(row.metadata)
326
- });
422
+ scored.push({ id: row.id, score, metadata: meta });
327
423
  }
328
424
  scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
329
425
  return scored.slice(0, topK);
330
426
  }
331
427
  };
332
- function jsonPath(field) {
333
- return `$."${field.replace(/"/g, '""')}"`;
428
/**
 * Map a logical search field to its physical column name.
 *
 * The default field keeps its own name; any other field becomes
 * `f<index>_<hash of the field name>`.
 *
 * @param field Logical field name.
 * @param index Position of the field in the schema.
 * @returns The column name to use in SQL.
 */
function searchColumnName(field, index) {
  return field === DEFAULT_SEARCH_FIELD ? DEFAULT_SEARCH_FIELD : `f${index}_${hashName(field)}`;
}
432
/**
 * Build a schema descriptor from an ordered list of field names.
 *
 * @param fields Logical field names, in schema order.
 * @returns `{ fields, columns }`: a copy of the field names and the physical
 *   column name for each, index-aligned.
 */
function searchSchema(fields) {
  const names = Array.from(fields);
  const columns = fields.map((field, index) => searchColumnName(field, index));
  return { fields: names, columns };
}
335
- function buildWhereClause(filter) {
336
- if (!filter?.where || Object.keys(filter.where).length === 0) {
337
- return { clause: "", params: [] };
435
/**
 * Quote a SQL identifier with double quotes, doubling any embedded quote.
 *
 * @param identifier Raw identifier text.
 * @returns The identifier wrapped in `"`.
 */
function quoteSqlIdent(identifier) {
  const escaped = identifier.split('"').join('""');
  return '"' + escaped + '"';
}
438
/**
 * Render the column definitions for a schema's search fields.
 *
 * @param schema Schema descriptor with a `columns` array.
 * @returns One `"<column>" TEXT NOT NULL` entry per column, joined for use
 *   inside a CREATE TABLE statement.
 */
function searchFieldDefs(schema) {
  const definitions = [];
  for (const column of schema.columns) {
    definitions.push(`${quoteSqlIdent(column)} TEXT NOT NULL`);
  }
  return definitions.join(",\n ");
}
441
/**
 * Render a schema's columns as references qualified by a row alias.
 *
 * @param prefix Row alias, e.g. `new` or `old` inside a trigger body.
 * @param schema Schema descriptor with a `columns` array.
 * @returns Comma-separated `<prefix>."<column>"` references.
 */
function rowColumnRefs(prefix, schema) {
  const refs = [];
  for (const column of schema.columns) {
    refs.push(`${prefix}.${quoteSqlIdent(column)}`);
  }
  return refs.join(", ");
}
444
/**
 * Keyword-search facet over a SQLite connection.
 *
 * Each collection gets a documents table plus an FTS5 virtual table that uses
 * the documents table as external content and is kept in sync by triggers.
 * The field list of every collection is recorded in a registry table so that
 * later documents can be checked against it.
 */
var SqliteSearchFacet = class {
  db;
  /** Keys (`<docs table>:<fields>`) whose DDL this instance has already run. */
  ensured = /* @__PURE__ */ new Set();
  /** Registry table: collection name -> JSON-encoded field list. */
  schemaTable = "_mirk_search_schema";
  /**
   * @param db Open database connection; the registry table is created on it
   *   if missing.
   */
  constructor(db) {
    this.db = db;
    this.db.exec(`
CREATE TABLE IF NOT EXISTS ${this.schemaTable} (
collection TEXT PRIMARY KEY,
fields_json TEXT NOT NULL
);
`);
  }
  /** Name of the documents table for a collection (sanitised name + hash). */
  baseTable(collection) {
    const safe = collection.replace(/[^a-zA-Z0-9_]/g, "_");
    return `search_docs_${safe}_${hashName(collection)}`;
  }
  /** Name of the FTS5 table for a collection (sanitised name + hash). */
  ftsTable(collection) {
    const safe = collection.replace(/[^a-zA-Z0-9_]/g, "_");
    return `search_fts_${safe}_${hashName(collection)}`;
  }
  /** True when `sqlite_master` lists a table or view with this name. */
  tableExists(table) {
    const found = this.db.prepare("SELECT 1 AS ok FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?").get(table);
    return found !== void 0;
  }
  /**
   * Look up a collection's schema.
   *
   * Falls back to detecting an unregistered documents table that has the
   * default field column; such a table is registered as a single-field schema.
   *
   * @returns The schema descriptor, or `undefined` when the collection is unknown.
   */
  loadSchema(collection) {
    const registered = this.db.prepare(`SELECT fields_json FROM ${this.schemaTable} WHERE collection = ?`).get(collection);
    if (registered) {
      return searchSchema(JSON.parse(registered.fields_json));
    }
    const docs = this.baseTable(collection);
    if (!this.tableExists(docs)) return void 0;
    const columns = this.db.prepare(`PRAGMA table_info(${quoteSqlIdent(docs)})`).all();
    const hasDefaultColumn = columns.some((col) => col.name === DEFAULT_SEARCH_FIELD);
    if (!hasDefaultColumn) return void 0;
    const fields = [DEFAULT_SEARCH_FIELD];
    this.db.prepare(`INSERT OR IGNORE INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
    return searchSchema(fields);
  }
  /**
   * Resolve the schema to index a document under: the existing one (after
   * checking the document's field names against it) or a newly registered one
   * taken from the document itself.
   */
  schemaForIndex(collection, normalized) {
    const known = this.loadSchema(collection);
    if (known) {
      assertSameSearchFields(known.fields, normalized.names, collection);
      return known;
    }
    const fields = [...normalized.names];
    this.db.prepare(`INSERT INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
    return searchSchema(fields);
  }
  /**
   * Create the documents table, FTS5 table and sync triggers for a collection
   * if this instance has not done so yet.
   *
   * @returns The `{ docs, fts }` table names.
   */
  ensure(collection, schema) {
    const docs = this.baseTable(collection);
    const fts = this.ftsTable(collection);
    const cacheKey = `${docs}:${schema.fields.join("\0")}`;
    if (this.ensured.has(cacheKey)) return { docs, fts };
    const qDocs = quoteSqlIdent(docs);
    const qFts = quoteSqlIdent(fts);
    const qColumns = schema.columns.map(quoteSqlIdent).join(", ");
    const newColumns = rowColumnRefs("new", schema);
    const oldColumns = rowColumnRefs("old", schema);
    const fieldDefs = searchFieldDefs(schema);
    this.db.exec(`
CREATE TABLE IF NOT EXISTS ${qDocs} (
id TEXT PRIMARY KEY,
${fieldDefs},
meta_json TEXT
);
CREATE VIRTUAL TABLE IF NOT EXISTS ${qFts} USING fts5(
${qColumns}, content='${docs}', content_rowid='rowid', tokenize='unicode61'
);
CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ai`)} AFTER INSERT ON ${qDocs} BEGIN
INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
END;
CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ad`)} AFTER DELETE ON ${qDocs} BEGIN
INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
END;
CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_au`)} AFTER UPDATE ON ${qDocs} BEGIN
INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
END;
`);
    this.ensured.add(cacheKey);
    return { docs, fts };
  }
  /**
   * Insert a document, or overwrite the stored fields and metadata of the
   * document with the same id.
   */
  index(collection, doc) {
    const normalized = normalizeSearchDocument(doc);
    const schema = this.schemaForIndex(collection, normalized);
    const { docs } = this.ensure(collection, schema);
    const quotedColumns = schema.columns.map(quoteSqlIdent);
    const insertColumns = [quoteSqlIdent("id"), ...quotedColumns, quoteSqlIdent("meta_json")].join(", ");
    // One placeholder per field column, plus id and meta_json.
    const placeholders = new Array(schema.columns.length + 2).fill("?").join(", ");
    const assignments = quotedColumns.map((col) => `${col} = excluded.${col}`);
    assignments.push("meta_json = excluded.meta_json");
    const updateSet = assignments.join(", ");
    const metaJson = doc.meta === void 0 ? null : JSON.stringify(doc.meta);
    const fieldValues = schema.fields.map((field) => normalized.values[field] ?? "");
    this.db.prepare(
      `INSERT INTO ${quoteSqlIdent(docs)}(${insertColumns}) VALUES (${placeholders})
ON CONFLICT(id) DO UPDATE SET ${updateSet}`
    ).run(doc.id, ...fieldValues, metaJson);
  }
  /** Index several documents inside one transaction. */
  indexMany(collection, docs) {
    const indexAll = this.db.transaction((items) => {
      for (const item of items) this.index(collection, item);
    });
    indexAll(docs);
  }
  /**
   * Delete a document by id.
   *
   * @returns True when a row was deleted; false for unknown collections or ids.
   */
  remove(collection, id) {
    const schema = this.loadSchema(collection);
    if (!schema) return false;
    const { docs } = this.ensure(collection, schema);
    const outcome = this.db.prepare(`DELETE FROM ${quoteSqlIdent(docs)} WHERE id = ?`).run(id);
    return outcome.changes > 0;
  }
  /**
   * Keyword search over a collection.
   *
   * Rows come back ordered by the FTS5 `bm25` value and then by id. The
   * optional metadata `where` filter is applied in JavaScript, and the result
   * is cut to `limit` (default 10) afterwards. Reported scores are the negated
   * `bm25` values.
   *
   * @returns `{ id, score, meta }` hits; empty when the sanitised query is
   *   empty or the collection is unknown.
   */
  search(collection, query, opts) {
    const sanitized = sanitizeFtsQuery(query);
    assertValidFieldWeightValues(opts?.fieldWeights);
    if (sanitized.length === 0) return [];
    const schema = this.loadSchema(collection);
    if (!schema) return [];
    const { docs, fts } = this.ensure(collection, schema);
    const weights = fieldWeightsFor(schema.fields, opts?.fieldWeights);
    const weightArgs = weights.length > 0 ? `, ${weights.map((weight) => String(weight)).join(", ")}` : "";
    const statement = this.db.prepare(
      `SELECT d.id AS id, d.meta_json AS meta_json, bm25(${fts}${weightArgs}) AS bm
FROM ${quoteSqlIdent(fts)}
JOIN ${quoteSqlIdent(docs)} d ON d.rowid = ${fts}.rowid
WHERE ${fts} MATCH ?
ORDER BY bm, d.id`
    );
    const matches = statement.all(sanitized);
    const limit = opts?.limit ?? 10;
    const where = opts?.filter?.where;
    const hits = [];
    for (const match of matches) {
      const meta = match.meta_json === null ? {} : JSON.parse(match.meta_json);
      const keep = !where || matchesWhere(meta, where);
      if (keep) {
        hits.push({ id: match.id, score: -match.bm, meta });
      }
    }
    return hits.slice(0, limit);
  }
};
382
578
  export {
383
579
  SqliteAdapter
384
580
  };
@@ -0,0 +1,120 @@
1
+ import {
2
+ assertSameSearchFields,
3
+ assertValidFieldWeightValues,
4
+ fieldWeightsFor,
5
+ normalizeSearchDocument,
6
+ tokenize
7
+ } from "./chunk-N27Y55CA.js";
8
+ import {
9
+ matchesWhere
10
+ } from "./chunk-KPMRRYTL.js";
11
+
12
+ // src/search/memory.ts
13
+ var K1 = 1.2;
14
+ var B = 0.75;
15
/**
 * In-memory keyword search store with BM25-style ranking.
 *
 * Each collection keeps its field list (fixed by the first indexed document),
 * the indexed documents, per-term document frequencies and the total token
 * count, which together feed the scoring in `search`.
 */
var InMemorySearchStore = class {
  /** Collection name -> `{ fields, docs, df, totalLen }`. */
  collections = /* @__PURE__ */ new Map();
  /**
   * Index a document, replacing any previous document with the same id.
   *
   * @param collection Collection name; created on first use.
   * @param doc Document with an `id`, optional `meta`, and text content as
   *   accepted by `normalizeSearchDocument`.
   */
  index(collection, doc) {
    const normalized = normalizeSearchDocument(doc);
    const coll = this.collectionFor(collection, normalized.names);
    assertSameSearchFields(coll.fields, normalized.names, collection);
    // Re-indexing: retract the old statistics before adding the new ones.
    this.removeFromColl(coll, doc.id);
    const tfByField = /* @__PURE__ */ new Map();
    const terms = /* @__PURE__ */ new Set();
    let dl = 0;
    for (const field of coll.fields) {
      const tokens = tokenize(normalized.values[field] ?? "");
      dl += tokens.length;
      const tf = /* @__PURE__ */ new Map();
      for (const token of tokens) {
        tf.set(token, (tf.get(token) ?? 0) + 1);
        terms.add(token);
      }
      tfByField.set(field, tf);
    }
    coll.totalLen += dl;
    const indexed = {
      id: doc.id,
      meta: doc.meta ?? {},
      tfByField,
      dl,
      terms
    };
    coll.docs.set(doc.id, indexed);
    for (const term of terms) coll.df.set(term, (coll.df.get(term) ?? 0) + 1);
  }
  /** Index several documents, one after another. */
  indexMany(collection, docs) {
    for (const doc of docs) this.index(collection, doc);
  }
  /**
   * Remove a document by id.
   *
   * @returns True when a document was removed; false for unknown collections or ids.
   */
  remove(collection, id) {
    const coll = this.collections.get(collection);
    if (!coll) return false;
    return this.removeFromColl(coll, id);
  }
  /**
   * Rank the documents of a collection against a keyword query.
   *
   * A document is a hit when at least one query token occurs in a field with
   * a non-zero weight. Hits are ordered by descending score, ties by id.
   *
   * @param collection Collection name.
   * @param query Free-text query; tokenised with `tokenize`.
   * @param opts Optional `limit` (default 10), `filter.where` metadata
   *   predicate, and `fieldWeights`.
   * @returns Up to `limit` hits `{ id, score, meta }`.
   */
  search(collection, query, opts) {
    const coll = this.collections.get(collection);
    const qTokens = tokenize(query);
    assertValidFieldWeightValues(opts?.fieldWeights);
    if (!coll || qTokens.length === 0) return [];
    // Floor for the inverse document frequency. A term found in more than
    // half of the documents has a non-positive raw IDF; rather than dropping
    // its contribution (which would tie every such hit at 0 and make field
    // weights irrelevant), it gets a tiny positive weight.
    // NOTE(review): 1e-6 is meant to mirror the floor SQLite FTS5's bm25()
    // applies, for parity with the SQLite adapter — confirm against the
    // FTS5 implementation.
    const IDF_FLOOR = 1e-6;
    const limit = opts?.limit ?? 10;
    const where = opts?.filter?.where;
    const fieldWeights = fieldWeightsFor(coll.fields, opts?.fieldWeights);
    const n = coll.docs.size;
    // Invariant across documents, so computed once.
    const avgdl = n > 0 ? coll.totalLen / n : 0;
    const scored = [];
    for (const doc of coll.docs.values()) {
      if (where && !matchesWhere(doc.meta, where)) continue;
      // Length normalisation depends only on the document, not on the term.
      const lengthNorm = K1 * (1 - B + B * (avgdl > 0 ? doc.dl / avgdl : 0));
      let matched = false;
      let score = 0;
      for (const qt of qTokens) {
        const df = coll.df.get(qt) ?? 0;
        if (df === 0) continue;
        let weightedTf = 0;
        for (let i = 0; i < coll.fields.length; i++) {
          const field = coll.fields[i];
          const tf = doc.tfByField.get(field)?.get(qt) ?? 0;
          weightedTf += fieldWeights[i] * tf;
        }
        if (weightedTf === 0) continue;
        matched = true;
        const rawIdf = Math.log((n - df + 0.5) / (df + 0.5));
        const idf = rawIdf > 0 ? rawIdf : IDF_FLOOR;
        score += idf * (weightedTf * (K1 + 1)) / (weightedTf + lengthNorm);
      }
      if (!matched) continue;
      scored.push({ id: doc.id, score, meta: doc.meta });
    }
    scored.sort((a, b) => b.score - a.score || a.id.localeCompare(b.id));
    return scored.slice(0, limit);
  }
  /**
   * Drop a document from a collection and retract its contribution to the
   * token total and the document frequencies.
   *
   * @returns True when the document existed.
   */
  removeFromColl(coll, id) {
    const doc = coll.docs.get(id);
    if (!doc) return false;
    coll.docs.delete(id);
    coll.totalLen -= doc.dl;
    for (const term of doc.terms) {
      const next = (coll.df.get(term) ?? 0) - 1;
      // Terms no document contains any more are removed entirely.
      if (next <= 0) coll.df.delete(term);
      else coll.df.set(term, next);
    }
    return true;
  }
  /**
   * Fetch a collection, creating it with the given field list on first use.
   * The fields of an existing collection are left untouched.
   */
  collectionFor(name, fields) {
    let coll = this.collections.get(name);
    if (!coll) {
      coll = {
        fields: [...fields],
        docs: /* @__PURE__ */ new Map(),
        df: /* @__PURE__ */ new Map(),
        totalLen: 0
      };
      this.collections.set(name, coll);
    }
    return coll;
  }
};
117
+
118
+ export {
119
+ InMemorySearchStore
120
+ };