@mirk/store 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -24,11 +24,13 @@ npm install sqlite-vec
24
24
  | `@mirk/store` | the ports + their in-memory references + `toAsync` + cosine helpers | none |
25
25
  | `@mirk/store/kv` | `SyncStore` port (key-value + collections), `InMemoryKv`, `toAsync` | none |
26
26
  | `@mirk/store/vector` | `VectorStore` port, `InMemoryVectorStore`, cosine helpers | none |
27
- | `@mirk/store/sqlite` | the SQLite **source adapter** — one connection, `.kv` + `.vector` facets | `better-sqlite3` (peer), `sqlite-vec` (optional peer) |
27
+ | `@mirk/store/search` | `SearchStore` port, `InMemorySearchStore`, BM25-style keyword search | none |
28
+ | `@mirk/store/graph` | graph helpers over the collection port (`neighbors`, `traverse`, `traverseFrontierBatched`) | none |
29
+ | `@mirk/store/sqlite` | the SQLite **source adapter** — one connection, `.kv` + `.vector` + `.search` facets | `better-sqlite3` (peer), `sqlite-vec` (optional peer) |
28
30
 
29
31
  Source adapters are reached **only** through their own subpath (e.g. `/sqlite`) — the root and the
30
- port subpaths never re-export them, so importing `@mirk/store`, `/kv`, or `/vector` never drags a
31
- native binding into a consumer bundle.
32
+ port subpaths never re-export them, so importing `@mirk/store`, `/kv`, `/vector`, `/search`, or
33
+ `/graph` never drags a native binding into a consumer bundle.
32
34
 
33
35
  ## Quickstart — zero native deps
34
36
 
@@ -60,19 +62,51 @@ kv.count("posts"); // 1
60
62
  kv.remove("posts", "p1");
61
63
  ```
62
64
 
65
+ ## Full-text search
66
+
67
+ `SearchStore` indexes documents by id and returns BM25-ranked keyword matches. Use `text` for the
68
+ single-field shorthand or `fields` for named columns with query-time weighting:
69
+
70
+ ```ts
71
+ import { InMemorySearchStore } from "@mirk/store/search";
72
+
73
+ const search = new InMemorySearchStore();
74
+ search.index("pages", { id: "a", fields: { title: "Opal guide", body: "plain body" } });
75
+ search.index("pages", { id: "b", fields: { title: "plain title", body: "Opal guide" } });
76
+ search.search("pages", "opal", { fieldWeights: { title: 4, body: 1 } }); // [a, b]
77
+ ```
78
+
79
+ The first indexed document fixes a collection's field schema; later documents must use the same
80
+ field names. `text` and `fields: { text }` are the same single-field schema for backwards
81
+ compatibility.
82
+
83
+ ## Graph helpers
84
+
85
+ `@mirk/store/graph` stores edges as ordinary collection records and traverses them through the
86
+ existing collection port. Which edges are followed stays up to the caller: pass a `StoreFilter` as `edgeFilter`.
87
+
88
+ ```ts
89
+ import { traverse } from "@mirk/store/graph";
90
+
91
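+ const hits = await traverse(toAsync(kv), "edges", { start: "node:a", depth: 2, direction: "out" }); // "edges" = collection holding the edge records; `toAsync` is exported by "@mirk/store/kv"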
92
+ ```
93
+
63
94
  ## SQLite adapter — one connection, many capabilities
64
95
 
65
- `SqliteAdapter` opens a single `better-sqlite3` database and exposes a `.kv` facet (a `SyncStore`)
66
- and a `.vector` facet (a `VectorStore`) over it:
96
+ `SqliteAdapter` opens a single `better-sqlite3` database and exposes `.kv` (`SyncStore`), `.vector`
97
+ (`VectorStore`), and `.search` (`SearchStore`) facets over it:
67
98
 
68
99
  ```ts
69
100
  import { SqliteAdapter } from "@mirk/store/sqlite";
70
101
 
71
- // `dimensions` is required to use the .vector facet; .kv works without it.
72
- const db = new SqliteAdapter({ path: "data.db", dimensions: 768 });
102
+ // .kv and .search work immediately; vector dimensions are inferred on the first write.
103
+ const db = new SqliteAdapter({ path: "data.db" });
73
104
 
74
105
  db.kv.set("user:1", { name: "Ada" });
75
106
 
107
+ db.search.index("pages", { id: "intro", fields: { title: "Intro", body: "hello world" } });
108
+ db.search.search("pages", "hello", { fieldWeights: { title: 4, body: 1 } });
109
+
76
110
  const embedding = new Float32Array(768); // your real embedding here
77
111
  const query = new Float32Array(768);
78
112
  db.vector.upsert("docs", { id: "a", vector: embedding });
@@ -87,7 +121,7 @@ db.close();
87
121
  |---|---|---|
88
122
  | `path` | `string` | DB file path, or `":memory:"`. |
89
123
  | `db` | `Database` | Reuse an existing `better-sqlite3` connection instead of opening one. |
90
- | `dimensions` | `number` | Embedding dimensionality. Required before using `.vector`; persisted and enforced on reopen. |
124
+ | `dimensions` | `number` | Optional embedding dimensionality. If omitted, it is inferred and persisted from the first vector `upsert` / `upsertMany`; `search` throws until the dimensions are known (passed here, persisted from an earlier session, or inferred from a first write). |
91
125
  | `forceJsCosine` | `boolean` | Pin the exact JS-cosine path even when `sqlite-vec` is installed (mainly for tests). |
92
126
 
93
127
  Vectors (`Vector` is a `Float32Array`) are stored as little-endian float32 BLOBs and ranked by
@@ -1,14 +1,15 @@
1
1
  import Database from 'better-sqlite3';
2
- import { b as SyncStore } from '../types-DyQLNtxa.js';
2
+ import { c as SyncStore } from '../types-DHLdLZ_W.js';
3
3
  import { d as VectorStore } from '../types-B0XrD10b.js';
4
- import { c as SearchStore } from '../types-0B0Tw1fz.js';
4
+ import { c as SearchStore } from '../types-BLZTMsQO.js';
5
5
 
6
6
  interface SqliteAdapterOptions {
7
7
  /** Path to the SQLite database file. Use ":memory:" for in-memory. */
8
8
  path: string;
9
9
  /** Existing better-sqlite3 instance to reuse (shares one connection). */
10
10
  db?: Database.Database;
11
- /** Embedding dimensions. Required to use the `.vector` facet; KV works without it. */
11
+ /** Embedding dimensions. Optional: when omitted, `.vector` persists the
12
+ * dimensions from the first upsert/upsertMany call. KV/search work without it. */
12
13
  dimensions?: number;
13
14
  /** Force the exact JS-cosine search path even when sqlite-vec is installed.
14
15
  * Mainly for parity testing; production should leave this off. */
@@ -1,11 +1,17 @@
1
1
  import {
2
+ DEFAULT_SEARCH_FIELD,
3
+ assertSameSearchFields,
4
+ assertValidFieldWeightValues,
5
+ fieldWeightsFor,
6
+ normalizeSearchDocument,
2
7
  sanitizeFtsQuery
3
- } from "../chunk-6YJ66JFO.js";
8
+ } from "../chunk-N27Y55CA.js";
4
9
  import {
5
10
  buildLimitOffset,
6
11
  buildOrderBy,
7
12
  buildWhereClause,
8
- hashName
13
+ hashName,
14
+ jsonPath
9
15
  } from "../chunk-DP4D7CJY.js";
10
16
  import {
11
17
  assertDimensions,
@@ -38,6 +44,38 @@ function tryLoadSqliteVec(db) {
38
44
  return false;
39
45
  }
40
46
  }
47
+ function assertPositiveDimensions(dimensions) {
48
+ if (!Number.isInteger(dimensions) || dimensions <= 0) {
49
+ throw new Error(`Vector dimensions must be a positive integer; got ${dimensions}.`);
50
+ }
51
+ }
52
+ function sqlParam(value) {
53
+ if (value === null) return null;
54
+ if (typeof value === "boolean") return value ? 1 : 0;
55
+ if (typeof value === "string" || typeof value === "number" || typeof value === "bigint") {
56
+ return value;
57
+ }
58
+ throw new Error("Store IN queries only support JSON scalar values.");
59
+ }
60
+ function buildJsonInWhere(field, values, hasPriorWhere) {
61
+ const path = jsonPath(field);
62
+ const params = [];
63
+ const nonNull = values.filter((value) => value !== null).map(sqlParam);
64
+ const hasNull = values.some((value) => value === null);
65
+ const parts = [];
66
+ if (nonNull.length > 0) {
67
+ parts.push(`json_extract(data, ?) IN (${nonNull.map(() => "?").join(", ")})`);
68
+ params.push(path, ...nonNull);
69
+ }
70
+ if (hasNull) {
71
+ parts.push(`json_type(data, ?) = 'null'`);
72
+ params.push(path);
73
+ }
74
+ return {
75
+ clause: `${hasPriorWhere ? " AND" : " WHERE"} (${parts.join(" OR ")})`,
76
+ params
77
+ };
78
+ }
41
79
  var SqliteAdapter = class {
42
80
  db;
43
81
  kv;
@@ -136,6 +174,17 @@ var SqliteKvFacet = class {
136
174
  const rows = this.db.prepare(sql).all(...where.params, ...orderBy.params);
137
175
  return rows.map((r) => JSON.parse(r.data));
138
176
  }
177
+ listWhereIn(collection, field, values, filter) {
178
+ if (values.length === 0) return [];
179
+ const table = this.ensureTable(collection);
180
+ const where = buildWhereClause(filter);
181
+ const inWhere = buildJsonInWhere(field, values, where.clause.length > 0);
182
+ const orderBy = buildOrderBy(filter);
183
+ const limitOffset = buildLimitOffset(filter);
184
+ const sql = `SELECT data FROM ${table}${where.clause}${inWhere.clause}${orderBy.clause}${limitOffset}`;
185
+ const rows = this.db.prepare(sql).all(...where.params, ...inWhere.params, ...orderBy.params);
186
+ return rows.map((r) => JSON.parse(r.data));
187
+ }
139
188
  getById(collection, id) {
140
189
  const table = this.ensureTable(collection);
141
190
  const row = this.db.prepare(`SELECT data FROM ${table} WHERE id = ?`).get(id);
@@ -162,8 +211,10 @@ var SqliteKvFacet = class {
162
211
  }
163
212
  };
164
213
  var SqliteVectorFacet = class {
165
- constructor(db, path, dimensions, forceJsCosine) {
214
+ constructor(db, path, dimensions, forceJsCosine = false) {
166
215
  this.db = db;
216
+ this.path = path;
217
+ this.forceJsCosine = forceJsCosine;
167
218
  this.db.exec(
168
219
  `CREATE TABLE IF NOT EXISTS vectors (
169
220
  collection TEXT NOT NULL,
@@ -183,31 +234,51 @@ var SqliteVectorFacet = class {
183
234
  `Vector store at ${path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
184
235
  );
185
236
  }
237
+ this.refreshVectorMeta();
186
238
  } else if (dimensions !== void 0) {
187
- this.dimensions = dimensions;
188
- this.db.prepare(`INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)`).run(String(dimensions));
189
- } else {
190
- this.dimensions = -1;
239
+ this.initializeDims(dimensions);
191
240
  }
192
- this.accelerated = !forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
193
- this.meta = {
194
- backend: "sqlite",
195
- dimensions: Math.max(this.dimensions, 0),
196
- accelerated: this.accelerated
197
- };
198
241
  }
199
242
  db;
200
- meta;
201
- dimensions;
243
+ path;
244
+ forceJsCosine;
245
+ meta = { backend: "sqlite", dimensions: 0, accelerated: false };
246
+ dimensions = -1;
202
247
  /** True when sqlite-vec loaded and the vec0 acceleration path is live. */
203
- accelerated;
248
+ accelerated = false;
204
249
  vecTablesEnsured = /* @__PURE__ */ new Set();
205
- requireDims(v) {
250
+ initializeDims(dimensions) {
251
+ assertPositiveDimensions(dimensions);
252
+ if (this.dimensions >= 0) {
253
+ if (dimensions !== this.dimensions) {
254
+ throw new Error(
255
+ `Vector store at ${this.path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
256
+ );
257
+ }
258
+ return;
259
+ }
260
+ this.dimensions = dimensions;
261
+ this.db.prepare(
262
+ `INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)
263
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value`
264
+ ).run(String(dimensions));
265
+ this.refreshVectorMeta();
266
+ }
267
+ refreshVectorMeta() {
268
+ this.accelerated = !this.forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
269
+ this.meta.dimensions = Math.max(this.dimensions, 0);
270
+ this.meta.accelerated = this.accelerated;
271
+ }
272
+ requireKnownDims(v) {
206
273
  if (this.dimensions < 0) {
207
- throw new Error("SqliteAdapter.vector requires `dimensions` \u2014 pass { dimensions } when opening.");
274
+ throw new Error("SqliteAdapter.vector has no dimensions yet \u2014 pass { dimensions } when opening or upsert a vector first.");
208
275
  }
209
276
  assertDimensions(v, this.dimensions);
210
277
  }
278
+ ensureDimsForWrite(v) {
279
+ if (this.dimensions < 0) this.initializeDims(v.length);
280
+ assertDimensions(v, this.dimensions);
281
+ }
211
282
  // ── vec0 acceleration helpers ───────────────────────────────────────────
212
283
  vecTableName(collection) {
213
284
  return `vectors_vec_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
@@ -247,7 +318,7 @@ var SqliteVectorFacet = class {
247
318
  }
248
319
  }
249
320
  upsert(collection, doc) {
250
- this.requireDims(doc.vector);
321
+ this.ensureDimsForWrite(doc.vector);
251
322
  const write = this.db.transaction(() => {
252
323
  this.db.prepare(
253
324
  `INSERT INTO vectors(collection, id, vec, metadata) VALUES (?, ?, ?, ?)
@@ -263,6 +334,14 @@ var SqliteVectorFacet = class {
263
334
  write();
264
335
  }
265
336
  upsertMany(collection, docs) {
337
+ const first = docs[0];
338
+ if (!first) return;
339
+ const dimensions = this.dimensions >= 0 ? this.dimensions : first.vector.length;
340
+ assertPositiveDimensions(dimensions);
341
+ for (const doc of docs) {
342
+ assertDimensions(doc.vector, dimensions);
343
+ }
344
+ if (this.dimensions < 0) this.initializeDims(dimensions);
266
345
  const tx = this.db.transaction((items) => {
267
346
  for (const doc of items) this.upsert(collection, doc);
268
347
  });
@@ -294,7 +373,7 @@ var SqliteVectorFacet = class {
294
373
  return row.n;
295
374
  }
296
375
  search(collection, query, opts) {
297
- this.requireDims(query);
376
+ this.requireKnownDims(query);
298
377
  const topK = opts?.topK ?? 10;
299
378
  const minScore = opts?.minScore;
300
379
  const hasFilters = !!(opts?.where || opts?.whereNot);
@@ -346,52 +425,116 @@ var SqliteVectorFacet = class {
346
425
  return scored.slice(0, topK);
347
426
  }
348
427
  };
428
+ function searchColumnName(field, index) {
429
+ if (field === DEFAULT_SEARCH_FIELD) return DEFAULT_SEARCH_FIELD;
430
+ return `f${index}_${hashName(field)}`;
431
+ }
432
+ function searchSchema(fields) {
433
+ return { fields: [...fields], columns: fields.map(searchColumnName) };
434
+ }
435
+ function quoteSqlIdent(identifier) {
436
+ return `"${identifier.replace(/"/g, '""')}"`;
437
+ }
438
+ function searchFieldDefs(schema) {
439
+ return schema.columns.map((column) => `${quoteSqlIdent(column)} TEXT NOT NULL`).join(",\n ");
440
+ }
441
+ function rowColumnRefs(prefix, schema) {
442
+ return schema.columns.map((column) => `${prefix}.${quoteSqlIdent(column)}`).join(", ");
443
+ }
349
444
  var SqliteSearchFacet = class {
350
445
  constructor(db) {
351
446
  this.db = db;
447
+ this.db.exec(`
448
+ CREATE TABLE IF NOT EXISTS ${this.schemaTable} (
449
+ collection TEXT PRIMARY KEY,
450
+ fields_json TEXT NOT NULL
451
+ );
452
+ `);
352
453
  }
353
454
  db;
354
455
  ensured = /* @__PURE__ */ new Set();
456
+ schemaTable = "_mirk_search_schema";
355
457
  baseTable(collection) {
356
458
  return `search_docs_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
357
459
  }
358
460
  ftsTable(collection) {
359
461
  return `search_fts_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
360
462
  }
361
- ensure(collection) {
463
+ tableExists(table) {
464
+ return this.db.prepare("SELECT 1 AS ok FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?").get(table) !== void 0;
465
+ }
466
+ loadSchema(collection) {
467
+ const row = this.db.prepare(`SELECT fields_json FROM ${this.schemaTable} WHERE collection = ?`).get(collection);
468
+ if (row) {
469
+ const fields2 = JSON.parse(row.fields_json);
470
+ return searchSchema(fields2);
471
+ }
472
+ const docs = this.baseTable(collection);
473
+ if (!this.tableExists(docs)) return void 0;
474
+ const pragma = this.db.prepare(`PRAGMA table_info(${quoteSqlIdent(docs)})`).all();
475
+ if (!pragma.some((col) => col.name === DEFAULT_SEARCH_FIELD)) return void 0;
476
+ const fields = [DEFAULT_SEARCH_FIELD];
477
+ this.db.prepare(`INSERT OR IGNORE INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
478
+ return searchSchema(fields);
479
+ }
480
+ schemaForIndex(collection, normalized) {
481
+ const existing = this.loadSchema(collection);
482
+ if (existing) {
483
+ assertSameSearchFields(existing.fields, normalized.names, collection);
484
+ return existing;
485
+ }
486
+ const fields = [...normalized.names];
487
+ this.db.prepare(`INSERT INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
488
+ return searchSchema(fields);
489
+ }
490
+ ensure(collection, schema) {
362
491
  const docs = this.baseTable(collection);
363
492
  const fts = this.ftsTable(collection);
364
- if (this.ensured.has(docs)) return { docs, fts };
493
+ const key = `${docs}:${schema.fields.join("\0")}`;
494
+ if (this.ensured.has(key)) return { docs, fts };
495
+ const qDocs = quoteSqlIdent(docs);
496
+ const qFts = quoteSqlIdent(fts);
497
+ const qColumns = schema.columns.map(quoteSqlIdent).join(", ");
498
+ const newColumns = rowColumnRefs("new", schema);
499
+ const oldColumns = rowColumnRefs("old", schema);
500
+ const fieldDefs = searchFieldDefs(schema);
365
501
  this.db.exec(`
366
- CREATE TABLE IF NOT EXISTS ${docs} (
502
+ CREATE TABLE IF NOT EXISTS ${qDocs} (
367
503
  id TEXT PRIMARY KEY,
368
- text TEXT NOT NULL,
504
+ ${fieldDefs},
369
505
  meta_json TEXT
370
506
  );
371
- CREATE VIRTUAL TABLE IF NOT EXISTS ${fts} USING fts5(
372
- text, content='${docs}', content_rowid='rowid', tokenize='unicode61'
507
+ CREATE VIRTUAL TABLE IF NOT EXISTS ${qFts} USING fts5(
508
+ ${qColumns}, content='${docs}', content_rowid='rowid', tokenize='unicode61'
373
509
  );
374
- CREATE TRIGGER IF NOT EXISTS ${docs}_ai AFTER INSERT ON ${docs} BEGIN
375
- INSERT INTO ${fts}(rowid, text) VALUES (new.rowid, new.text);
510
+ CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ai`)} AFTER INSERT ON ${qDocs} BEGIN
511
+ INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
376
512
  END;
377
- CREATE TRIGGER IF NOT EXISTS ${docs}_ad AFTER DELETE ON ${docs} BEGIN
378
- INSERT INTO ${fts}(${fts}, rowid, text) VALUES('delete', old.rowid, old.text);
513
+ CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ad`)} AFTER DELETE ON ${qDocs} BEGIN
514
+ INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
379
515
  END;
380
- CREATE TRIGGER IF NOT EXISTS ${docs}_au AFTER UPDATE ON ${docs} BEGIN
381
- INSERT INTO ${fts}(${fts}, rowid, text) VALUES('delete', old.rowid, old.text);
382
- INSERT INTO ${fts}(rowid, text) VALUES (new.rowid, new.text);
516
+ CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_au`)} AFTER UPDATE ON ${qDocs} BEGIN
517
+ INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
518
+ INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
383
519
  END;
384
520
  `);
385
- this.ensured.add(docs);
521
+ this.ensured.add(key);
386
522
  return { docs, fts };
387
523
  }
388
524
  index(collection, doc) {
389
- const { docs } = this.ensure(collection);
525
+ const normalized = normalizeSearchDocument(doc);
526
+ const schema = this.schemaForIndex(collection, normalized);
527
+ const { docs } = this.ensure(collection, schema);
528
+ const qDocs = quoteSqlIdent(docs);
529
+ const qColumns = schema.columns.map(quoteSqlIdent);
530
+ const insertColumns = ["id", ...schema.columns, "meta_json"].map(quoteSqlIdent).join(", ");
531
+ const placeholders = Array.from({ length: schema.columns.length + 2 }, () => "?").join(", ");
532
+ const updateSet = [...qColumns.map((col) => `${col} = excluded.${col}`), "meta_json = excluded.meta_json"].join(", ");
390
533
  const metaJson = doc.meta === void 0 ? null : JSON.stringify(doc.meta);
391
534
  this.db.prepare(
392
- `INSERT INTO ${docs}(id, text, meta_json) VALUES (?, ?, ?)
393
- ON CONFLICT(id) DO UPDATE SET text = excluded.text, meta_json = excluded.meta_json`
394
- ).run(doc.id, doc.text, metaJson);
535
+ `INSERT INTO ${qDocs}(${insertColumns}) VALUES (${placeholders})
536
+ ON CONFLICT(id) DO UPDATE SET ${updateSet}`
537
+ ).run(doc.id, ...schema.fields.map((field) => normalized.values[field] ?? ""), metaJson);
395
538
  }
396
539
  indexMany(collection, docs) {
397
540
  const tx = this.db.transaction((items) => {
@@ -400,17 +543,24 @@ var SqliteSearchFacet = class {
400
543
  tx(docs);
401
544
  }
402
545
  remove(collection, id) {
403
- const { docs } = this.ensure(collection);
404
- return this.db.prepare(`DELETE FROM ${docs} WHERE id = ?`).run(id).changes > 0;
546
+ const schema = this.loadSchema(collection);
547
+ if (!schema) return false;
548
+ const { docs } = this.ensure(collection, schema);
549
+ return this.db.prepare(`DELETE FROM ${quoteSqlIdent(docs)} WHERE id = ?`).run(id).changes > 0;
405
550
  }
406
551
  search(collection, query, opts) {
407
- const { docs, fts } = this.ensure(collection);
408
552
  const sanitized = sanitizeFtsQuery(query);
553
+ assertValidFieldWeightValues(opts?.fieldWeights);
409
554
  if (sanitized.length === 0) return [];
555
+ const schema = this.loadSchema(collection);
556
+ if (!schema) return [];
557
+ const { docs, fts } = this.ensure(collection, schema);
558
+ const weights = fieldWeightsFor(schema.fields, opts?.fieldWeights);
559
+ const weightArgs = weights.length > 0 ? `, ${weights.map((weight) => String(weight)).join(", ")}` : "";
410
560
  const rows = this.db.prepare(
411
- `SELECT d.id AS id, d.meta_json AS meta_json, bm25(${fts}) AS bm
412
- FROM ${fts}
413
- JOIN ${docs} d ON d.rowid = ${fts}.rowid
561
+ `SELECT d.id AS id, d.meta_json AS meta_json, bm25(${fts}${weightArgs}) AS bm
562
+ FROM ${quoteSqlIdent(fts)}
563
+ JOIN ${quoteSqlIdent(docs)} d ON d.rowid = ${fts}.rowid
414
564
  WHERE ${fts} MATCH ?
415
565
  ORDER BY bm, d.id`
416
566
  ).all(sanitized);
@@ -1,6 +1,10 @@
1
1
  import {
2
+ assertSameSearchFields,
3
+ assertValidFieldWeightValues,
4
+ fieldWeightsFor,
5
+ normalizeSearchDocument,
2
6
  tokenize
3
- } from "./chunk-6YJ66JFO.js";
7
+ } from "./chunk-N27Y55CA.js";
4
8
  import {
5
9
  matchesWhere
6
10
  } from "./chunk-KPMRRYTL.js";
@@ -11,21 +15,33 @@ var B = 0.75;
11
15
  var InMemorySearchStore = class {
12
16
  collections = /* @__PURE__ */ new Map();
13
17
  index(collection, doc) {
14
- const coll = this.collectionFor(collection);
18
+ const normalized = normalizeSearchDocument(doc);
19
+ const coll = this.collectionFor(collection, normalized.names);
20
+ assertSameSearchFields(coll.fields, normalized.names, collection);
15
21
  this.removeFromColl(coll, doc.id);
16
- const tokens = tokenize(doc.text);
17
- const tf = /* @__PURE__ */ new Map();
18
- for (const t of tokens) tf.set(t, (tf.get(t) ?? 0) + 1);
22
+ const tfByField = /* @__PURE__ */ new Map();
23
+ const terms = /* @__PURE__ */ new Set();
24
+ let dl = 0;
25
+ for (const field of coll.fields) {
26
+ const tokens = tokenize(normalized.values[field] ?? "");
27
+ dl += tokens.length;
28
+ const tf = /* @__PURE__ */ new Map();
29
+ for (const token of tokens) {
30
+ tf.set(token, (tf.get(token) ?? 0) + 1);
31
+ terms.add(token);
32
+ }
33
+ tfByField.set(field, tf);
34
+ }
35
+ coll.totalLen += dl;
19
36
  const indexed = {
20
37
  id: doc.id,
21
38
  meta: doc.meta ?? {},
22
- tokens,
23
- tf,
24
- dl: tokens.length
39
+ tfByField,
40
+ dl,
41
+ terms
25
42
  };
26
43
  coll.docs.set(doc.id, indexed);
27
- coll.totalLen += indexed.dl;
28
- for (const term of tf.keys()) coll.df.set(term, (coll.df.get(term) ?? 0) + 1);
44
+ for (const term of terms) coll.df.set(term, (coll.df.get(term) ?? 0) + 1);
29
45
  }
30
46
  indexMany(collection, docs) {
31
47
  for (const doc of docs) this.index(collection, doc);
@@ -38,25 +54,33 @@ var InMemorySearchStore = class {
38
54
  search(collection, query, opts) {
39
55
  const coll = this.collections.get(collection);
40
56
  const qTokens = tokenize(query);
57
+ assertValidFieldWeightValues(opts?.fieldWeights);
41
58
  if (!coll || qTokens.length === 0) return [];
42
59
  const limit = opts?.limit ?? 10;
43
60
  const where = opts?.filter?.where;
61
+ const fieldWeights = fieldWeightsFor(coll.fields, opts?.fieldWeights);
44
62
  const n = coll.docs.size;
45
- const avgdl = n > 0 ? coll.totalLen / n : 0;
46
63
  const scored = [];
47
64
  for (const doc of coll.docs.values()) {
48
65
  if (where && !matchesWhere(doc.meta, where)) continue;
49
66
  let matched = false;
50
67
  let score = 0;
68
+ const avgdl = n > 0 ? coll.totalLen / n : 0;
51
69
  for (const qt of qTokens) {
52
- const tf = doc.tf.get(qt) ?? 0;
53
- if (tf === 0) continue;
54
- matched = true;
55
70
  const df = coll.df.get(qt) ?? 0;
71
+ if (df === 0) continue;
72
+ let weightedTf = 0;
73
+ for (let i = 0; i < coll.fields.length; i++) {
74
+ const field = coll.fields[i];
75
+ const tf = doc.tfByField.get(field)?.get(qt) ?? 0;
76
+ weightedTf += fieldWeights[i] * tf;
77
+ }
78
+ if (weightedTf === 0) continue;
79
+ matched = true;
56
80
  const idf = Math.log((n - df + 0.5) / (df + 0.5));
57
81
  if (idf <= 0) continue;
58
- const denom = tf + K1 * (1 - B + B * (avgdl > 0 ? doc.dl / avgdl : 0));
59
- score += idf * (tf * (K1 + 1)) / denom;
82
+ const denom = weightedTf + K1 * (1 - B + B * (avgdl > 0 ? doc.dl / avgdl : 0));
83
+ score += idf * (weightedTf * (K1 + 1)) / denom;
60
84
  }
61
85
  if (!matched) continue;
62
86
  scored.push({ id: doc.id, score, meta: doc.meta });
@@ -69,17 +93,22 @@ var InMemorySearchStore = class {
69
93
  if (!doc) return false;
70
94
  coll.docs.delete(id);
71
95
  coll.totalLen -= doc.dl;
72
- for (const term of doc.tf.keys()) {
96
+ for (const term of doc.terms) {
73
97
  const next = (coll.df.get(term) ?? 0) - 1;
74
98
  if (next <= 0) coll.df.delete(term);
75
99
  else coll.df.set(term, next);
76
100
  }
77
101
  return true;
78
102
  }
79
- collectionFor(name) {
103
+ collectionFor(name, fields) {
80
104
  let coll = this.collections.get(name);
81
105
  if (!coll) {
82
- coll = { docs: /* @__PURE__ */ new Map(), df: /* @__PURE__ */ new Map(), totalLen: 0 };
106
+ coll = {
107
+ fields: [...fields],
108
+ docs: /* @__PURE__ */ new Map(),
109
+ df: /* @__PURE__ */ new Map(),
110
+ totalLen: 0
111
+ };
83
112
  this.collections.set(name, coll);
84
113
  }
85
114
  return coll;
@@ -1,9 +1,16 @@
1
1
  // src/to-async.ts
2
+ function hasListWhereIn(store) {
3
+ return typeof store.listWhereIn === "function";
4
+ }
2
5
  var AsyncStoreAdapter = class {
3
6
  constructor(sync) {
4
7
  this.sync = sync;
8
+ if (hasListWhereIn(sync)) {
9
+ this.listWhereIn = async (collection, field, values, filter) => sync.listWhereIn(collection, field, values, filter);
10
+ }
5
11
  }
6
12
  sync;
13
+ listWhereIn;
7
14
  get meta() {
8
15
  return this.sync.meta;
9
16
  }
@@ -85,6 +92,16 @@ var InMemoryStore = class {
85
92
  items = applyFilter(items, filter);
86
93
  return items;
87
94
  }
95
+ listWhereIn(collection, field, values, filter) {
96
+ if (values.length === 0) return [];
97
+ const set = new Set(values);
98
+ const col = this.ensureCollection(collection);
99
+ const items = [...col.values()].filter((item) => {
100
+ if (typeof item !== "object" || item === null) return false;
101
+ return set.has(item[field]);
102
+ });
103
+ return applyFilter(items, filter);
104
+ }
88
105
  getById(collection, id) {
89
106
  const col = this.ensureCollection(collection);
90
107
  const item = col.get(id);
@@ -0,0 +1,67 @@
1
+ // src/search/tokenize.ts
2
+ function tokenize(text) {
3
+ if (!text) return [];
4
+ return text.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? [];
5
+ }
6
+ function sanitizeFtsQuery(q) {
7
+ const tokens = tokenize(q);
8
+ if (tokens.length === 0) return "";
9
+ return tokens.map((tok) => `"${tok.replace(/"/g, '""')}"`).join(" OR ");
10
+ }
11
+
12
+ // src/search/fields.ts
13
+ var DEFAULT_SEARCH_FIELD = "text";
14
+ function normalizeSearchDocument(doc) {
15
+ if (doc.text !== void 0 && doc.fields !== void 0) {
16
+ throw new Error("SearchDocument must provide either `text` or `fields`, not both.");
17
+ }
18
+ if (doc.text !== void 0) {
19
+ return { names: [DEFAULT_SEARCH_FIELD], values: { [DEFAULT_SEARCH_FIELD]: doc.text } };
20
+ }
21
+ if (doc.fields === void 0) {
22
+ throw new Error("SearchDocument must provide `text` or `fields`.");
23
+ }
24
+ const names = Object.keys(doc.fields).sort((a, b) => a < b ? -1 : a > b ? 1 : 0);
25
+ if (names.length === 0) throw new Error("SearchDocument.fields must contain at least one field.");
26
+ const values = {};
27
+ for (const name of names) {
28
+ const value = doc.fields[name];
29
+ if (typeof value !== "string") {
30
+ throw new Error(`SearchDocument field "${name}" must be a string.`);
31
+ }
32
+ values[name] = value;
33
+ }
34
+ return { names, values };
35
+ }
36
+ function assertSameSearchFields(existing, incoming, collection) {
37
+ if (existing.length !== incoming.length || existing.some((name, index) => name !== incoming[index])) {
38
+ throw new Error(
39
+ `Search collection "${collection}" was initialized with fields [${existing.join(", ")}], got [${incoming.join(", ")}].`
40
+ );
41
+ }
42
+ }
43
+ function assertValidFieldWeightValues(weights) {
44
+ for (const [field, weight] of Object.entries(weights ?? {})) {
45
+ if (!Number.isFinite(weight) || weight < 0) {
46
+ throw new Error(`Search field weight for "${field}" must be a non-negative finite number.`);
47
+ }
48
+ }
49
+ }
50
+ function fieldWeightsFor(fields, weights) {
51
+ assertValidFieldWeightValues(weights);
52
+ const fieldSet = new Set(fields);
53
+ for (const field of Object.keys(weights ?? {})) {
54
+ if (!fieldSet.has(field)) throw new Error(`Unknown search field weight "${field}".`);
55
+ }
56
+ return fields.map((field) => weights?.[field] ?? 1);
57
+ }
58
+
59
+ export {
60
+ tokenize,
61
+ sanitizeFtsQuery,
62
+ DEFAULT_SEARCH_FIELD,
63
+ normalizeSearchDocument,
64
+ assertSameSearchFields,
65
+ assertValidFieldWeightValues,
66
+ fieldWeightsFor
67
+ };
package/dist/graph.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { A as AsyncStore, S as StoreFilter } from './types-DyQLNtxa.js';
1
+ import { A as AsyncStore, S as StoreFilter } from './types-DHLdLZ_W.js';
2
2
 
3
3
  /**
4
4
  * A directed edge stored as a flat collection record. `from`/`to` are node ids,
@@ -67,5 +67,32 @@ declare function traverse(store: AsyncStore, collection: string, opts: {
67
67
  nodes: string[];
68
68
  edges: Edge[];
69
69
  }>;
70
+ /**
71
+ * Frontier-IN batched BFS to `depth` hops from `start`.
72
+ *
73
+ * When the store exposes the optional `listWhereIn` capability, traversal fetches
74
+ * only edges adjacent to the current BFS frontier at each depth level:
75
+ *
76
+ * - "out": `from IN (frontier)`
77
+ * - "in": `to IN (frontier)`
78
+ * - "both": both queries, deduped by edge id
79
+ *
80
+ * `edgeFilter.where` is still pushed down into the same store query; the
81
+ * structural frontier field (`from`/`to`) overrides any same-named caller filter,
82
+ * matching `neighbors()`. `edgeTypes` remains in-memory because the base port has
83
+ * exact-match filters, not `type IN (...)`.
84
+ *
85
+ * Stores without `listWhereIn` fall back to `traverse()`'s load-once strategy.
86
+ */
87
+ declare function traverseFrontierBatched(store: AsyncStore, collection: string, opts: {
88
+ start: string;
89
+ depth: number;
90
+ direction?: Direction;
91
+ edgeTypes?: string[];
92
+ edgeFilter?: StoreFilter;
93
+ }): Promise<{
94
+ nodes: string[];
95
+ edges: Edge[];
96
+ }>;
70
97
 
71
- export { type Direction, type Edge, neighbors, traverse };
98
+ export { type Direction, type Edge, neighbors, traverse, traverseFrontierBatched };
package/dist/graph.js CHANGED
@@ -17,6 +17,28 @@ function dedupById(edges) {
17
17
  function withWhere(filter, override) {
18
18
  return { ...filter, where: { ...filter?.where, ...override } };
19
19
  }
20
+ function withoutWhereField(filter, field) {
21
+ if (!filter?.where || !(field in filter.where)) return filter;
22
+ const where = { ...filter.where };
23
+ delete where[field];
24
+ return { ...filter, where };
25
+ }
26
+ function hasListWhereIn(store) {
27
+ return typeof store.listWhereIn === "function";
28
+ }
29
+ async function frontierEdges(store, collection, frontier, direction, edgeFilter) {
30
+ if (direction === "out") {
31
+ return store.listWhereIn(collection, "from", frontier, withoutWhereField(edgeFilter, "from"));
32
+ }
33
+ if (direction === "in") {
34
+ return store.listWhereIn(collection, "to", frontier, withoutWhereField(edgeFilter, "to"));
35
+ }
36
+ const [out, inc] = await Promise.all([
37
+ store.listWhereIn(collection, "from", frontier, withoutWhereField(edgeFilter, "from")),
38
+ store.listWhereIn(collection, "to", frontier, withoutWhereField(edgeFilter, "to"))
39
+ ]);
40
+ return dedupById([...out, ...inc]);
41
+ }
20
42
  async function neighbors(store, collection, opts) {
21
43
  const direction = opts.direction ?? "out";
22
44
  let edges;
@@ -91,7 +113,49 @@ async function traverse(store, collection, opts) {
91
113
  traversedEdges.sort((a, b) => a.id < b.id ? -1 : a.id > b.id ? 1 : 0);
92
114
  return { nodes: reached, edges: traversedEdges };
93
115
  }
116
+ async function traverseFrontierBatched(store, collection, opts) {
117
+ if (!hasListWhereIn(store)) {
118
+ return traverse(store, collection, opts);
119
+ }
120
+ const direction = opts.direction ?? "out";
121
+ if (!Number.isFinite(opts.depth) || opts.depth <= 0) {
122
+ return { nodes: [], edges: [] };
123
+ }
124
+ const visited = /* @__PURE__ */ new Set([opts.start]);
125
+ const reached = [];
126
+ const traversedEdges = [];
127
+ const seenEdgeIds = /* @__PURE__ */ new Set();
128
+ let frontier = [opts.start];
129
+ for (let hop = 0; hop < opts.depth && frontier.length > 0; hop++) {
130
+ const edges = filterByTypes(
131
+ await frontierEdges(store, collection, frontier, direction, opts.edgeFilter),
132
+ opts.edgeTypes
133
+ );
134
+ const next = [];
135
+ for (const node of frontier) {
136
+ for (const edge of edges) {
137
+ const adjacent = direction === "out" ? edge.from === node : direction === "in" ? edge.to === node : edge.from === node || edge.to === node;
138
+ if (!adjacent) continue;
139
+ if (!seenEdgeIds.has(edge.id)) {
140
+ seenEdgeIds.add(edge.id);
141
+ traversedEdges.push(edge);
142
+ }
143
+ const neighbor = edge.from === node ? edge.to : edge.from;
144
+ if (!visited.has(neighbor)) {
145
+ visited.add(neighbor);
146
+ reached.push(neighbor);
147
+ next.push(neighbor);
148
+ }
149
+ }
150
+ }
151
+ frontier = next;
152
+ }
153
+ reached.sort((a, b) => a < b ? -1 : a > b ? 1 : 0);
154
+ traversedEdges.sort((a, b) => a.id < b.id ? -1 : a.id > b.id ? 1 : 0);
155
+ return { nodes: reached, edges: traversedEdges };
156
+ }
94
157
  export {
95
158
  neighbors,
96
- traverse
159
+ traverse,
160
+ traverseFrontierBatched
97
161
  };
package/dist/index.d.ts CHANGED
@@ -1,7 +1,7 @@
1
- export { A as AsyncStore, S as StoreFilter, a as StoreMeta, b as SyncStore } from './types-DyQLNtxa.js';
1
+ export { A as AsyncStore, a as AsyncStoreInQuery, S as StoreFilter, b as StoreMeta, c as SyncStore, d as SyncStoreInQuery } from './types-DHLdLZ_W.js';
2
2
  export { InMemoryKv, toAsync } from './kv.js';
3
3
  export { A as AsyncVectorStore, V as Vector, a as VectorDocument, b as VectorSearchOptions, c as VectorSearchResult, d as VectorStore, e as VectorStoreMeta } from './types-B0XrD10b.js';
4
4
  export { InMemoryVectorStore, assertDimensions, bufferToVector, cosineSimilarity, isUsableVector, toAsyncVector, vectorToBuffer } from './vector.js';
5
5
  export { m as matchesWhere } from './filter-B9hP-TKF.js';
6
- export { S as SearchDocument, a as SearchOptions, b as SearchResult, c as SearchStore } from './types-0B0Tw1fz.js';
6
+ export { S as SearchDocument, a as SearchOptions, b as SearchResult, c as SearchStore } from './types-BLZTMsQO.js';
7
7
  export { InMemorySearchStore, sanitizeFtsQuery, tokenize } from './search.js';
package/dist/index.js CHANGED
@@ -1,10 +1,10 @@
1
1
  import {
2
2
  InMemoryStore,
3
3
  toAsync
4
- } from "./chunk-ZI4JA6IU.js";
4
+ } from "./chunk-DKCPULXT.js";
5
5
  import {
6
6
  InMemorySearchStore
7
- } from "./chunk-TY472NYD.js";
7
+ } from "./chunk-77IIKHQW.js";
8
8
  import {
9
9
  InMemoryVectorStore,
10
10
  toAsyncVector
@@ -12,7 +12,7 @@ import {
12
12
  import {
13
13
  sanitizeFtsQuery,
14
14
  tokenize
15
- } from "./chunk-6YJ66JFO.js";
15
+ } from "./chunk-N27Y55CA.js";
16
16
  import {
17
17
  assertDimensions,
18
18
  bufferToVector,
package/dist/kv.d.ts CHANGED
@@ -1,10 +1,11 @@
1
- import { b as SyncStore, A as AsyncStore, a as StoreMeta, S as StoreFilter } from './types-DyQLNtxa.js';
1
+ import { c as SyncStore, d as SyncStoreInQuery, A as AsyncStore, a as AsyncStoreInQuery, b as StoreMeta, S as StoreFilter } from './types-DHLdLZ_W.js';
2
2
 
3
3
  /** Lift a synchronous store to the {@link AsyncStore} interface. The bridge only
4
- * goes this direction — sync ⊂ async. */
4
+ * goes this direction — sync ⊂ async. Optional sync capabilities are lifted too. */
5
+ declare function toAsync(store: SyncStore & SyncStoreInQuery): AsyncStore & AsyncStoreInQuery;
5
6
  declare function toAsync(store: SyncStore): AsyncStore;
6
7
 
7
- declare class InMemoryStore implements SyncStore {
8
+ declare class InMemoryStore implements SyncStore, SyncStoreInQuery {
8
9
  readonly meta: StoreMeta;
9
10
  /** Key-value storage. */
10
11
  private kv;
@@ -17,6 +18,7 @@ declare class InMemoryStore implements SyncStore {
17
18
  keys(prefix?: string): string[];
18
19
  private ensureCollection;
19
20
  list<T>(collection: string, filter?: StoreFilter): T[];
21
+ listWhereIn<T>(collection: string, field: string, values: readonly unknown[], filter?: StoreFilter): T[];
20
22
  getById<T>(collection: string, id: string): T | null;
21
23
  put<T extends {
22
24
  id: string;
@@ -25,4 +27,4 @@ declare class InMemoryStore implements SyncStore {
25
27
  count(collection: string, filter?: StoreFilter): number;
26
28
  }
27
29
 
28
- export { AsyncStore, InMemoryStore as InMemoryKv, StoreFilter, StoreMeta, SyncStore, toAsync };
30
+ export { AsyncStore, AsyncStoreInQuery, InMemoryStore as InMemoryKv, StoreFilter, StoreMeta, SyncStore, SyncStoreInQuery, toAsync };
package/dist/kv.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import {
2
2
  InMemoryStore,
3
3
  toAsync
4
- } from "./chunk-ZI4JA6IU.js";
4
+ } from "./chunk-DKCPULXT.js";
5
5
  export {
6
6
  InMemoryStore as InMemoryKv,
7
7
  toAsync
package/dist/search.d.ts CHANGED
@@ -1,6 +1,7 @@
1
- import { c as SearchStore, S as SearchDocument, a as SearchOptions, b as SearchResult } from './types-0B0Tw1fz.js';
1
+ import { c as SearchStore, S as SearchDocument, a as SearchOptions, b as SearchResult } from './types-BLZTMsQO.js';
2
+ export { d as SearchFieldDocument, e as SearchTextDocument } from './types-BLZTMsQO.js';
2
3
  export { m as matchesWhere } from './filter-B9hP-TKF.js';
3
- import './types-DyQLNtxa.js';
4
+ import './types-DHLdLZ_W.js';
4
5
 
5
6
  declare class InMemorySearchStore implements SearchStore {
6
7
  private readonly collections;
package/dist/search.js CHANGED
@@ -1,10 +1,10 @@
1
1
  import {
2
2
  InMemorySearchStore
3
- } from "./chunk-TY472NYD.js";
3
+ } from "./chunk-77IIKHQW.js";
4
4
  import {
5
5
  sanitizeFtsQuery,
6
6
  tokenize
7
- } from "./chunk-6YJ66JFO.js";
7
+ } from "./chunk-N27Y55CA.js";
8
8
  import {
9
9
  matchesWhere
10
10
  } from "./chunk-KPMRRYTL.js";
package/dist/sql.d.ts CHANGED
@@ -1,4 +1,4 @@
1
- import { S as StoreFilter } from './types-DyQLNtxa.js';
1
+ import { S as StoreFilter } from './types-DHLdLZ_W.js';
2
2
 
3
3
  /** A bound SQL parameter — the common subset both better-sqlite3 and @libsql/client
4
4
  * accept. Booleans are pre-converted to 0/1 by the builders (better-sqlite3 rejects
@@ -1,23 +1,42 @@
1
- import { S as StoreFilter } from './types-DyQLNtxa.js';
1
+ import { S as StoreFilter } from './types-DHLdLZ_W.js';
2
2
 
3
3
  /** A document to be full-text indexed. `meta` is an exact-match-filterable
4
4
  * payload persisted as JSON by disk-backed backends, so it must be JSON-
5
5
  * serializable; values JSON can't represent (`undefined`, functions) are dropped
6
6
  * on persistence — don't rely on them. */
7
- interface SearchDocument<M = Record<string, unknown>> {
7
+ interface SearchTextDocument<M = Record<string, unknown>> {
8
8
  /** Unique id within the collection. */
9
9
  id: string;
10
- /** The text to tokenize and index. */
10
+ /** Single-field text to tokenize and index. Back-compat shorthand for
11
+ * `fields: { text }`. */
11
12
  text: string;
13
+ /** Provide either `text` or `fields`, not both. */
14
+ fields?: never;
12
15
  /** Typed context stored alongside the text. */
13
16
  meta?: M;
14
17
  }
18
+ interface SearchFieldDocument<M = Record<string, unknown>> {
19
+ /** Unique id within the collection. */
20
+ id: string;
21
+ /** Named text fields to tokenize and index with optional query-time weights
22
+ * (for example `{ title, body }`). All documents in a collection must use the
23
+ * same field names. */
24
+ fields: Record<string, string>;
25
+ /** Provide either `text` or `fields`, not both. */
26
+ text?: never;
27
+ /** Typed context stored alongside the text. */
28
+ meta?: M;
29
+ }
30
+ type SearchDocument<M = Record<string, unknown>> = SearchTextDocument<M> | SearchFieldDocument<M>;
15
31
  interface SearchOptions {
16
32
  /** Maximum results to return. Default: 10. */
17
33
  limit?: number;
18
34
  /** Exact-match filter on document `meta` (uses `filter.where`, evaluated with
19
35
  * the shared `matchesWhere`). Applied before limit, after the FTS MATCH. */
20
36
  filter?: StoreFilter;
37
+ /** Per-field bm25 weights. Fields not listed default to 1. Higher means more
38
+ * important; e.g. `{ title: 4, body: 1 }` boosts title matches. */
39
+ fieldWeights?: Record<string, number>;
21
40
  }
22
41
  /** A ranked search hit. `score` is the bm25 relevance — higher is more relevant. */
23
42
  interface SearchResult<M = Record<string, unknown>> {
@@ -30,10 +49,11 @@ interface SearchResult<M = Record<string, unknown>> {
30
49
  }
31
50
  /** A synchronous full-text search store with per-collection documents.
32
51
  *
33
- * Ranking is bm25 with the FTS5 default parameters (k1 = 1.2, b = 0.75). The
34
- * in-memory reference and the sqlite `.search` facet produce the same RANKING
35
- * on clear relevance differences; exact float scores need not match across
36
- * backends (the same parity caveat as /vector). */
52
+ * Ranking is bm25 with the FTS5 default parameters (k1 = 1.2, b = 0.75), plus
53
+ * optional query-time per-field weights. The in-memory reference and the sqlite
54
+ * `.search` facet produce the same RANKING on clear relevance differences;
55
+ * exact float scores need not match across backends (the same parity caveat as
56
+ * /vector). */
37
57
  interface SearchStore {
38
58
  /** Insert or replace a document by (collection, id). */
39
59
  index<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, doc: SearchDocument<M>): void;
@@ -46,4 +66,4 @@ interface SearchStore {
46
66
  search<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, query: string, opts?: SearchOptions): SearchResult<M>[];
47
67
  }
48
68
 
49
- export type { SearchDocument as S, SearchOptions as a, SearchResult as b, SearchStore as c };
69
+ export type { SearchDocument as S, SearchOptions as a, SearchResult as b, SearchStore as c, SearchFieldDocument as d, SearchTextDocument as e };
@@ -20,6 +20,17 @@ interface StoreFilter {
20
20
  /** Number of results to skip. */
21
21
  offset?: number;
22
22
  }
23
+ /** Optional collection capability for adapters that can push `field IN (...)`
24
+ * down to the backing store while preserving the normal StoreFilter semantics.
25
+ * Graph frontier traversal uses this when present and falls back to plain
26
+ * `list()` when absent. */
27
+ interface SyncStoreInQuery {
28
+ listWhereIn<T>(collection: string, field: string, values: readonly unknown[], filter?: StoreFilter): T[];
29
+ }
30
+ /** Async twin of SyncStoreInQuery for remote-capable stores. */
31
+ interface AsyncStoreInQuery {
32
+ listWhereIn<T>(collection: string, field: string, values: readonly unknown[], filter?: StoreFilter): Promise<T[]>;
33
+ }
23
34
  /**
24
35
  * A typed key-value + collection store. **Synchronous.**
25
36
  *
@@ -80,4 +91,4 @@ interface AsyncStore {
80
91
  count(collection: string, filter?: StoreFilter): Promise<number>;
81
92
  }
82
93
 
83
- export type { AsyncStore as A, StoreFilter as S, StoreMeta as a, SyncStore as b };
94
+ export type { AsyncStore as A, StoreFilter as S, AsyncStoreInQuery as a, StoreMeta as b, SyncStore as c, SyncStoreInQuery as d };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mirk/store",
3
- "version": "0.6.0",
3
+ "version": "0.6.1",
4
4
  "type": "module",
5
5
  "description": "Code-split storage ports + source adapters under one namespace. KV + collection store and vector similarity store as interface subpaths; the sqlite source-adapter implements both over one connection.",
6
6
  "license": "Apache-2.0",
@@ -59,12 +59,6 @@
59
59
  "files": [
60
60
  "dist"
61
61
  ],
62
- "scripts": {
63
- "test": "vitest run",
64
- "test:watch": "vitest",
65
- "typecheck": "tsc --noEmit",
66
- "build": "rm -rf dist && tsup src/index.ts src/kv.ts src/vector.ts src/search.ts src/graph.ts src/sql.ts src/adapters/sqlite.ts --format esm --dts --external better-sqlite3 --external sqlite-vec"
67
- },
68
62
  "peerDependencies": {
69
63
  "better-sqlite3": "^11.0.0 || ^12.0.0",
70
64
  "sqlite-vec": "^0.1.0"
@@ -88,5 +82,11 @@
88
82
  "publishConfig": {
89
83
  "registry": "https://registry.npmjs.org",
90
84
  "access": "public"
85
+ },
86
+ "scripts": {
87
+ "test": "vitest run",
88
+ "test:watch": "vitest",
89
+ "typecheck": "tsc --noEmit",
90
+ "build": "rm -rf dist && tsup src/index.ts src/kv.ts src/vector.ts src/search.ts src/graph.ts src/sql.ts src/adapters/sqlite.ts --format esm --dts --external better-sqlite3 --external sqlite-vec"
91
91
  }
92
- }
92
+ }
@@ -1,15 +0,0 @@
1
- // src/search/tokenize.ts
2
- function tokenize(text) {
3
- if (!text) return [];
4
- return text.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? [];
5
- }
6
- function sanitizeFtsQuery(q) {
7
- const tokens = tokenize(q);
8
- if (tokens.length === 0) return "";
9
- return tokens.map((tok) => `"${tok.replace(/"/g, '""')}"`).join(" OR ");
10
- }
11
-
12
- export {
13
- tokenize,
14
- sanitizeFtsQuery
15
- };