@mirk/store 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +42 -8
- package/dist/adapters/sqlite.d.ts +4 -3
- package/dist/adapters/sqlite.js +194 -44
- package/dist/{chunk-TY472NYD.js → chunk-77IIKHQW.js} +48 -19
- package/dist/{chunk-ZI4JA6IU.js → chunk-DKCPULXT.js} +17 -0
- package/dist/chunk-N27Y55CA.js +67 -0
- package/dist/graph.d.ts +29 -2
- package/dist/graph.js +65 -1
- package/dist/index.d.ts +2 -2
- package/dist/index.js +3 -3
- package/dist/kv.d.ts +6 -4
- package/dist/kv.js +1 -1
- package/dist/search.d.ts +3 -2
- package/dist/search.js +2 -2
- package/dist/sql.d.ts +1 -1
- package/dist/{types-0B0Tw1fz.d.ts → types-BLZTMsQO.d.ts} +28 -8
- package/dist/{types-DyQLNtxa.d.ts → types-DHLdLZ_W.d.ts} +12 -1
- package/package.json +8 -8
- package/dist/chunk-6YJ66JFO.js +0 -15
package/README.md
CHANGED
|
@@ -24,11 +24,13 @@ npm install sqlite-vec
|
|
|
24
24
|
| `@mirk/store` | the ports + their in-memory references + `toAsync` + cosine helpers | none |
|
|
25
25
|
| `@mirk/store/kv` | `SyncStore` port (key-value + collections), `InMemoryKv`, `toAsync` | none |
|
|
26
26
|
| `@mirk/store/vector` | `VectorStore` port, `InMemoryVectorStore`, cosine helpers | none |
|
|
27
|
-
| `@mirk/store/
|
|
27
|
+
| `@mirk/store/search` | `SearchStore` port, `InMemorySearchStore`, BM25-style keyword search | none |
|
|
28
|
+
| `@mirk/store/graph` | graph helpers over the collection port (`neighbors`, `traverse`, `traverseFrontierBatched`) | none |
|
|
29
|
+
| `@mirk/store/sqlite` | the SQLite **source adapter** — one connection, `.kv` + `.vector` + `.search` facets | `better-sqlite3` (peer), `sqlite-vec` (optional peer) |
|
|
28
30
|
|
|
29
31
|
Source adapters are reached **only** through their own subpath (e.g. `/sqlite`) — the root and the
|
|
30
|
-
port subpaths never re-export them, so importing `@mirk/store`, `/kv`,
|
|
31
|
-
native binding into a consumer bundle.
|
|
32
|
+
port subpaths never re-export them, so importing `@mirk/store`, `/kv`, `/vector`, `/search`, or
|
|
33
|
+
`/graph` never drags a native binding into a consumer bundle.
|
|
32
34
|
|
|
33
35
|
## Quickstart — zero native deps
|
|
34
36
|
|
|
@@ -60,19 +62,51 @@ kv.count("posts"); // 1
|
|
|
60
62
|
kv.remove("posts", "p1");
|
|
61
63
|
```
|
|
62
64
|
|
|
65
|
+
## Full-text search
|
|
66
|
+
|
|
67
|
+
`SearchStore` indexes documents by id and returns BM25-ranked keyword matches. Use `text` for the
|
|
68
|
+
single-field shorthand or `fields` for named columns with query-time weighting:
|
|
69
|
+
|
|
70
|
+
```ts
|
|
71
|
+
import { InMemorySearchStore } from "@mirk/store/search";
|
|
72
|
+
|
|
73
|
+
const search = new InMemorySearchStore();
|
|
74
|
+
search.index("pages", { id: "a", fields: { title: "Opal guide", body: "plain body" } });
|
|
75
|
+
search.index("pages", { id: "b", fields: { title: "plain title", body: "Opal guide" } });
|
|
76
|
+
search.search("pages", "opal", { fieldWeights: { title: 4, body: 1 } }); // [a, b]
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
The first indexed document fixes a collection's field schema; later documents must use the same
|
|
80
|
+
field names. `text` and `fields: { text }` are the same single-field schema for backwards
|
|
81
|
+
compatibility.
|
|
82
|
+
|
|
83
|
+
## Graph helpers
|
|
84
|
+
|
|
85
|
+
`@mirk/store/graph` stores edges as ordinary collection records and traverses them through the
|
|
86
|
+
existing collection port. Policy stays caller-owned through `StoreFilter`.
|
|
87
|
+
|
|
88
|
+
```ts
|
|
89
|
+
import { traverse } from "@mirk/store/graph";
|
|
90
|
+
|
|
91
|
+
const hits = traverse(kv, { start: "node:a", depth: 2, direction: "out" });
|
|
92
|
+
```
|
|
93
|
+
|
|
63
94
|
## SQLite adapter — one connection, many capabilities
|
|
64
95
|
|
|
65
|
-
`SqliteAdapter` opens a single `better-sqlite3` database and exposes
|
|
66
|
-
and
|
|
96
|
+
`SqliteAdapter` opens a single `better-sqlite3` database and exposes `.kv` (`SyncStore`), `.vector`
|
|
97
|
+
(`VectorStore`), and `.search` (`SearchStore`) facets over it:
|
|
67
98
|
|
|
68
99
|
```ts
|
|
69
100
|
import { SqliteAdapter } from "@mirk/store/sqlite";
|
|
70
101
|
|
|
71
|
-
//
|
|
72
|
-
const db = new SqliteAdapter({ path: "data.db"
|
|
102
|
+
// .kv and .search work immediately; vector dimensions infer on first write.
|
|
103
|
+
const db = new SqliteAdapter({ path: "data.db" });
|
|
73
104
|
|
|
74
105
|
db.kv.set("user:1", { name: "Ada" });
|
|
75
106
|
|
|
107
|
+
db.search.index("pages", { id: "intro", fields: { title: "Intro", body: "hello world" } });
|
|
108
|
+
db.search.search("pages", "hello", { fieldWeights: { title: 4, body: 1 } });
|
|
109
|
+
|
|
76
110
|
const embedding = new Float32Array(768); // your real embedding here
|
|
77
111
|
const query = new Float32Array(768);
|
|
78
112
|
db.vector.upsert("docs", { id: "a", vector: embedding });
|
|
@@ -87,7 +121,7 @@ db.close();
|
|
|
87
121
|
|---|---|---|
|
|
88
122
|
| `path` | `string` | DB file path, or `":memory:"`. |
|
|
89
123
|
| `db` | `Database` | Reuse an existing `better-sqlite3` connection instead of opening one. |
|
|
90
|
-
| `dimensions` | `number` |
|
|
124
|
+
| `dimensions` | `number` | Optional embedding dimensionality. If omitted, inferred and persisted from the first vector `upsert` / `upsertMany`; `search` still requires known dimensions. |
|
|
91
125
|
| `forceJsCosine` | `boolean` | Pin the exact JS-cosine path even when `sqlite-vec` is installed (mainly for tests). |
|
|
92
126
|
|
|
93
127
|
Vectors (`Vector` is a `Float32Array`) are stored as little-endian float32 BLOBs and ranked by
|
|
@@ -1,14 +1,15 @@
|
|
|
1
1
|
import Database from 'better-sqlite3';
|
|
2
|
-
import {
|
|
2
|
+
import { c as SyncStore } from '../types-DHLdLZ_W.js';
|
|
3
3
|
import { d as VectorStore } from '../types-B0XrD10b.js';
|
|
4
|
-
import { c as SearchStore } from '../types-
|
|
4
|
+
import { c as SearchStore } from '../types-BLZTMsQO.js';
|
|
5
5
|
|
|
6
6
|
interface SqliteAdapterOptions {
|
|
7
7
|
/** Path to the SQLite database file. Use ":memory:" for in-memory. */
|
|
8
8
|
path: string;
|
|
9
9
|
/** Existing better-sqlite3 instance to reuse (shares one connection). */
|
|
10
10
|
db?: Database.Database;
|
|
11
|
-
/** Embedding dimensions.
|
|
11
|
+
/** Embedding dimensions. Optional: when omitted, `.vector` persists the
|
|
12
|
+
* dimensions from the first upsert/upsertMany call. KV/search work without it. */
|
|
12
13
|
dimensions?: number;
|
|
13
14
|
/** Force the exact JS-cosine search path even when sqlite-vec is installed.
|
|
14
15
|
* Mainly for parity testing; production should leave this off. */
|
package/dist/adapters/sqlite.js
CHANGED
|
@@ -1,11 +1,17 @@
|
|
|
1
1
|
import {
|
|
2
|
+
DEFAULT_SEARCH_FIELD,
|
|
3
|
+
assertSameSearchFields,
|
|
4
|
+
assertValidFieldWeightValues,
|
|
5
|
+
fieldWeightsFor,
|
|
6
|
+
normalizeSearchDocument,
|
|
2
7
|
sanitizeFtsQuery
|
|
3
|
-
} from "../chunk-
|
|
8
|
+
} from "../chunk-N27Y55CA.js";
|
|
4
9
|
import {
|
|
5
10
|
buildLimitOffset,
|
|
6
11
|
buildOrderBy,
|
|
7
12
|
buildWhereClause,
|
|
8
|
-
hashName
|
|
13
|
+
hashName,
|
|
14
|
+
jsonPath
|
|
9
15
|
} from "../chunk-DP4D7CJY.js";
|
|
10
16
|
import {
|
|
11
17
|
assertDimensions,
|
|
@@ -38,6 +44,38 @@ function tryLoadSqliteVec(db) {
|
|
|
38
44
|
return false;
|
|
39
45
|
}
|
|
40
46
|
}
|
|
47
|
+
function assertPositiveDimensions(dimensions) {
|
|
48
|
+
if (!Number.isInteger(dimensions) || dimensions <= 0) {
|
|
49
|
+
throw new Error(`Vector dimensions must be a positive integer; got ${dimensions}.`);
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
function sqlParam(value) {
|
|
53
|
+
if (value === null) return null;
|
|
54
|
+
if (typeof value === "boolean") return value ? 1 : 0;
|
|
55
|
+
if (typeof value === "string" || typeof value === "number" || typeof value === "bigint") {
|
|
56
|
+
return value;
|
|
57
|
+
}
|
|
58
|
+
throw new Error("Store IN queries only support JSON scalar values.");
|
|
59
|
+
}
|
|
60
|
+
function buildJsonInWhere(field, values, hasPriorWhere) {
|
|
61
|
+
const path = jsonPath(field);
|
|
62
|
+
const params = [];
|
|
63
|
+
const nonNull = values.filter((value) => value !== null).map(sqlParam);
|
|
64
|
+
const hasNull = values.some((value) => value === null);
|
|
65
|
+
const parts = [];
|
|
66
|
+
if (nonNull.length > 0) {
|
|
67
|
+
parts.push(`json_extract(data, ?) IN (${nonNull.map(() => "?").join(", ")})`);
|
|
68
|
+
params.push(path, ...nonNull);
|
|
69
|
+
}
|
|
70
|
+
if (hasNull) {
|
|
71
|
+
parts.push(`json_type(data, ?) = 'null'`);
|
|
72
|
+
params.push(path);
|
|
73
|
+
}
|
|
74
|
+
return {
|
|
75
|
+
clause: `${hasPriorWhere ? " AND" : " WHERE"} (${parts.join(" OR ")})`,
|
|
76
|
+
params
|
|
77
|
+
};
|
|
78
|
+
}
|
|
41
79
|
var SqliteAdapter = class {
|
|
42
80
|
db;
|
|
43
81
|
kv;
|
|
@@ -136,6 +174,17 @@ var SqliteKvFacet = class {
|
|
|
136
174
|
const rows = this.db.prepare(sql).all(...where.params, ...orderBy.params);
|
|
137
175
|
return rows.map((r) => JSON.parse(r.data));
|
|
138
176
|
}
|
|
177
|
+
listWhereIn(collection, field, values, filter) {
|
|
178
|
+
if (values.length === 0) return [];
|
|
179
|
+
const table = this.ensureTable(collection);
|
|
180
|
+
const where = buildWhereClause(filter);
|
|
181
|
+
const inWhere = buildJsonInWhere(field, values, where.clause.length > 0);
|
|
182
|
+
const orderBy = buildOrderBy(filter);
|
|
183
|
+
const limitOffset = buildLimitOffset(filter);
|
|
184
|
+
const sql = `SELECT data FROM ${table}${where.clause}${inWhere.clause}${orderBy.clause}${limitOffset}`;
|
|
185
|
+
const rows = this.db.prepare(sql).all(...where.params, ...inWhere.params, ...orderBy.params);
|
|
186
|
+
return rows.map((r) => JSON.parse(r.data));
|
|
187
|
+
}
|
|
139
188
|
getById(collection, id) {
|
|
140
189
|
const table = this.ensureTable(collection);
|
|
141
190
|
const row = this.db.prepare(`SELECT data FROM ${table} WHERE id = ?`).get(id);
|
|
@@ -162,8 +211,10 @@ var SqliteKvFacet = class {
|
|
|
162
211
|
}
|
|
163
212
|
};
|
|
164
213
|
var SqliteVectorFacet = class {
|
|
165
|
-
constructor(db, path, dimensions, forceJsCosine) {
|
|
214
|
+
constructor(db, path, dimensions, forceJsCosine = false) {
|
|
166
215
|
this.db = db;
|
|
216
|
+
this.path = path;
|
|
217
|
+
this.forceJsCosine = forceJsCosine;
|
|
167
218
|
this.db.exec(
|
|
168
219
|
`CREATE TABLE IF NOT EXISTS vectors (
|
|
169
220
|
collection TEXT NOT NULL,
|
|
@@ -183,31 +234,51 @@ var SqliteVectorFacet = class {
|
|
|
183
234
|
`Vector store at ${path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
|
|
184
235
|
);
|
|
185
236
|
}
|
|
237
|
+
this.refreshVectorMeta();
|
|
186
238
|
} else if (dimensions !== void 0) {
|
|
187
|
-
this.dimensions
|
|
188
|
-
this.db.prepare(`INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)`).run(String(dimensions));
|
|
189
|
-
} else {
|
|
190
|
-
this.dimensions = -1;
|
|
239
|
+
this.initializeDims(dimensions);
|
|
191
240
|
}
|
|
192
|
-
this.accelerated = !forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
|
|
193
|
-
this.meta = {
|
|
194
|
-
backend: "sqlite",
|
|
195
|
-
dimensions: Math.max(this.dimensions, 0),
|
|
196
|
-
accelerated: this.accelerated
|
|
197
|
-
};
|
|
198
241
|
}
|
|
199
242
|
db;
|
|
200
|
-
|
|
201
|
-
|
|
243
|
+
path;
|
|
244
|
+
forceJsCosine;
|
|
245
|
+
meta = { backend: "sqlite", dimensions: 0, accelerated: false };
|
|
246
|
+
dimensions = -1;
|
|
202
247
|
/** True when sqlite-vec loaded and the vec0 acceleration path is live. */
|
|
203
|
-
accelerated;
|
|
248
|
+
accelerated = false;
|
|
204
249
|
vecTablesEnsured = /* @__PURE__ */ new Set();
|
|
205
|
-
|
|
250
|
+
initializeDims(dimensions) {
|
|
251
|
+
assertPositiveDimensions(dimensions);
|
|
252
|
+
if (this.dimensions >= 0) {
|
|
253
|
+
if (dimensions !== this.dimensions) {
|
|
254
|
+
throw new Error(
|
|
255
|
+
`Vector store at ${this.path} was created with ${this.dimensions} dimensions, opened with ${dimensions}.`
|
|
256
|
+
);
|
|
257
|
+
}
|
|
258
|
+
return;
|
|
259
|
+
}
|
|
260
|
+
this.dimensions = dimensions;
|
|
261
|
+
this.db.prepare(
|
|
262
|
+
`INSERT INTO _vec_meta (key, value) VALUES ('dimensions', ?)
|
|
263
|
+
ON CONFLICT(key) DO UPDATE SET value = excluded.value`
|
|
264
|
+
).run(String(dimensions));
|
|
265
|
+
this.refreshVectorMeta();
|
|
266
|
+
}
|
|
267
|
+
refreshVectorMeta() {
|
|
268
|
+
this.accelerated = !this.forceJsCosine && this.dimensions >= 0 && tryLoadSqliteVec(this.db);
|
|
269
|
+
this.meta.dimensions = Math.max(this.dimensions, 0);
|
|
270
|
+
this.meta.accelerated = this.accelerated;
|
|
271
|
+
}
|
|
272
|
+
requireKnownDims(v) {
|
|
206
273
|
if (this.dimensions < 0) {
|
|
207
|
-
throw new Error("SqliteAdapter.vector
|
|
274
|
+
throw new Error("SqliteAdapter.vector has no dimensions yet \u2014 pass { dimensions } when opening or upsert a vector first.");
|
|
208
275
|
}
|
|
209
276
|
assertDimensions(v, this.dimensions);
|
|
210
277
|
}
|
|
278
|
+
ensureDimsForWrite(v) {
|
|
279
|
+
if (this.dimensions < 0) this.initializeDims(v.length);
|
|
280
|
+
assertDimensions(v, this.dimensions);
|
|
281
|
+
}
|
|
211
282
|
// ── vec0 acceleration helpers ───────────────────────────────────────────
|
|
212
283
|
vecTableName(collection) {
|
|
213
284
|
return `vectors_vec_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
|
|
@@ -247,7 +318,7 @@ var SqliteVectorFacet = class {
|
|
|
247
318
|
}
|
|
248
319
|
}
|
|
249
320
|
upsert(collection, doc) {
|
|
250
|
-
this.
|
|
321
|
+
this.ensureDimsForWrite(doc.vector);
|
|
251
322
|
const write = this.db.transaction(() => {
|
|
252
323
|
this.db.prepare(
|
|
253
324
|
`INSERT INTO vectors(collection, id, vec, metadata) VALUES (?, ?, ?, ?)
|
|
@@ -263,6 +334,14 @@ var SqliteVectorFacet = class {
|
|
|
263
334
|
write();
|
|
264
335
|
}
|
|
265
336
|
upsertMany(collection, docs) {
|
|
337
|
+
const first = docs[0];
|
|
338
|
+
if (!first) return;
|
|
339
|
+
const dimensions = this.dimensions >= 0 ? this.dimensions : first.vector.length;
|
|
340
|
+
assertPositiveDimensions(dimensions);
|
|
341
|
+
for (const doc of docs) {
|
|
342
|
+
assertDimensions(doc.vector, dimensions);
|
|
343
|
+
}
|
|
344
|
+
if (this.dimensions < 0) this.initializeDims(dimensions);
|
|
266
345
|
const tx = this.db.transaction((items) => {
|
|
267
346
|
for (const doc of items) this.upsert(collection, doc);
|
|
268
347
|
});
|
|
@@ -294,7 +373,7 @@ var SqliteVectorFacet = class {
|
|
|
294
373
|
return row.n;
|
|
295
374
|
}
|
|
296
375
|
search(collection, query, opts) {
|
|
297
|
-
this.
|
|
376
|
+
this.requireKnownDims(query);
|
|
298
377
|
const topK = opts?.topK ?? 10;
|
|
299
378
|
const minScore = opts?.minScore;
|
|
300
379
|
const hasFilters = !!(opts?.where || opts?.whereNot);
|
|
@@ -346,52 +425,116 @@ var SqliteVectorFacet = class {
|
|
|
346
425
|
return scored.slice(0, topK);
|
|
347
426
|
}
|
|
348
427
|
};
|
|
428
|
+
function searchColumnName(field, index) {
|
|
429
|
+
if (field === DEFAULT_SEARCH_FIELD) return DEFAULT_SEARCH_FIELD;
|
|
430
|
+
return `f${index}_${hashName(field)}`;
|
|
431
|
+
}
|
|
432
|
+
function searchSchema(fields) {
|
|
433
|
+
return { fields: [...fields], columns: fields.map(searchColumnName) };
|
|
434
|
+
}
|
|
435
|
+
function quoteSqlIdent(identifier) {
|
|
436
|
+
return `"${identifier.replace(/"/g, '""')}"`;
|
|
437
|
+
}
|
|
438
|
+
function searchFieldDefs(schema) {
|
|
439
|
+
return schema.columns.map((column) => `${quoteSqlIdent(column)} TEXT NOT NULL`).join(",\n ");
|
|
440
|
+
}
|
|
441
|
+
function rowColumnRefs(prefix, schema) {
|
|
442
|
+
return schema.columns.map((column) => `${prefix}.${quoteSqlIdent(column)}`).join(", ");
|
|
443
|
+
}
|
|
349
444
|
var SqliteSearchFacet = class {
|
|
350
445
|
constructor(db) {
|
|
351
446
|
this.db = db;
|
|
447
|
+
this.db.exec(`
|
|
448
|
+
CREATE TABLE IF NOT EXISTS ${this.schemaTable} (
|
|
449
|
+
collection TEXT PRIMARY KEY,
|
|
450
|
+
fields_json TEXT NOT NULL
|
|
451
|
+
);
|
|
452
|
+
`);
|
|
352
453
|
}
|
|
353
454
|
db;
|
|
354
455
|
ensured = /* @__PURE__ */ new Set();
|
|
456
|
+
schemaTable = "_mirk_search_schema";
|
|
355
457
|
baseTable(collection) {
|
|
356
458
|
return `search_docs_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
|
|
357
459
|
}
|
|
358
460
|
ftsTable(collection) {
|
|
359
461
|
return `search_fts_${collection.replace(/[^a-zA-Z0-9_]/g, "_")}_${hashName(collection)}`;
|
|
360
462
|
}
|
|
361
|
-
|
|
463
|
+
tableExists(table) {
|
|
464
|
+
return this.db.prepare("SELECT 1 AS ok FROM sqlite_master WHERE type IN ('table', 'view') AND name = ?").get(table) !== void 0;
|
|
465
|
+
}
|
|
466
|
+
loadSchema(collection) {
|
|
467
|
+
const row = this.db.prepare(`SELECT fields_json FROM ${this.schemaTable} WHERE collection = ?`).get(collection);
|
|
468
|
+
if (row) {
|
|
469
|
+
const fields2 = JSON.parse(row.fields_json);
|
|
470
|
+
return searchSchema(fields2);
|
|
471
|
+
}
|
|
472
|
+
const docs = this.baseTable(collection);
|
|
473
|
+
if (!this.tableExists(docs)) return void 0;
|
|
474
|
+
const pragma = this.db.prepare(`PRAGMA table_info(${quoteSqlIdent(docs)})`).all();
|
|
475
|
+
if (!pragma.some((col) => col.name === DEFAULT_SEARCH_FIELD)) return void 0;
|
|
476
|
+
const fields = [DEFAULT_SEARCH_FIELD];
|
|
477
|
+
this.db.prepare(`INSERT OR IGNORE INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
|
|
478
|
+
return searchSchema(fields);
|
|
479
|
+
}
|
|
480
|
+
schemaForIndex(collection, normalized) {
|
|
481
|
+
const existing = this.loadSchema(collection);
|
|
482
|
+
if (existing) {
|
|
483
|
+
assertSameSearchFields(existing.fields, normalized.names, collection);
|
|
484
|
+
return existing;
|
|
485
|
+
}
|
|
486
|
+
const fields = [...normalized.names];
|
|
487
|
+
this.db.prepare(`INSERT INTO ${this.schemaTable}(collection, fields_json) VALUES (?, ?)`).run(collection, JSON.stringify(fields));
|
|
488
|
+
return searchSchema(fields);
|
|
489
|
+
}
|
|
490
|
+
ensure(collection, schema) {
|
|
362
491
|
const docs = this.baseTable(collection);
|
|
363
492
|
const fts = this.ftsTable(collection);
|
|
364
|
-
|
|
493
|
+
const key = `${docs}:${schema.fields.join("\0")}`;
|
|
494
|
+
if (this.ensured.has(key)) return { docs, fts };
|
|
495
|
+
const qDocs = quoteSqlIdent(docs);
|
|
496
|
+
const qFts = quoteSqlIdent(fts);
|
|
497
|
+
const qColumns = schema.columns.map(quoteSqlIdent).join(", ");
|
|
498
|
+
const newColumns = rowColumnRefs("new", schema);
|
|
499
|
+
const oldColumns = rowColumnRefs("old", schema);
|
|
500
|
+
const fieldDefs = searchFieldDefs(schema);
|
|
365
501
|
this.db.exec(`
|
|
366
|
-
CREATE TABLE IF NOT EXISTS ${
|
|
502
|
+
CREATE TABLE IF NOT EXISTS ${qDocs} (
|
|
367
503
|
id TEXT PRIMARY KEY,
|
|
368
|
-
|
|
504
|
+
${fieldDefs},
|
|
369
505
|
meta_json TEXT
|
|
370
506
|
);
|
|
371
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS ${
|
|
372
|
-
|
|
507
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS ${qFts} USING fts5(
|
|
508
|
+
${qColumns}, content='${docs}', content_rowid='rowid', tokenize='unicode61'
|
|
373
509
|
);
|
|
374
|
-
CREATE TRIGGER IF NOT EXISTS ${docs}_ai AFTER INSERT ON ${
|
|
375
|
-
INSERT INTO ${
|
|
510
|
+
CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ai`)} AFTER INSERT ON ${qDocs} BEGIN
|
|
511
|
+
INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
|
|
376
512
|
END;
|
|
377
|
-
CREATE TRIGGER IF NOT EXISTS ${docs}_ad AFTER DELETE ON ${
|
|
378
|
-
INSERT INTO ${
|
|
513
|
+
CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_ad`)} AFTER DELETE ON ${qDocs} BEGIN
|
|
514
|
+
INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
|
|
379
515
|
END;
|
|
380
|
-
CREATE TRIGGER IF NOT EXISTS ${docs}_au AFTER UPDATE ON ${
|
|
381
|
-
INSERT INTO ${
|
|
382
|
-
INSERT INTO ${
|
|
516
|
+
CREATE TRIGGER IF NOT EXISTS ${quoteSqlIdent(`${docs}_au`)} AFTER UPDATE ON ${qDocs} BEGIN
|
|
517
|
+
INSERT INTO ${qFts}(${quoteSqlIdent(fts)}, rowid, ${qColumns}) VALUES('delete', old.rowid, ${oldColumns});
|
|
518
|
+
INSERT INTO ${qFts}(rowid, ${qColumns}) VALUES (new.rowid, ${newColumns});
|
|
383
519
|
END;
|
|
384
520
|
`);
|
|
385
|
-
this.ensured.add(
|
|
521
|
+
this.ensured.add(key);
|
|
386
522
|
return { docs, fts };
|
|
387
523
|
}
|
|
388
524
|
index(collection, doc) {
|
|
389
|
-
const
|
|
525
|
+
const normalized = normalizeSearchDocument(doc);
|
|
526
|
+
const schema = this.schemaForIndex(collection, normalized);
|
|
527
|
+
const { docs } = this.ensure(collection, schema);
|
|
528
|
+
const qDocs = quoteSqlIdent(docs);
|
|
529
|
+
const qColumns = schema.columns.map(quoteSqlIdent);
|
|
530
|
+
const insertColumns = ["id", ...schema.columns, "meta_json"].map(quoteSqlIdent).join(", ");
|
|
531
|
+
const placeholders = Array.from({ length: schema.columns.length + 2 }, () => "?").join(", ");
|
|
532
|
+
const updateSet = [...qColumns.map((col) => `${col} = excluded.${col}`), "meta_json = excluded.meta_json"].join(", ");
|
|
390
533
|
const metaJson = doc.meta === void 0 ? null : JSON.stringify(doc.meta);
|
|
391
534
|
this.db.prepare(
|
|
392
|
-
`INSERT INTO ${
|
|
393
|
-
ON CONFLICT(id) DO UPDATE SET
|
|
394
|
-
).run(doc.id,
|
|
535
|
+
`INSERT INTO ${qDocs}(${insertColumns}) VALUES (${placeholders})
|
|
536
|
+
ON CONFLICT(id) DO UPDATE SET ${updateSet}`
|
|
537
|
+
).run(doc.id, ...schema.fields.map((field) => normalized.values[field] ?? ""), metaJson);
|
|
395
538
|
}
|
|
396
539
|
indexMany(collection, docs) {
|
|
397
540
|
const tx = this.db.transaction((items) => {
|
|
@@ -400,17 +543,24 @@ var SqliteSearchFacet = class {
|
|
|
400
543
|
tx(docs);
|
|
401
544
|
}
|
|
402
545
|
remove(collection, id) {
|
|
403
|
-
const
|
|
404
|
-
|
|
546
|
+
const schema = this.loadSchema(collection);
|
|
547
|
+
if (!schema) return false;
|
|
548
|
+
const { docs } = this.ensure(collection, schema);
|
|
549
|
+
return this.db.prepare(`DELETE FROM ${quoteSqlIdent(docs)} WHERE id = ?`).run(id).changes > 0;
|
|
405
550
|
}
|
|
406
551
|
search(collection, query, opts) {
|
|
407
|
-
const { docs, fts } = this.ensure(collection);
|
|
408
552
|
const sanitized = sanitizeFtsQuery(query);
|
|
553
|
+
assertValidFieldWeightValues(opts?.fieldWeights);
|
|
409
554
|
if (sanitized.length === 0) return [];
|
|
555
|
+
const schema = this.loadSchema(collection);
|
|
556
|
+
if (!schema) return [];
|
|
557
|
+
const { docs, fts } = this.ensure(collection, schema);
|
|
558
|
+
const weights = fieldWeightsFor(schema.fields, opts?.fieldWeights);
|
|
559
|
+
const weightArgs = weights.length > 0 ? `, ${weights.map((weight) => String(weight)).join(", ")}` : "";
|
|
410
560
|
const rows = this.db.prepare(
|
|
411
|
-
`SELECT d.id AS id, d.meta_json AS meta_json, bm25(${fts}) AS bm
|
|
412
|
-
FROM ${fts}
|
|
413
|
-
JOIN ${docs} d ON d.rowid = ${fts}.rowid
|
|
561
|
+
`SELECT d.id AS id, d.meta_json AS meta_json, bm25(${fts}${weightArgs}) AS bm
|
|
562
|
+
FROM ${quoteSqlIdent(fts)}
|
|
563
|
+
JOIN ${quoteSqlIdent(docs)} d ON d.rowid = ${fts}.rowid
|
|
414
564
|
WHERE ${fts} MATCH ?
|
|
415
565
|
ORDER BY bm, d.id`
|
|
416
566
|
).all(sanitized);
|
|
@@ -1,6 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
|
+
assertSameSearchFields,
|
|
3
|
+
assertValidFieldWeightValues,
|
|
4
|
+
fieldWeightsFor,
|
|
5
|
+
normalizeSearchDocument,
|
|
2
6
|
tokenize
|
|
3
|
-
} from "./chunk-
|
|
7
|
+
} from "./chunk-N27Y55CA.js";
|
|
4
8
|
import {
|
|
5
9
|
matchesWhere
|
|
6
10
|
} from "./chunk-KPMRRYTL.js";
|
|
@@ -11,21 +15,33 @@ var B = 0.75;
|
|
|
11
15
|
var InMemorySearchStore = class {
|
|
12
16
|
collections = /* @__PURE__ */ new Map();
|
|
13
17
|
index(collection, doc) {
|
|
14
|
-
const
|
|
18
|
+
const normalized = normalizeSearchDocument(doc);
|
|
19
|
+
const coll = this.collectionFor(collection, normalized.names);
|
|
20
|
+
assertSameSearchFields(coll.fields, normalized.names, collection);
|
|
15
21
|
this.removeFromColl(coll, doc.id);
|
|
16
|
-
const
|
|
17
|
-
const
|
|
18
|
-
|
|
22
|
+
const tfByField = /* @__PURE__ */ new Map();
|
|
23
|
+
const terms = /* @__PURE__ */ new Set();
|
|
24
|
+
let dl = 0;
|
|
25
|
+
for (const field of coll.fields) {
|
|
26
|
+
const tokens = tokenize(normalized.values[field] ?? "");
|
|
27
|
+
dl += tokens.length;
|
|
28
|
+
const tf = /* @__PURE__ */ new Map();
|
|
29
|
+
for (const token of tokens) {
|
|
30
|
+
tf.set(token, (tf.get(token) ?? 0) + 1);
|
|
31
|
+
terms.add(token);
|
|
32
|
+
}
|
|
33
|
+
tfByField.set(field, tf);
|
|
34
|
+
}
|
|
35
|
+
coll.totalLen += dl;
|
|
19
36
|
const indexed = {
|
|
20
37
|
id: doc.id,
|
|
21
38
|
meta: doc.meta ?? {},
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
39
|
+
tfByField,
|
|
40
|
+
dl,
|
|
41
|
+
terms
|
|
25
42
|
};
|
|
26
43
|
coll.docs.set(doc.id, indexed);
|
|
27
|
-
coll.
|
|
28
|
-
for (const term of tf.keys()) coll.df.set(term, (coll.df.get(term) ?? 0) + 1);
|
|
44
|
+
for (const term of terms) coll.df.set(term, (coll.df.get(term) ?? 0) + 1);
|
|
29
45
|
}
|
|
30
46
|
indexMany(collection, docs) {
|
|
31
47
|
for (const doc of docs) this.index(collection, doc);
|
|
@@ -38,25 +54,33 @@ var InMemorySearchStore = class {
|
|
|
38
54
|
search(collection, query, opts) {
|
|
39
55
|
const coll = this.collections.get(collection);
|
|
40
56
|
const qTokens = tokenize(query);
|
|
57
|
+
assertValidFieldWeightValues(opts?.fieldWeights);
|
|
41
58
|
if (!coll || qTokens.length === 0) return [];
|
|
42
59
|
const limit = opts?.limit ?? 10;
|
|
43
60
|
const where = opts?.filter?.where;
|
|
61
|
+
const fieldWeights = fieldWeightsFor(coll.fields, opts?.fieldWeights);
|
|
44
62
|
const n = coll.docs.size;
|
|
45
|
-
const avgdl = n > 0 ? coll.totalLen / n : 0;
|
|
46
63
|
const scored = [];
|
|
47
64
|
for (const doc of coll.docs.values()) {
|
|
48
65
|
if (where && !matchesWhere(doc.meta, where)) continue;
|
|
49
66
|
let matched = false;
|
|
50
67
|
let score = 0;
|
|
68
|
+
const avgdl = n > 0 ? coll.totalLen / n : 0;
|
|
51
69
|
for (const qt of qTokens) {
|
|
52
|
-
const tf = doc.tf.get(qt) ?? 0;
|
|
53
|
-
if (tf === 0) continue;
|
|
54
|
-
matched = true;
|
|
55
70
|
const df = coll.df.get(qt) ?? 0;
|
|
71
|
+
if (df === 0) continue;
|
|
72
|
+
let weightedTf = 0;
|
|
73
|
+
for (let i = 0; i < coll.fields.length; i++) {
|
|
74
|
+
const field = coll.fields[i];
|
|
75
|
+
const tf = doc.tfByField.get(field)?.get(qt) ?? 0;
|
|
76
|
+
weightedTf += fieldWeights[i] * tf;
|
|
77
|
+
}
|
|
78
|
+
if (weightedTf === 0) continue;
|
|
79
|
+
matched = true;
|
|
56
80
|
const idf = Math.log((n - df + 0.5) / (df + 0.5));
|
|
57
81
|
if (idf <= 0) continue;
|
|
58
|
-
const denom =
|
|
59
|
-
score += idf * (
|
|
82
|
+
const denom = weightedTf + K1 * (1 - B + B * (avgdl > 0 ? doc.dl / avgdl : 0));
|
|
83
|
+
score += idf * (weightedTf * (K1 + 1)) / denom;
|
|
60
84
|
}
|
|
61
85
|
if (!matched) continue;
|
|
62
86
|
scored.push({ id: doc.id, score, meta: doc.meta });
|
|
@@ -69,17 +93,22 @@ var InMemorySearchStore = class {
|
|
|
69
93
|
if (!doc) return false;
|
|
70
94
|
coll.docs.delete(id);
|
|
71
95
|
coll.totalLen -= doc.dl;
|
|
72
|
-
for (const term of doc.
|
|
96
|
+
for (const term of doc.terms) {
|
|
73
97
|
const next = (coll.df.get(term) ?? 0) - 1;
|
|
74
98
|
if (next <= 0) coll.df.delete(term);
|
|
75
99
|
else coll.df.set(term, next);
|
|
76
100
|
}
|
|
77
101
|
return true;
|
|
78
102
|
}
|
|
79
|
-
collectionFor(name) {
|
|
103
|
+
collectionFor(name, fields) {
|
|
80
104
|
let coll = this.collections.get(name);
|
|
81
105
|
if (!coll) {
|
|
82
|
-
coll = {
|
|
106
|
+
coll = {
|
|
107
|
+
fields: [...fields],
|
|
108
|
+
docs: /* @__PURE__ */ new Map(),
|
|
109
|
+
df: /* @__PURE__ */ new Map(),
|
|
110
|
+
totalLen: 0
|
|
111
|
+
};
|
|
83
112
|
this.collections.set(name, coll);
|
|
84
113
|
}
|
|
85
114
|
return coll;
|
|
@@ -1,9 +1,16 @@
|
|
|
1
1
|
// src/to-async.ts
|
|
2
|
+
function hasListWhereIn(store) {
|
|
3
|
+
return typeof store.listWhereIn === "function";
|
|
4
|
+
}
|
|
2
5
|
var AsyncStoreAdapter = class {
|
|
3
6
|
constructor(sync) {
|
|
4
7
|
this.sync = sync;
|
|
8
|
+
if (hasListWhereIn(sync)) {
|
|
9
|
+
this.listWhereIn = async (collection, field, values, filter) => sync.listWhereIn(collection, field, values, filter);
|
|
10
|
+
}
|
|
5
11
|
}
|
|
6
12
|
sync;
|
|
13
|
+
listWhereIn;
|
|
7
14
|
get meta() {
|
|
8
15
|
return this.sync.meta;
|
|
9
16
|
}
|
|
@@ -85,6 +92,16 @@ var InMemoryStore = class {
|
|
|
85
92
|
items = applyFilter(items, filter);
|
|
86
93
|
return items;
|
|
87
94
|
}
|
|
95
|
+
listWhereIn(collection, field, values, filter) {
|
|
96
|
+
if (values.length === 0) return [];
|
|
97
|
+
const set = new Set(values);
|
|
98
|
+
const col = this.ensureCollection(collection);
|
|
99
|
+
const items = [...col.values()].filter((item) => {
|
|
100
|
+
if (typeof item !== "object" || item === null) return false;
|
|
101
|
+
return set.has(item[field]);
|
|
102
|
+
});
|
|
103
|
+
return applyFilter(items, filter);
|
|
104
|
+
}
|
|
88
105
|
getById(collection, id) {
|
|
89
106
|
const col = this.ensureCollection(collection);
|
|
90
107
|
const item = col.get(id);
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
// src/search/tokenize.ts
|
|
2
|
+
function tokenize(text) {
|
|
3
|
+
if (!text) return [];
|
|
4
|
+
return text.toLowerCase().match(/[\p{L}\p{N}]+/gu) ?? [];
|
|
5
|
+
}
|
|
6
|
+
function sanitizeFtsQuery(q) {
|
|
7
|
+
const tokens = tokenize(q);
|
|
8
|
+
if (tokens.length === 0) return "";
|
|
9
|
+
return tokens.map((tok) => `"${tok.replace(/"/g, '""')}"`).join(" OR ");
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
// src/search/fields.ts
|
|
13
|
+
var DEFAULT_SEARCH_FIELD = "text";
|
|
14
|
+
function normalizeSearchDocument(doc) {
|
|
15
|
+
if (doc.text !== void 0 && doc.fields !== void 0) {
|
|
16
|
+
throw new Error("SearchDocument must provide either `text` or `fields`, not both.");
|
|
17
|
+
}
|
|
18
|
+
if (doc.text !== void 0) {
|
|
19
|
+
return { names: [DEFAULT_SEARCH_FIELD], values: { [DEFAULT_SEARCH_FIELD]: doc.text } };
|
|
20
|
+
}
|
|
21
|
+
if (doc.fields === void 0) {
|
|
22
|
+
throw new Error("SearchDocument must provide `text` or `fields`.");
|
|
23
|
+
}
|
|
24
|
+
const names = Object.keys(doc.fields).sort((a, b) => a < b ? -1 : a > b ? 1 : 0);
|
|
25
|
+
if (names.length === 0) throw new Error("SearchDocument.fields must contain at least one field.");
|
|
26
|
+
const values = {};
|
|
27
|
+
for (const name of names) {
|
|
28
|
+
const value = doc.fields[name];
|
|
29
|
+
if (typeof value !== "string") {
|
|
30
|
+
throw new Error(`SearchDocument field "${name}" must be a string.`);
|
|
31
|
+
}
|
|
32
|
+
values[name] = value;
|
|
33
|
+
}
|
|
34
|
+
return { names, values };
|
|
35
|
+
}
|
|
36
|
+
function assertSameSearchFields(existing, incoming, collection) {
|
|
37
|
+
if (existing.length !== incoming.length || existing.some((name, index) => name !== incoming[index])) {
|
|
38
|
+
throw new Error(
|
|
39
|
+
`Search collection "${collection}" was initialized with fields [${existing.join(", ")}], got [${incoming.join(", ")}].`
|
|
40
|
+
);
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
/**
 * Validate that every supplied field weight is a finite number >= 0.
 *
 * @param {Record<string, number> | null | undefined} weights - nullish means
 *   "no weights" and always passes.
 * @throws {Error} naming the first field whose weight is invalid.
 */
function assertValidFieldWeightValues(weights) {
  for (const [field, weight] of Object.entries(weights ?? {})) {
    if (!Number.isFinite(weight) || weight < 0) {
      throw new Error(`Search field weight for "${field}" must be a non-negative finite number.`);
    }
  }
}

/**
 * Resolve per-field weights into an array aligned with `fields`.
 *
 * @param {string[]} fields - the collection's field names, in ranking order.
 * @param {Record<string, number> | null | undefined} weights - optional
 *   overrides; fields without an entry weigh 1.
 * @returns {number[]} one weight per entry of `fields`, in the same order.
 * @throws {Error} when a weight is invalid or names a field not in `fields`.
 */
function fieldWeightsFor(fields, weights) {
  assertValidFieldWeightValues(weights);
  const supplied = weights ?? {};
  const known = new Set(fields);
  for (const field of Object.keys(supplied)) {
    if (!known.has(field)) throw new Error(`Unknown search field weight "${field}".`);
  }
  // Only own keys count: a plain lookup would resolve inherited members such as
  // `constructor` or `toString` and hand back a function instead of the default.
  return fields.map((field) =>
    Object.prototype.hasOwnProperty.call(supplied, field) ? supplied[field] : 1
  );
}
|
|
58
|
+
|
|
59
|
+
export {
|
|
60
|
+
tokenize,
|
|
61
|
+
sanitizeFtsQuery,
|
|
62
|
+
DEFAULT_SEARCH_FIELD,
|
|
63
|
+
normalizeSearchDocument,
|
|
64
|
+
assertSameSearchFields,
|
|
65
|
+
assertValidFieldWeightValues,
|
|
66
|
+
fieldWeightsFor
|
|
67
|
+
};
|
package/dist/graph.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { A as AsyncStore, S as StoreFilter } from './types-DyQLNtxa.js';
|
|
1
|
+
import { A as AsyncStore, S as StoreFilter } from './types-DHLdLZ_W.js';
|
|
2
2
|
|
|
3
3
|
/**
|
|
4
4
|
* A directed edge stored as a flat collection record. `from`/`to` are node ids,
|
|
@@ -67,5 +67,32 @@ declare function traverse(store: AsyncStore, collection: string, opts: {
|
|
|
67
67
|
nodes: string[];
|
|
68
68
|
edges: Edge[];
|
|
69
69
|
}>;
|
|
70
|
+
/**
|
|
71
|
+
* Frontier-IN batched BFS to `depth` hops from `start`.
|
|
72
|
+
*
|
|
73
|
+
* When the store exposes the optional `listWhereIn` capability, traversal fetches
|
|
74
|
+
* only edges adjacent to the current BFS frontier at each depth level:
|
|
75
|
+
*
|
|
76
|
+
* - "out": `from IN (frontier)`
|
|
77
|
+
* - "in": `to IN (frontier)`
|
|
78
|
+
* - "both": both queries, deduped by edge id
|
|
79
|
+
*
|
|
80
|
+
* `edgeFilter.where` is still pushed down into the same store query; the
|
|
81
|
+
* structural frontier field (`from`/`to`) overrides any same-named caller filter,
|
|
82
|
+
* matching `neighbors()`. `edgeTypes` remains in-memory because the base port has
|
|
83
|
+
* exact-match filters, not `type IN (...)`.
|
|
84
|
+
*
|
|
85
|
+
* Stores without `listWhereIn` fall back to `traverse()`'s load-once strategy.
|
|
86
|
+
*/
|
|
87
|
+
declare function traverseFrontierBatched(store: AsyncStore, collection: string, opts: {
|
|
88
|
+
start: string;
|
|
89
|
+
depth: number;
|
|
90
|
+
direction?: Direction;
|
|
91
|
+
edgeTypes?: string[];
|
|
92
|
+
edgeFilter?: StoreFilter;
|
|
93
|
+
}): Promise<{
|
|
94
|
+
nodes: string[];
|
|
95
|
+
edges: Edge[];
|
|
96
|
+
}>;
|
|
70
97
|
|
|
71
|
-
export { type Direction, type Edge, neighbors, traverse };
|
|
98
|
+
export { type Direction, type Edge, neighbors, traverse, traverseFrontierBatched };
|
package/dist/graph.js
CHANGED
|
@@ -17,6 +17,28 @@ function dedupById(edges) {
|
|
|
17
17
|
function withWhere(filter, override) {
|
|
18
18
|
return { ...filter, where: { ...filter?.where, ...override } };
|
|
19
19
|
}
|
|
20
|
+
/**
 * Return `filter` with one key removed from its `where` clause.
 *
 * The input is never mutated. When there is no `where`, or `field` is not in
 * it, the original `filter` reference is returned unchanged.
 *
 * @param {{ where?: Record<string, unknown> } | undefined} filter
 * @param {string} field - the `where` key to drop.
 * @returns {typeof filter} the same filter or a shallow copy without `field`.
 */
function withoutWhereField(filter, field) {
  const clause = filter?.where;
  if (!clause || !(field in clause)) return filter;
  const { [field]: _dropped, ...where } = clause;
  return { ...filter, where };
}
|
|
26
|
+
/**
 * Report whether `store` exposes a callable `listWhereIn` member.
 *
 * @param {object} store
 * @returns {boolean} true only when `store.listWhereIn` is a function.
 */
function hasListWhereIn(store) {
  const candidate = store.listWhereIn;
  return typeof candidate === "function";
}
|
|
29
|
+
/**
 * Fetch the edges touching any node in `frontier` via `store.listWhereIn`.
 *
 * "out" queries on `from`, "in" queries on `to`; any other direction runs both
 * queries concurrently and merges them through `dedupById`. The queried field
 * is stripped from `edgeFilter.where` before each call so the frontier
 * constraint is the only condition on that field.
 *
 * @param {object} store - must provide `listWhereIn`.
 * @param {string} collection - edge collection name.
 * @param {string[]} frontier - node ids of the current BFS level.
 * @param {string} direction - "out", "in", or anything else for both.
 * @param {object} [edgeFilter] - extra filter forwarded to the store.
 * @returns {Promise<object[]>} the matching edge records.
 */
async function frontierEdges(store, collection, frontier, direction, edgeFilter) {
  const fetchBy = (field) =>
    store.listWhereIn(collection, field, frontier, withoutWhereField(edgeFilter, field));

  switch (direction) {
    case "out":
      return fetchBy("from");
    case "in":
      return fetchBy("to");
    default: {
      const [outgoing, incoming] = await Promise.all([fetchBy("from"), fetchBy("to")]);
      return dedupById([...outgoing, ...incoming]);
    }
  }
}
|
|
20
42
|
async function neighbors(store, collection, opts) {
|
|
21
43
|
const direction = opts.direction ?? "out";
|
|
22
44
|
let edges;
|
|
@@ -91,7 +113,49 @@ async function traverse(store, collection, opts) {
|
|
|
91
113
|
traversedEdges.sort((a, b) => a.id < b.id ? -1 : a.id > b.id ? 1 : 0);
|
|
92
114
|
return { nodes: reached, edges: traversedEdges };
|
|
93
115
|
}
|
|
116
|
+
/**
 * Breadth-first traversal up to `opts.depth` hops from `opts.start`, fetching
 * only the edges adjacent to the current frontier at each level.
 *
 * Stores without `listWhereIn` are delegated to `traverse()`. `edgeTypes` is
 * applied in memory through `filterByTypes` after each fetch.
 *
 * @param {object} store - async store, optionally providing `listWhereIn`.
 * @param {string} collection - edge collection name.
 * @param {{ start: string, depth: number, direction?: string,
 *           edgeTypes?: string[], edgeFilter?: object }} opts
 *   `direction` defaults to "out"; any value other than "out"/"in" follows
 *   edges both ways.
 * @returns {Promise<{ nodes: string[], edges: object[] }>} reached node ids
 *   (excluding `start`) and traversed edges, sorted by id. Empty when `depth`
 *   is non-finite or <= 0.
 */
async function traverseFrontierBatched(store, collection, opts) {
  if (!hasListWhereIn(store)) {
    return traverse(store, collection, opts);
  }
  const direction = opts.direction ?? "out";
  if (!Number.isFinite(opts.depth) || opts.depth <= 0) {
    return { nodes: [], edges: [] };
  }
  // Which endpoint may sit on the frontier: "out" matches `from`, "in" matches
  // `to`, everything else matches either.
  const matchFrom = direction !== "in";
  const matchTo = direction !== "out";

  const visited = new Set([opts.start]);
  const reached = [];
  const traversedEdges = [];
  const seenEdgeIds = new Set();
  let frontier = [opts.start];

  for (let hop = 0; hop < opts.depth && frontier.length > 0; hop++) {
    const edges = filterByTypes(
      await frontierEdges(store, collection, frontier, direction, opts.edgeFilter),
      opts.edgeTypes
    );
    // One pass over the edges with O(1) frontier membership, instead of
    // rescanning every edge for every frontier node. Both outputs are sorted
    // before returning, so discovery order within a hop does not matter.
    const onFrontier = new Set(frontier);
    const next = [];
    const visit = (nodeId) => {
      if (visited.has(nodeId)) return;
      visited.add(nodeId);
      reached.push(nodeId);
      next.push(nodeId);
    };
    for (const edge of edges) {
      const fromHit = matchFrom && onFrontier.has(edge.from);
      const toHit = matchTo && onFrontier.has(edge.to);
      if (!fromHit && !toHit) continue;
      if (!seenEdgeIds.has(edge.id)) {
        seenEdgeIds.add(edge.id);
        traversedEdges.push(edge);
      }
      if (fromHit) visit(edge.to);
      if (toHit) visit(edge.from);
    }
    frontier = next;
  }

  reached.sort((a, b) => (a < b ? -1 : a > b ? 1 : 0));
  traversedEdges.sort((a, b) => (a.id < b.id ? -1 : a.id > b.id ? 1 : 0));
  return { nodes: reached, edges: traversedEdges };
}
|
|
94
157
|
export {
|
|
95
158
|
neighbors,
|
|
96
|
-
traverse
|
|
159
|
+
traverse,
|
|
160
|
+
traverseFrontierBatched
|
|
97
161
|
};
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
export { A as AsyncStore, S as StoreFilter,
|
|
1
|
+
export { A as AsyncStore, a as AsyncStoreInQuery, S as StoreFilter, b as StoreMeta, c as SyncStore, d as SyncStoreInQuery } from './types-DHLdLZ_W.js';
|
|
2
2
|
export { InMemoryKv, toAsync } from './kv.js';
|
|
3
3
|
export { A as AsyncVectorStore, V as Vector, a as VectorDocument, b as VectorSearchOptions, c as VectorSearchResult, d as VectorStore, e as VectorStoreMeta } from './types-B0XrD10b.js';
|
|
4
4
|
export { InMemoryVectorStore, assertDimensions, bufferToVector, cosineSimilarity, isUsableVector, toAsyncVector, vectorToBuffer } from './vector.js';
|
|
5
5
|
export { m as matchesWhere } from './filter-B9hP-TKF.js';
|
|
6
|
-
export { S as SearchDocument, a as SearchOptions, b as SearchResult, c as SearchStore } from './types-0B0Tw1fz.js';
|
|
6
|
+
export { S as SearchDocument, a as SearchOptions, b as SearchResult, c as SearchStore } from './types-BLZTMsQO.js';
|
|
7
7
|
export { InMemorySearchStore, sanitizeFtsQuery, tokenize } from './search.js';
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
2
|
InMemoryStore,
|
|
3
3
|
toAsync
|
|
4
|
-
} from "./chunk-ZI4JA6IU.js";
|
|
4
|
+
} from "./chunk-DKCPULXT.js";
|
|
5
5
|
import {
|
|
6
6
|
InMemorySearchStore
|
|
7
|
-
} from "./chunk-TY472NYD.js";
|
|
7
|
+
} from "./chunk-77IIKHQW.js";
|
|
8
8
|
import {
|
|
9
9
|
InMemoryVectorStore,
|
|
10
10
|
toAsyncVector
|
|
@@ -12,7 +12,7 @@ import {
|
|
|
12
12
|
import {
|
|
13
13
|
sanitizeFtsQuery,
|
|
14
14
|
tokenize
|
|
15
|
-
} from "./chunk-6YJ66JFO.js";
|
|
15
|
+
} from "./chunk-N27Y55CA.js";
|
|
16
16
|
import {
|
|
17
17
|
assertDimensions,
|
|
18
18
|
bufferToVector,
|
package/dist/kv.d.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { c as SyncStore, d as SyncStoreInQuery, A as AsyncStore, a as AsyncStoreInQuery, b as StoreMeta, S as StoreFilter } from './types-DHLdLZ_W.js';
|
|
2
2
|
|
|
3
3
|
/** Lift a synchronous store to the {@link AsyncStore} interface. The bridge only
|
|
4
|
-
* goes this direction — sync ⊂ async. */
|
|
4
|
+
* goes this direction — sync ⊂ async. Optional sync capabilities are lifted too. */
|
|
5
|
+
declare function toAsync(store: SyncStore & SyncStoreInQuery): AsyncStore & AsyncStoreInQuery;
|
|
5
6
|
declare function toAsync(store: SyncStore): AsyncStore;
|
|
6
7
|
|
|
7
|
-
declare class InMemoryStore implements SyncStore {
|
|
8
|
+
declare class InMemoryStore implements SyncStore, SyncStoreInQuery {
|
|
8
9
|
readonly meta: StoreMeta;
|
|
9
10
|
/** Key-value storage. */
|
|
10
11
|
private kv;
|
|
@@ -17,6 +18,7 @@ declare class InMemoryStore implements SyncStore {
|
|
|
17
18
|
keys(prefix?: string): string[];
|
|
18
19
|
private ensureCollection;
|
|
19
20
|
list<T>(collection: string, filter?: StoreFilter): T[];
|
|
21
|
+
listWhereIn<T>(collection: string, field: string, values: readonly unknown[], filter?: StoreFilter): T[];
|
|
20
22
|
getById<T>(collection: string, id: string): T | null;
|
|
21
23
|
put<T extends {
|
|
22
24
|
id: string;
|
|
@@ -25,4 +27,4 @@ declare class InMemoryStore implements SyncStore {
|
|
|
25
27
|
count(collection: string, filter?: StoreFilter): number;
|
|
26
28
|
}
|
|
27
29
|
|
|
28
|
-
export { AsyncStore, InMemoryStore as InMemoryKv, StoreFilter, StoreMeta, SyncStore, toAsync };
|
|
30
|
+
export { AsyncStore, AsyncStoreInQuery, InMemoryStore as InMemoryKv, StoreFilter, StoreMeta, SyncStore, SyncStoreInQuery, toAsync };
|
package/dist/kv.js
CHANGED
package/dist/search.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
import { c as SearchStore, S as SearchDocument, a as SearchOptions, b as SearchResult } from './types-0B0Tw1fz.js';
|
|
1
|
+
import { c as SearchStore, S as SearchDocument, a as SearchOptions, b as SearchResult } from './types-BLZTMsQO.js';
|
|
2
|
+
export { d as SearchFieldDocument, e as SearchTextDocument } from './types-BLZTMsQO.js';
|
|
2
3
|
export { m as matchesWhere } from './filter-B9hP-TKF.js';
|
|
3
|
-
import './types-DyQLNtxa.js';
|
|
4
|
+
import './types-DHLdLZ_W.js';
|
|
4
5
|
|
|
5
6
|
declare class InMemorySearchStore implements SearchStore {
|
|
6
7
|
private readonly collections;
|
package/dist/search.js
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
import {
|
|
2
2
|
InMemorySearchStore
|
|
3
|
-
} from "./chunk-TY472NYD.js";
|
|
3
|
+
} from "./chunk-77IIKHQW.js";
|
|
4
4
|
import {
|
|
5
5
|
sanitizeFtsQuery,
|
|
6
6
|
tokenize
|
|
7
|
-
} from "./chunk-6YJ66JFO.js";
|
|
7
|
+
} from "./chunk-N27Y55CA.js";
|
|
8
8
|
import {
|
|
9
9
|
matchesWhere
|
|
10
10
|
} from "./chunk-KPMRRYTL.js";
|
package/dist/sql.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { S as StoreFilter } from './types-DyQLNtxa.js';
|
|
1
|
+
import { S as StoreFilter } from './types-DHLdLZ_W.js';
|
|
2
2
|
|
|
3
3
|
/** A bound SQL parameter — the common subset both better-sqlite3 and @libsql/client
|
|
4
4
|
* accept. Booleans are pre-converted to 0/1 by the builders (better-sqlite3 rejects
|
|
@@ -1,23 +1,42 @@
|
|
|
1
|
-
import { S as StoreFilter } from './types-DyQLNtxa.js';
|
|
1
|
+
import { S as StoreFilter } from './types-DHLdLZ_W.js';
|
|
2
2
|
|
|
3
3
|
/** A document to be full-text indexed. `meta` is an exact-match-filterable
|
|
4
4
|
* payload persisted as JSON by disk-backed backends, so it must be JSON-
|
|
5
5
|
* serializable; values JSON can't represent (`undefined`, functions) are dropped
|
|
6
6
|
* on persistence — don't rely on them. */
|
|
7
|
-
interface
|
|
7
|
+
interface SearchTextDocument<M = Record<string, unknown>> {
|
|
8
8
|
/** Unique id within the collection. */
|
|
9
9
|
id: string;
|
|
10
|
-
/**
|
|
10
|
+
/** Single-field text to tokenize and index. Back-compat shorthand for
|
|
11
|
+
* `fields: { text }`. */
|
|
11
12
|
text: string;
|
|
13
|
+
/** Provide either `text` or `fields`, not both. */
|
|
14
|
+
fields?: never;
|
|
12
15
|
/** Typed context stored alongside the text. */
|
|
13
16
|
meta?: M;
|
|
14
17
|
}
|
|
18
|
+
interface SearchFieldDocument<M = Record<string, unknown>> {
|
|
19
|
+
/** Unique id within the collection. */
|
|
20
|
+
id: string;
|
|
21
|
+
/** Named text fields to tokenize and index with optional query-time weights
|
|
22
|
+
* (for example `{ title, body }`). All documents in a collection must use the
|
|
23
|
+
* same field names. */
|
|
24
|
+
fields: Record<string, string>;
|
|
25
|
+
/** Provide either `text` or `fields`, not both. */
|
|
26
|
+
text?: never;
|
|
27
|
+
/** Typed context stored alongside the text. */
|
|
28
|
+
meta?: M;
|
|
29
|
+
}
|
|
30
|
+
type SearchDocument<M = Record<string, unknown>> = SearchTextDocument<M> | SearchFieldDocument<M>;
|
|
15
31
|
interface SearchOptions {
|
|
16
32
|
/** Maximum results to return. Default: 10. */
|
|
17
33
|
limit?: number;
|
|
18
34
|
/** Exact-match filter on document `meta` (uses `filter.where`, evaluated with
|
|
19
35
|
* the shared `matchesWhere`). Applied before limit, after the FTS MATCH. */
|
|
20
36
|
filter?: StoreFilter;
|
|
37
|
+
/** Per-field bm25 weights. Fields not listed default to 1. Higher means more
|
|
38
|
+
* important; e.g. `{ title: 4, body: 1 }` boosts title matches. */
|
|
39
|
+
fieldWeights?: Record<string, number>;
|
|
21
40
|
}
|
|
22
41
|
/** A ranked search hit. `score` is the bm25 relevance — higher is more relevant. */
|
|
23
42
|
interface SearchResult<M = Record<string, unknown>> {
|
|
@@ -30,10 +49,11 @@ interface SearchResult<M = Record<string, unknown>> {
|
|
|
30
49
|
}
|
|
31
50
|
/** A synchronous full-text search store with per-collection documents.
|
|
32
51
|
*
|
|
33
|
-
* Ranking is bm25 with the FTS5 default parameters (k1 = 1.2, b = 0.75)
|
|
34
|
-
* in-memory reference and the sqlite
|
|
35
|
-
*
|
|
36
|
-
* backends (the same parity caveat as
|
|
52
|
+
* Ranking is bm25 with the FTS5 default parameters (k1 = 1.2, b = 0.75), plus
|
|
53
|
+
* optional query-time per-field weights. The in-memory reference and the sqlite
|
|
54
|
+
* `.search` facet produce the same RANKING on clear relevance differences;
|
|
55
|
+
* exact float scores need not match across backends (the same parity caveat as
|
|
56
|
+
* /vector). */
|
|
37
57
|
interface SearchStore {
|
|
38
58
|
/** Insert or replace a document by (collection, id). */
|
|
39
59
|
index<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, doc: SearchDocument<M>): void;
|
|
@@ -46,4 +66,4 @@ interface SearchStore {
|
|
|
46
66
|
search<M extends Record<string, unknown> = Record<string, unknown>>(collection: string, query: string, opts?: SearchOptions): SearchResult<M>[];
|
|
47
67
|
}
|
|
48
68
|
|
|
49
|
-
export type { SearchDocument as S, SearchOptions as a, SearchResult as b, SearchStore as c };
|
|
69
|
+
export type { SearchDocument as S, SearchOptions as a, SearchResult as b, SearchStore as c, SearchFieldDocument as d, SearchTextDocument as e };
|
|
@@ -20,6 +20,17 @@ interface StoreFilter {
|
|
|
20
20
|
/** Number of results to skip. */
|
|
21
21
|
offset?: number;
|
|
22
22
|
}
|
|
23
|
+
/** Optional collection capability for adapters that can push `field IN (...)`
|
|
24
|
+
* down to the backing store while preserving the normal StoreFilter semantics.
|
|
25
|
+
* Graph frontier traversal uses this when present and falls back to plain
|
|
26
|
+
* `list()` when absent. */
|
|
27
|
+
interface SyncStoreInQuery {
|
|
28
|
+
listWhereIn<T>(collection: string, field: string, values: readonly unknown[], filter?: StoreFilter): T[];
|
|
29
|
+
}
|
|
30
|
+
/** Async twin of SyncStoreInQuery for remote-capable stores. */
|
|
31
|
+
interface AsyncStoreInQuery {
|
|
32
|
+
listWhereIn<T>(collection: string, field: string, values: readonly unknown[], filter?: StoreFilter): Promise<T[]>;
|
|
33
|
+
}
|
|
23
34
|
/**
|
|
24
35
|
* A typed key-value + collection store. **Synchronous.**
|
|
25
36
|
*
|
|
@@ -80,4 +91,4 @@ interface AsyncStore {
|
|
|
80
91
|
count(collection: string, filter?: StoreFilter): Promise<number>;
|
|
81
92
|
}
|
|
82
93
|
|
|
83
|
-
export type { AsyncStore as A, StoreFilter as S,
|
|
94
|
+
export type { AsyncStore as A, StoreFilter as S, AsyncStoreInQuery as a, StoreMeta as b, SyncStore as c, SyncStoreInQuery as d };
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mirk/store",
|
|
3
|
-
"version": "0.6.0",
|
|
3
|
+
"version": "0.6.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Code-split storage ports + source adapters under one namespace. KV + collection store and vector similarity store as interface subpaths; the sqlite source-adapter implements both over one connection.",
|
|
6
6
|
"license": "Apache-2.0",
|
|
@@ -59,12 +59,6 @@
|
|
|
59
59
|
"files": [
|
|
60
60
|
"dist"
|
|
61
61
|
],
|
|
62
|
-
"scripts": {
|
|
63
|
-
"test": "vitest run",
|
|
64
|
-
"test:watch": "vitest",
|
|
65
|
-
"typecheck": "tsc --noEmit",
|
|
66
|
-
"build": "rm -rf dist && tsup src/index.ts src/kv.ts src/vector.ts src/search.ts src/graph.ts src/sql.ts src/adapters/sqlite.ts --format esm --dts --external better-sqlite3 --external sqlite-vec"
|
|
67
|
-
},
|
|
68
62
|
"peerDependencies": {
|
|
69
63
|
"better-sqlite3": "^11.0.0 || ^12.0.0",
|
|
70
64
|
"sqlite-vec": "^0.1.0"
|
|
@@ -88,5 +82,11 @@
|
|
|
88
82
|
"publishConfig": {
|
|
89
83
|
"registry": "https://registry.npmjs.org",
|
|
90
84
|
"access": "public"
|
|
85
|
+
},
|
|
86
|
+
"scripts": {
|
|
87
|
+
"test": "vitest run",
|
|
88
|
+
"test:watch": "vitest",
|
|
89
|
+
"typecheck": "tsc --noEmit",
|
|
90
|
+
"build": "rm -rf dist && tsup src/index.ts src/kv.ts src/vector.ts src/search.ts src/graph.ts src/sql.ts src/adapters/sqlite.ts --format esm --dts --external better-sqlite3 --external sqlite-vec"
|
|
91
91
|
}
|
|
92
|
-
}
|
|
92
|
+
}
|
package/dist/chunk-6YJ66JFO.js
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
// src/search/tokenize.ts
|
|
2
|
-
/**
 * Split text into lowercase tokens made of Unicode letters and digits.
 *
 * @param {string} text - falsy input (including "") yields no tokens.
 * @returns {string[]} maximal runs of letters/numbers, lowercased, in order.
 */
function tokenize(text) {
  if (!text) return [];
  const lowered = text.toLowerCase();
  const words = lowered.match(/[\p{L}\p{N}]+/gu);
  return words ?? [];
}
|
|
6
|
-
/**
 * Build an FTS MATCH expression from free text.
 *
 * The input is run through `tokenize`; every token is wrapped in double quotes
 * (with embedded quotes doubled) and the quoted tokens are joined with " OR ".
 *
 * @param {string} q - raw user query.
 * @returns {string} the quoted OR-expression, or "" when `q` has no tokens.
 */
function sanitizeFtsQuery(q) {
  const quoted = [];
  for (const tok of tokenize(q)) {
    quoted.push(`"${tok.replace(/"/g, '""')}"`);
  }
  return quoted.length === 0 ? "" : quoted.join(" OR ");
}
|
|
11
|
-
|
|
12
|
-
export {
|
|
13
|
-
tokenize,
|
|
14
|
-
sanitizeFtsQuery
|
|
15
|
-
};
|