@dxos/index-core 0.0.0 → 0.8.4-main.03d5cd7b56

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/dist/lib/neutral/index.mjs +790 -0
  2. package/dist/lib/neutral/index.mjs.map +7 -0
  3. package/dist/lib/neutral/meta.json +1 -0
  4. package/dist/types/src/index-engine.d.ts +112 -0
  5. package/dist/types/src/index-engine.d.ts.map +1 -0
  6. package/dist/types/src/index-engine.test.d.ts +2 -0
  7. package/dist/types/src/index-engine.test.d.ts.map +1 -0
  8. package/dist/types/src/index-tracker.d.ts +44 -0
  9. package/dist/types/src/index-tracker.d.ts.map +1 -0
  10. package/dist/types/src/index-tracker.test.d.ts +2 -0
  11. package/dist/types/src/index-tracker.test.d.ts.map +1 -0
  12. package/dist/types/src/index.d.ts +8 -0
  13. package/dist/types/src/index.d.ts.map +1 -0
  14. package/dist/types/src/indexes/fts-index.d.ts +64 -0
  15. package/dist/types/src/indexes/fts-index.d.ts.map +1 -0
  16. package/dist/types/src/indexes/fts-index.test.d.ts +2 -0
  17. package/dist/types/src/indexes/fts-index.test.d.ts.map +1 -0
  18. package/dist/types/src/indexes/fts5.test.d.ts +2 -0
  19. package/dist/types/src/indexes/fts5.test.d.ts.map +1 -0
  20. package/dist/types/src/indexes/index.d.ts +5 -0
  21. package/dist/types/src/indexes/index.d.ts.map +1 -0
  22. package/dist/types/src/indexes/interface.d.ts +56 -0
  23. package/dist/types/src/indexes/interface.d.ts.map +1 -0
  24. package/dist/types/src/indexes/object-meta-index.d.ts +94 -0
  25. package/dist/types/src/indexes/object-meta-index.d.ts.map +1 -0
  26. package/dist/types/src/indexes/object-meta-index.test.d.ts +2 -0
  27. package/dist/types/src/indexes/object-meta-index.test.d.ts.map +1 -0
  28. package/dist/types/src/indexes/reverse-ref-index.d.ts +37 -0
  29. package/dist/types/src/indexes/reverse-ref-index.d.ts.map +1 -0
  30. package/dist/types/src/indexes/reverse-ref-index.test.d.ts +2 -0
  31. package/dist/types/src/indexes/reverse-ref-index.test.d.ts.map +1 -0
  32. package/dist/types/src/utils.d.ts +17 -0
  33. package/dist/types/src/utils.d.ts.map +1 -0
  34. package/dist/types/tsconfig.tsbuildinfo +1 -0
  35. package/package.json +22 -18
  36. package/src/index-engine.test.ts +172 -9
  37. package/src/index-engine.ts +161 -29
  38. package/src/index-tracker.ts +9 -0
  39. package/src/index.ts +10 -3
  40. package/src/indexes/fts-index.test.ts +153 -3
  41. package/src/indexes/fts-index.ts +66 -10
  42. package/src/indexes/interface.ts +10 -0
  43. package/src/indexes/object-meta-index.test.ts +361 -3
  44. package/src/indexes/object-meta-index.ts +304 -17
  45. package/src/indexes/reverse-ref-index.test.ts +16 -2
  46. package/src/indexes/reverse-ref-index.ts +0 -1
  47. package/src/utils.ts +1 -1
@@ -3,8 +3,8 @@
3
3
  //
4
4
 
5
5
  import * as Reactivity from '@effect/experimental/Reactivity';
6
- import * as SqlClient from '@effect/sql/SqlClient';
7
6
  import * as SqliteClient from '@effect/sql-sqlite-node/SqliteClient';
7
+ import * as SqlClient from '@effect/sql/SqlClient';
8
8
  import { describe, expect, it } from '@effect/vitest';
9
9
  import * as Effect from 'effect/Effect';
10
10
  import * as Layer from 'effect/Layer';
@@ -16,7 +16,7 @@ import { FtsIndex } from './fts-index';
16
16
  import type { IndexerObject } from './interface';
17
17
  import { ObjectMetaIndex } from './object-meta-index';
18
18
 
19
- const TYPE_PERSON = DXN.parse('dxn:type:example.com/type/Person:0.1.0').toString();
19
+ const TYPE_PERSON = DXN.parse('dxn:type:com.example.type.person:0.1.0').toString();
20
20
  const TYPE_DEFAULT = DXN.parse('dxn:type:test.com/type/Type:0.1.0').toString();
21
21
 
22
22
  const TestLayer = Layer.merge(
@@ -55,8 +55,10 @@ describe('FtsIndex', () => {
55
55
  {
56
56
  spaceId,
57
57
  queueId: null,
58
+ queueNamespace: null,
58
59
  documentId: 'doc-1',
59
60
  recordId: null,
61
+ updatedAt: Date.now(),
60
62
  data: {
61
63
  id: ObjectId.random(),
62
64
  [ATTR_TYPE]: TYPE_PERSON,
@@ -99,11 +101,13 @@ describe('FtsIndex', () => {
99
101
  const obj1: IndexerObject = {
100
102
  spaceId,
101
103
  queueId: null,
104
+ queueNamespace: null,
102
105
  documentId: 'doc-1',
103
106
  recordId: null,
107
+ updatedAt: Date.now(),
104
108
  data: {
105
109
  id: objectId,
106
- [ATTR_TYPE]: DXN.parse('dxn:type:example.com/type/Person:0.1.0').toString(),
110
+ [ATTR_TYPE]: DXN.parse('dxn:type:com.example.type.person:0.1.0').toString(),
107
111
  title: 'Original Title',
108
112
  },
109
113
  };
@@ -118,8 +122,10 @@ describe('FtsIndex', () => {
118
122
  const obj2: IndexerObject = {
119
123
  spaceId,
120
124
  queueId: null,
125
+ queueNamespace: null,
121
126
  documentId: 'doc-1',
122
127
  recordId: null,
128
+ updatedAt: Date.now(),
123
129
  data: {
124
130
  id: objectId,
125
131
  [ATTR_TYPE]: TYPE_DEFAULT,
@@ -155,8 +161,10 @@ describe('FtsIndex', () => {
155
161
  {
156
162
  spaceId,
157
163
  queueId: null,
164
+ queueNamespace: null,
158
165
  documentId: 'doc-100',
159
166
  recordId: null,
167
+ updatedAt: Date.now(),
160
168
  data: {
161
169
  id: ObjectId.random(),
162
170
  [ATTR_TYPE]: TYPE_PERSON,
@@ -166,8 +174,10 @@ describe('FtsIndex', () => {
166
174
  {
167
175
  spaceId,
168
176
  queueId: null,
177
+ queueNamespace: null,
169
178
  documentId: 'doc-200',
170
179
  recordId: null,
180
+ updatedAt: Date.now(),
171
181
  data: {
172
182
  id: ObjectId.random(),
173
183
  [ATTR_TYPE]: TYPE_PERSON,
@@ -177,8 +187,10 @@ describe('FtsIndex', () => {
177
187
  {
178
188
  spaceId,
179
189
  queueId: null,
190
+ queueNamespace: null,
180
191
  documentId: 'doc-1000',
181
192
  recordId: null,
193
+ updatedAt: Date.now(),
182
194
  data: {
183
195
  id: ObjectId.random(),
184
196
  [ATTR_TYPE]: TYPE_PERSON,
@@ -225,8 +237,10 @@ describe('FtsIndex', () => {
225
237
  const obj1: IndexerObject = {
226
238
  spaceId: space1,
227
239
  queueId: null,
240
+ queueNamespace: null,
228
241
  documentId: 'doc-s1',
229
242
  recordId: null,
243
+ updatedAt: Date.now(),
230
244
  data: {
231
245
  id: ObjectId.random(),
232
246
  [ATTR_TYPE]: TYPE_PERSON,
@@ -237,8 +251,10 @@ describe('FtsIndex', () => {
237
251
  const obj2: IndexerObject = {
238
252
  spaceId: space2,
239
253
  queueId: null,
254
+ queueNamespace: null,
240
255
  documentId: 'doc-s2',
241
256
  recordId: null,
257
+ updatedAt: Date.now(),
242
258
  data: {
243
259
  id: ObjectId.random(),
244
260
  [ATTR_TYPE]: TYPE_PERSON,
@@ -295,8 +311,10 @@ describe('FtsIndex', () => {
295
311
  {
296
312
  spaceId,
297
313
  queueId: null,
314
+ queueNamespace: null,
298
315
  documentId: 'doc-1',
299
316
  recordId: null,
317
+ updatedAt: Date.now(),
300
318
  data: {
301
319
  id: ObjectId.random(),
302
320
  [ATTR_TYPE]: TYPE_PERSON,
@@ -307,8 +325,10 @@ describe('FtsIndex', () => {
307
325
  {
308
326
  spaceId,
309
327
  queueId: null,
328
+ queueNamespace: null,
310
329
  documentId: 'doc-2',
311
330
  recordId: null,
331
+ updatedAt: Date.now(),
312
332
  data: {
313
333
  id: ObjectId.random(),
314
334
  [ATTR_TYPE]: TYPE_PERSON,
@@ -376,8 +396,10 @@ describe('FtsIndex', () => {
376
396
  const spaceObj: IndexerObject = {
377
397
  spaceId,
378
398
  queueId: null,
399
+ queueNamespace: null,
379
400
  documentId: 'doc-space',
380
401
  recordId: null,
402
+ updatedAt: Date.now(),
381
403
  data: {
382
404
  id: ObjectId.random(),
383
405
  [ATTR_TYPE]: TYPE_PERSON,
@@ -388,8 +410,10 @@ describe('FtsIndex', () => {
388
410
  const queue1Obj: IndexerObject = {
389
411
  spaceId,
390
412
  queueId: queue1,
413
+ queueNamespace: 'data',
391
414
  documentId: null,
392
415
  recordId: null,
416
+ updatedAt: Date.now(),
393
417
  data: {
394
418
  id: ObjectId.random(),
395
419
  [ATTR_TYPE]: TYPE_PERSON,
@@ -400,8 +424,10 @@ describe('FtsIndex', () => {
400
424
  const queue2Obj: IndexerObject = {
401
425
  spaceId,
402
426
  queueId: queue2,
427
+ queueNamespace: 'data',
403
428
  documentId: null,
404
429
  recordId: null,
430
+ updatedAt: Date.now(),
405
431
  data: {
406
432
  id: ObjectId.random(),
407
433
  [ATTR_TYPE]: TYPE_PERSON,
@@ -448,8 +474,10 @@ describe('FtsIndex', () => {
448
474
  const spaceObj: IndexerObject = {
449
475
  spaceId,
450
476
  queueId: null,
477
+ queueNamespace: null,
451
478
  documentId: 'doc-space',
452
479
  recordId: null,
480
+ updatedAt: Date.now(),
453
481
  data: {
454
482
  id: ObjectId.random(),
455
483
  [ATTR_TYPE]: TYPE_PERSON,
@@ -460,8 +488,10 @@ describe('FtsIndex', () => {
460
488
  const queueObj: IndexerObject = {
461
489
  spaceId,
462
490
  queueId,
491
+ queueNamespace: 'data',
463
492
  documentId: null,
464
493
  recordId: null,
494
+ updatedAt: Date.now(),
465
495
  data: {
466
496
  id: ObjectId.random(),
467
497
  [ATTR_TYPE]: TYPE_PERSON,
@@ -509,8 +539,10 @@ describe('FtsIndex', () => {
509
539
  const space1Obj: IndexerObject = {
510
540
  spaceId: space1,
511
541
  queueId: null,
542
+ queueNamespace: null,
512
543
  documentId: 'doc-s1',
513
544
  recordId: null,
545
+ updatedAt: Date.now(),
514
546
  data: {
515
547
  id: ObjectId.random(),
516
548
  [ATTR_TYPE]: TYPE_PERSON,
@@ -521,8 +553,10 @@ describe('FtsIndex', () => {
521
553
  const space2Obj: IndexerObject = {
522
554
  spaceId: space2,
523
555
  queueId: null,
556
+ queueNamespace: null,
524
557
  documentId: 'doc-s2',
525
558
  recordId: null,
559
+ updatedAt: Date.now(),
526
560
  data: {
527
561
  id: ObjectId.random(),
528
562
  [ATTR_TYPE]: TYPE_PERSON,
@@ -533,8 +567,10 @@ describe('FtsIndex', () => {
533
567
  const queueObj: IndexerObject = {
534
568
  spaceId: space2,
535
569
  queueId: queueInSpace2,
570
+ queueNamespace: 'data',
536
571
  documentId: null,
537
572
  recordId: null,
573
+ updatedAt: Date.now(),
538
574
  data: {
539
575
  id: ObjectId.random(),
540
576
  [ATTR_TYPE]: TYPE_PERSON,
@@ -561,4 +597,118 @@ describe('FtsIndex', () => {
561
597
  expect(objectIds).not.toContain(space2Obj.data.id);
562
598
  }, Effect.provide(TestLayer)),
563
599
  );
600
+
601
+ describe('querySnapshotsJSON', () => {
602
+ it.effect(
603
+ 'returns snapshots for all present recordIds',
604
+ Effect.fnUntraced(function* () {
605
+ const index = new FtsIndex();
606
+ const metaIndex = new ObjectMetaIndex();
607
+ yield* index.migrate();
608
+ yield* metaIndex.migrate();
609
+
610
+ const spaceId = SpaceId.random();
611
+ const objects: IndexerObject[] = [
612
+ {
613
+ spaceId,
614
+ queueId: ObjectId.random(),
615
+ queueNamespace: 'data',
616
+ documentId: null,
617
+ recordId: null,
618
+ updatedAt: Date.now(),
619
+ data: { id: ObjectId.random(), [ATTR_TYPE]: TYPE_PERSON, value: 'alpha' },
620
+ },
621
+ {
622
+ spaceId,
623
+ queueId: ObjectId.random(),
624
+ queueNamespace: 'data',
625
+ documentId: null,
626
+ recordId: null,
627
+ updatedAt: Date.now(),
628
+ data: { id: ObjectId.random(), [ATTR_TYPE]: TYPE_PERSON, value: 'beta' },
629
+ },
630
+ ];
631
+
632
+ yield* metaIndex.update(objects);
633
+ yield* metaIndex.lookupRecordIds(objects);
634
+ yield* index.update(objects);
635
+
636
+ const recordIds = objects.map((o) => o.recordId!);
637
+ const snapshots = yield* index.querySnapshotsJSON(recordIds);
638
+
639
+ expect(snapshots).toHaveLength(2);
640
+ const snapshotMap = new Map(snapshots.map((s) => [s.recordId, s.snapshot]));
641
+ expect((snapshotMap.get(objects[0].recordId!) as any).value).toBe('alpha');
642
+ expect((snapshotMap.get(objects[1].recordId!) as any).value).toBe('beta');
643
+ }, Effect.provide(TestLayer)),
644
+ );
645
+
646
+ it.effect(
647
+ 'omits stale recordIds not present in FTS index',
648
+ Effect.fnUntraced(function* () {
649
+ const index = new FtsIndex();
650
+ const metaIndex = new ObjectMetaIndex();
651
+ yield* index.migrate();
652
+ yield* metaIndex.migrate();
653
+
654
+ const spaceId = SpaceId.random();
655
+ const object: IndexerObject = {
656
+ spaceId,
657
+ queueId: ObjectId.random(),
658
+ queueNamespace: 'data',
659
+ documentId: null,
660
+ recordId: null,
661
+ updatedAt: Date.now(),
662
+ data: { id: ObjectId.random(), [ATTR_TYPE]: TYPE_PERSON, value: 'present' },
663
+ };
664
+
665
+ yield* metaIndex.update([object]);
666
+ yield* metaIndex.lookupRecordIds([object]);
667
+ yield* index.update([object]);
668
+
669
+ // Query with the real id plus a stale/non-existent id.
670
+ const staleId = 99999;
671
+ const snapshots = yield* index.querySnapshotsJSON([object.recordId!, staleId]);
672
+
673
+ expect(snapshots).toHaveLength(1);
674
+ expect(snapshots[0].recordId).toBe(object.recordId!);
675
+ expect((snapshots[0].snapshot as any).value).toBe('present');
676
+ }, Effect.provide(TestLayer)),
677
+ );
678
+
679
+ it.effect(
680
+ 'handles more than 999 recordIds without exceeding SQLite variable limit',
681
+ Effect.fnUntraced(function* () {
682
+ const index = new FtsIndex();
683
+ const metaIndex = new ObjectMetaIndex();
684
+ yield* index.migrate();
685
+ yield* metaIndex.migrate();
686
+
687
+ const spaceId = SpaceId.random();
688
+ const count = 1100;
689
+ const objects: IndexerObject[] = Array.from({ length: count }, (_, i) => ({
690
+ spaceId,
691
+ queueId: ObjectId.random(),
692
+ queueNamespace: 'data',
693
+ documentId: null,
694
+ recordId: null,
695
+ updatedAt: Date.now(),
696
+ data: { id: ObjectId.random(), [ATTR_TYPE]: TYPE_PERSON, index: i },
697
+ }));
698
+
699
+ yield* metaIndex.update(objects);
700
+ yield* metaIndex.lookupRecordIds(objects);
701
+ yield* index.update(objects);
702
+
703
+ const recordIds = objects.map((o) => o.recordId!);
704
+ const snapshots = yield* index.querySnapshotsJSON(recordIds);
705
+
706
+ expect(snapshots).toHaveLength(count);
707
+ const returnedIds = new Set(snapshots.map((s) => s.recordId));
708
+ for (const id of recordIds) {
709
+ expect(returnedIds.has(id)).toBe(true);
710
+ }
711
+ }, Effect.provide(TestLayer)),
712
+ );
713
+ });
564
714
  });
@@ -13,6 +13,10 @@ import type { ObjectId, SpaceId } from '@dxos/keys';
13
13
  import type { Index, IndexerObject } from './interface';
14
14
  import type { ObjectMeta } from './object-meta-index';
15
15
 
16
+ // SQLite bound-variable limit (SQLITE_LIMIT_VARIABLE_NUMBER) defaults to 999 in builds older than 3.32.0 (32766 since).
17
+ // Use 500 as a safe chunk size for IN (...) clauses.
18
+ const SQL_CHUNK_SIZE = 500;
19
+
16
20
  /**
17
21
  * The space and queue constraints are combined together using a logical OR.
18
22
  */
@@ -49,6 +53,18 @@ export interface FtsResult extends ObjectMeta {
49
53
  snapshot: string;
50
54
  }
51
55
 
56
+ /**
57
+ * Result of FTS query with rank.
58
+ */
59
+ export interface FtsQueryResult extends ObjectMeta {
60
+ /**
61
+ * Relevance rank from FTS5.
62
+ * Higher values indicate better matches.
63
+ * Uses the BM25 algorithm for FTS5 MATCH queries; LIKE-fallback queries get a constant rank of 1.
64
+ */
65
+ rank: number;
66
+ }
67
+
52
68
  /**
53
69
  * Escapes user input for safe FTS5 queries.
54
70
  *
@@ -92,7 +108,7 @@ export class FtsIndex implements Index {
92
108
  spaceId,
93
109
  includeAllQueues,
94
110
  queueIds,
95
- }: FtsQuery): Effect.Effect<readonly ObjectMeta[], SqlError.SqlError, SqlClient.SqlClient> {
111
+ }: FtsQuery): Effect.Effect<readonly FtsQueryResult[], SqlError.SqlError, SqlClient.SqlClient> {
96
112
  return Effect.gen(function* () {
97
113
  const trimmed = query.trim();
98
114
  if (trimmed.length === 0) {
@@ -106,6 +122,11 @@ export class FtsIndex implements Index {
106
122
  const terms = trimmed.split(/\s+/).filter(Boolean);
107
123
  const minTermLength = Math.min(...terms.map((t) => t.length));
108
124
 
125
+ // Use BM25 ranking for FTS5 MATCH queries, fall back to rank 1 for LIKE queries.
126
+ // BM25 returns negative values where lower (more negative) means better match,
127
+ // so we negate it to get higher = better.
128
+ const useBm25 = minTermLength >= 3;
129
+
109
130
  const conditions =
110
131
  minTermLength < 3
111
132
  ? // LIKE fallback - scan the entire table, AND all terms.
@@ -135,13 +156,36 @@ export class FtsIndex implements Index {
135
156
  conditions.push(sql`(${sql.or(sourceConditions)})`);
136
157
  }
137
158
 
138
- return yield* sql<ObjectMeta>`SELECT m.* FROM ftsIndex AS f JOIN objectMeta AS m ON f.rowid = m.recordId WHERE ${sql.and(conditions)}`;
159
+ if (useBm25) {
160
+ // Use BM25 ranking for FTS5 MATCH queries.
161
+ // BM25 returns negative values, negate to get higher = better match.
162
+ // Order by rank descending so best matches come first.
163
+ // Note: bm25() requires the actual table name, not an alias.
164
+ const rows = yield* sql<ObjectMeta & { rank: number }>`
165
+ SELECT m.*, -bm25(ftsIndex) AS rank
166
+ FROM ftsIndex AS f
167
+ JOIN objectMeta AS m ON f.rowid = m.recordId
168
+ WHERE ${sql.and(conditions)}
169
+ ORDER BY rank DESC
170
+ `;
171
+ return rows;
172
+ } else {
173
+ // LIKE fallback - no ranking available, default to 1.
174
+ const rows = yield* sql<ObjectMeta>`
175
+ SELECT m.*
176
+ FROM ftsIndex AS f
177
+ JOIN objectMeta AS m ON f.rowid = m.recordId
178
+ WHERE ${sql.and(conditions)}
179
+ `;
180
+ return rows.map((row) => ({ ...row, rank: 1 }));
181
+ }
139
182
  });
140
183
  }
141
184
 
142
185
  /**
143
186
  * Query snapshots by recordIds.
144
187
  * Returns the parsed JSON snapshots for queue objects.
188
+ * RecordIds not present in the FTS index are silently omitted from the result.
145
189
  */
146
190
  querySnapshotsJSON(
147
191
  recordIds: number[],
@@ -151,14 +195,26 @@ export class FtsIndex implements Index {
151
195
  return [];
152
196
  }
153
197
  const sql = yield* SqlClient.SqlClient;
154
- const results = yield* sql<{
155
- rowid: number;
156
- snapshot: string;
157
- }>`SELECT rowid, snapshot FROM ftsIndex WHERE rowid IN ${sql.in(recordIds)}`;
158
- return results.map((r) => ({
159
- recordId: r.rowid,
160
- snapshot: JSON.parse(r.snapshot),
161
- }));
198
+
199
+ // Chunk to avoid SQLite bound-variable limit (SQLITE_LIMIT_VARIABLE_NUMBER,
200
+ // typically 999 in wasm builds). 500 gives a safe margin.
201
+ const chunks: number[][] = [];
202
+ for (let i = 0; i < recordIds.length; i += SQL_CHUNK_SIZE) {
203
+ chunks.push(recordIds.slice(i, i + SQL_CHUNK_SIZE));
204
+ }
205
+
206
+ const allResults: { recordId: number; snapshot: Obj.JSON }[] = [];
207
+ for (const chunk of chunks) {
208
+ const rows = yield* sql<{
209
+ rowid: number;
210
+ snapshot: string;
211
+ }>`SELECT rowid, snapshot FROM ftsIndex WHERE rowid IN ${sql.in(chunk)}`;
212
+ for (const r of rows) {
213
+ allResults.push({ recordId: r.rowid, snapshot: JSON.parse(r.snapshot) });
214
+ }
215
+ }
216
+
217
+ return allResults;
162
218
  });
163
219
  }
164
220
 
@@ -19,6 +19,11 @@ export interface IndexerObject {
19
19
  * If null, `documentId` must be set.
20
20
  */
21
21
  queueId: ObjectId | null;
22
+ /**
23
+ * Queue subspace namespace (e.g. 'data', 'trace') the object lives in.
24
+ * Set together with `queueId`; null for non-queue objects.
25
+ */
26
+ queueNamespace: string | null;
22
27
  /**
23
28
  * Document id if object is from the automerge document.
24
29
  * If null, `queueId` must be set.
@@ -36,6 +41,11 @@ export interface IndexerObject {
36
41
  * JSON data of the object.
37
42
  */
38
43
  data: Obj.JSON;
44
+
45
+ /**
46
+ * Timestamp of the last update of the object.
47
+ */
48
+ updatedAt: number;
39
49
  }
40
50
 
41
51
  /**