@prisma/streams-server 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/CONTRIBUTING.md +8 -0
  2. package/package.json +2 -1
  3. package/src/app.ts +290 -17
  4. package/src/app_core.ts +1833 -698
  5. package/src/app_local.ts +144 -4
  6. package/src/auto_tune.ts +62 -0
  7. package/src/bootstrap.ts +159 -1
  8. package/src/concurrency_gate.ts +108 -0
  9. package/src/config.ts +116 -14
  10. package/src/db/db.ts +1201 -131
  11. package/src/db/schema.ts +308 -8
  12. package/src/foreground_activity.ts +55 -0
  13. package/src/index/indexer.ts +254 -124
  14. package/src/index/lexicon_file_cache.ts +261 -0
  15. package/src/index/lexicon_format.ts +93 -0
  16. package/src/index/lexicon_indexer.ts +789 -0
  17. package/src/index/secondary_indexer.ts +824 -0
  18. package/src/index/secondary_schema.ts +105 -0
  19. package/src/ingest.ts +10 -12
  20. package/src/manifest.ts +143 -8
  21. package/src/memory.ts +183 -8
  22. package/src/metrics.ts +15 -29
  23. package/src/metrics_emitter.ts +26 -3
  24. package/src/notifier.ts +121 -5
  25. package/src/objectstore/accounting.ts +92 -0
  26. package/src/objectstore/mock_r2.ts +1 -1
  27. package/src/objectstore/r2.ts +17 -1
  28. package/src/profiles/evlog/schema.ts +234 -0
  29. package/src/profiles/evlog.ts +299 -0
  30. package/src/profiles/generic.ts +47 -0
  31. package/src/profiles/index.ts +205 -0
  32. package/src/profiles/metrics/block_format.ts +109 -0
  33. package/src/profiles/metrics/normalize.ts +366 -0
  34. package/src/profiles/metrics/schema.ts +319 -0
  35. package/src/profiles/metrics.ts +85 -0
  36. package/src/profiles/profile.ts +225 -0
  37. package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
  38. package/src/profiles/stateProtocol/routes.ts +389 -0
  39. package/src/profiles/stateProtocol/types.ts +6 -0
  40. package/src/profiles/stateProtocol/validation.ts +51 -0
  41. package/src/profiles/stateProtocol.ts +100 -0
  42. package/src/read_filter.ts +468 -0
  43. package/src/reader.ts +2151 -164
  44. package/src/runtime/host_runtime.ts +5 -0
  45. package/src/runtime_memory.ts +200 -0
  46. package/src/runtime_memory_sampler.ts +235 -0
  47. package/src/schema/read_json.ts +43 -0
  48. package/src/schema/registry.ts +563 -59
  49. package/src/search/agg_format.ts +638 -0
  50. package/src/search/aggregate.ts +389 -0
  51. package/src/search/binary/codec.ts +162 -0
  52. package/src/search/binary/docset.ts +67 -0
  53. package/src/search/binary/restart_strings.ts +181 -0
  54. package/src/search/binary/varint.ts +34 -0
  55. package/src/search/bitset.ts +19 -0
  56. package/src/search/col_format.ts +382 -0
  57. package/src/search/col_runtime.ts +59 -0
  58. package/src/search/column_encoding.ts +43 -0
  59. package/src/search/companion_file_cache.ts +319 -0
  60. package/src/search/companion_format.ts +313 -0
  61. package/src/search/companion_manager.ts +1086 -0
  62. package/src/search/companion_plan.ts +218 -0
  63. package/src/search/fts_format.ts +423 -0
  64. package/src/search/fts_runtime.ts +333 -0
  65. package/src/search/query.ts +875 -0
  66. package/src/search/schema.ts +245 -0
  67. package/src/segment/cache.ts +93 -2
  68. package/src/segment/cached_segment.ts +89 -0
  69. package/src/segment/format.ts +108 -36
  70. package/src/segment/segmenter.ts +79 -5
  71. package/src/segment/segmenter_worker.ts +35 -6
  72. package/src/segment/segmenter_workers.ts +42 -12
  73. package/src/server.ts +150 -36
  74. package/src/sqlite/adapter.ts +185 -14
  75. package/src/sqlite/runtime_stats.ts +163 -0
  76. package/src/stats.ts +3 -3
  77. package/src/stream_size_reconciler.ts +100 -0
  78. package/src/touch/canonical_change.ts +7 -0
  79. package/src/touch/live_metrics.ts +94 -64
  80. package/src/touch/live_templates.ts +15 -1
  81. package/src/touch/manager.ts +166 -88
  82. package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
  83. package/src/touch/spec.ts +95 -92
  84. package/src/touch/touch_journal.ts +4 -0
  85. package/src/touch/worker_pool.ts +8 -14
  86. package/src/touch/worker_protocol.ts +3 -3
  87. package/src/uploader.ts +77 -6
  88. package/src/util/bloom256.ts +2 -2
  89. package/src/util/byte_lru.ts +73 -0
  90. package/src/util/lru.ts +8 -0
  91. package/src/util/stream_paths.ts +19 -0
package/src/reader.ts CHANGED
@@ -1,18 +1,66 @@
1
- import { existsSync, openSync, readSync, closeSync } from "node:fs";
2
1
  import type { Config } from "./config";
3
2
  import type { SqliteDurableStore, SegmentRow } from "./db/db";
4
3
  import type { ObjectStore } from "./objectstore/interface";
4
+ import {
5
+ type CompiledReadFilter,
6
+ type ReadFilterColumnClause,
7
+ collectPositiveColumnFilterClauses,
8
+ collectPositiveExactFilterClauses,
9
+ evaluateReadFilterResult,
10
+ } from "./read_filter";
11
+ import { decodeJsonPayloadResult } from "./schema/read_json";
12
+ import { SchemaRegistryStore } from "./schema/registry";
5
13
  import { parseOffsetResult, offsetToSeqOrNeg1, encodeOffset } from "./offset";
6
- import { decodeBlockResult, iterateBlocksResult, parseBlockHeaderResult, parseFooterBytes, DSB3_HEADER_BYTES, type SegmentFooter } from "./segment/format";
14
+ import {
15
+ type BlockIndexEntry,
16
+ decodeBlockResult,
17
+ iterateBlocksResult,
18
+ parseBlockHeaderResult,
19
+ parseFooter,
20
+ parseFooterBytes,
21
+ DSB3_HEADER_BYTES,
22
+ } from "./segment/format";
7
23
  import { SegmentDiskCache, type SegmentCacheStats } from "./segment/cache";
24
+ import { loadSegmentBytesCached, loadSegmentSource, readRangeFromSource, type SegmentReadSource } from "./segment/cached_segment";
8
25
  import { Bloom256 } from "./util/bloom256";
9
- import { segmentObjectKey, streamHash16Hex } from "./util/stream_paths";
10
26
  import { readU32BE } from "./util/endian";
11
- import { retry, type RetryOptions } from "./util/retry";
12
- import { LruCache } from "./util/lru";
13
- import type { IndexManager } from "./index/indexer";
27
+ import { type RetryOptions } from "./util/retry";
28
+ import type { IndexCandidate, StreamIndexLookup } from "./index/indexer";
14
29
  import { dsError } from "./util/ds_error.ts";
15
30
  import { Result } from "better-result";
31
+ import { filterDocIdsByColumnResult } from "./search/col_runtime";
32
+ import {
33
+ type AggregateRequest,
34
+ cloneAggMeasureState,
35
+ extractRollupContributionResult,
36
+ extractRollupEligibility,
37
+ formatAggMeasureState,
38
+ mergeAggMeasureState,
39
+ } from "./search/aggregate";
40
+ import {
41
+ type CompiledSearchQuery,
42
+ type SearchColumnClause,
43
+ type SearchEvaluation,
44
+ type SearchExactClause,
45
+ type SearchFtsClause,
46
+ type SearchRequest,
47
+ type SearchSortSpec,
48
+ collectPositiveSearchColumnClauses,
49
+ collectPositiveSearchExactClauses,
50
+ collectPositiveSearchFtsClauses,
51
+ evaluateSearchQueryResult,
52
+ extractSearchHitFieldsResult,
53
+ } from "./search/query";
54
+ import { filterDocIdsByFtsClausesResult } from "./search/fts_runtime";
55
+ import { canonicalizeColumnValue, canonicalizeExactValue } from "./search/schema";
56
+ import { encodeSortableBool, encodeSortableFloat64, encodeSortableInt64 } from "./search/column_encoding";
57
+ import type { SearchRollupConfig } from "./schema/registry";
58
+ import type { AggMeasureState } from "./search/agg_format";
59
+ import type { MetricsBlockSectionView } from "./profiles/metrics/block_format";
60
+ import { materializeMetricsBlockRecord } from "./profiles/metrics/normalize";
61
+ import { buildDesiredSearchCompanionPlan, hashSearchCompanionPlan } from "./search/companion_plan";
62
+ import { RuntimeMemorySampler } from "./runtime_memory_sampler";
63
+ import type { MemoryPressureMonitor } from "./memory";
16
64
 
17
65
  export type ReadFormat = "raw" | "json";
18
66
 
@@ -26,6 +74,87 @@ export type ReadBatch = {
26
74
  endOffsetSeq: bigint;
27
75
  nextOffsetSeq: bigint;
28
76
  records: Array<{ offset: bigint; payload: Uint8Array }>; // payload bytes in wire order
77
+ filterScannedBytes?: number;
78
+ filterScanLimitBytes?: number;
79
+ filterScanLimitReached?: boolean;
80
+ };
81
+
82
+ export type SearchHit = {
83
+ offset: string;
84
+ score: number;
85
+ sort: unknown[];
86
+ fields: Record<string, unknown>;
87
+ source: unknown;
88
+ };
89
+
90
+ export type SearchResultBatch = {
91
+ stream: string;
92
+ snapshotEndOffset: string;
93
+ tookMs: number;
94
+ timedOut: boolean;
95
+ timeoutMs: number | null;
96
+ coverage: {
97
+ mode: "complete" | "published";
98
+ complete: boolean;
99
+ streamHeadOffset: string;
100
+ visibleThroughOffset: string;
101
+ visibleThroughPrimaryTimestampMax: string | null;
102
+ oldestOmittedAppendAt: string | null;
103
+ possibleMissingEventsUpperBound: number;
104
+ possibleMissingUploadedSegments: number;
105
+ possibleMissingSealedRows: number;
106
+ possibleMissingWalRows: number;
107
+ indexedSegments: number;
108
+ indexedSegmentTimeMs: number;
109
+ ftsSectionGetMs: number;
110
+ ftsDecodeMs: number;
111
+ ftsClauseEstimateMs: number;
112
+ scannedSegments: number;
113
+ scannedSegmentTimeMs: number;
114
+ scannedTailDocs: number;
115
+ scannedTailTimeMs: number;
116
+ exactCandidateTimeMs: number;
117
+ indexFamiliesUsed: string[];
118
+ };
119
+ total: {
120
+ value: number;
121
+ relation: "eq" | "gte";
122
+ };
123
+ hits: SearchHit[];
124
+ nextSearchAfter: unknown[] | null;
125
+ };
126
+
127
+ export type AggregateResultBatch = {
128
+ stream: string;
129
+ rollup: string;
130
+ from: string;
131
+ to: string;
132
+ interval: string;
133
+ coverage: {
134
+ mode: "complete" | "published";
135
+ complete: boolean;
136
+ streamHeadOffset: string;
137
+ visibleThroughOffset: string;
138
+ visibleThroughPrimaryTimestampMax: string | null;
139
+ oldestOmittedAppendAt: string | null;
140
+ possibleMissingEventsUpperBound: number;
141
+ possibleMissingUploadedSegments: number;
142
+ possibleMissingSealedRows: number;
143
+ possibleMissingWalRows: number;
144
+ usedRollups: boolean;
145
+ indexedSegments: number;
146
+ scannedSegments: number;
147
+ scannedTailDocs: number;
148
+ indexFamiliesUsed: string[];
149
+ };
150
+ buckets: Array<{
151
+ start: string;
152
+ end: string;
153
+ groups: Array<{
154
+ key: Record<string, string | null>;
155
+ measures: Record<string, unknown>;
156
+ }>;
157
+ }>;
29
158
  };
30
159
 
31
160
  export type ReaderError =
@@ -34,7 +163,50 @@ export type ReaderError =
34
163
  | { kind: "invalid_offset"; message: string }
35
164
  | { kind: "internal"; message: string };
36
165
 
37
- type FooterCacheEntry = { footer: SegmentFooter | null; footerStart: number };
166
+ const READ_FILTER_SCAN_LIMIT_BYTES = 100 * 1024 * 1024;
167
+ type SegmentCandidateInfo = { segments: Set<number> | null; indexedThrough: number };
168
+ type SearchFamilyCandidateInfo = { docIds: Set<number> | null; usedFamilies: Set<string> };
169
+ type SearchHitInternal = {
170
+ offsetSeq: bigint;
171
+ offset: string;
172
+ score: number;
173
+ sortInternal: Array<bigint | number | string | boolean | null>;
174
+ sortResponse: unknown[];
175
+ fields: Record<string, unknown>;
176
+ source: unknown;
177
+ };
178
+ type AggregateGroupInternal = {
179
+ key: Record<string, string | null>;
180
+ measures: Record<string, AggMeasureState>;
181
+ };
182
+ type SearchCursorFieldBound = {
183
+ kind: "field";
184
+ sort: Extract<SearchSortSpec, { kind: "field" }>;
185
+ after: bigint | number | string | boolean | null;
186
+ encoded: Uint8Array | null;
187
+ };
188
+ type PublishedCoverageState = {
189
+ mode: "complete" | "published";
190
+ complete: boolean;
191
+ canSearchWalTail: boolean;
192
+ publishedSegmentCount: number;
193
+ visiblePublishedSegmentCount: number;
194
+ streamHeadOffset: string;
195
+ visibleThroughSeq: bigint;
196
+ visibleThroughOffset: string;
197
+ visibleThroughPrimaryTimestampMax: string | null;
198
+ oldestOmittedAppendAt: string | null;
199
+ possibleMissingEventsUpperBound: number;
200
+ possibleMissingUploadedSegments: number;
201
+ possibleMissingSealedRows: number;
202
+ possibleMissingWalRows: number;
203
+ };
204
+
205
+ type PlannedReadSegments = {
206
+ segments: SegmentRow[];
207
+ sealedEndSeq: bigint;
208
+ };
209
+ type PlannedReadOrder = "asc" | "desc";
38
210
 
39
211
  function errorMessage(e: unknown): string {
40
212
  return String((e as any)?.message ?? e);
@@ -44,109 +216,158 @@ function utf8Bytes(s: string): Uint8Array {
44
216
  return new TextEncoder().encode(s);
45
217
  }
46
218
 
47
- function objectKeyForSegment(seg: SegmentRow): string {
48
- const streamHash = streamHash16Hex(seg.stream);
49
- return segmentObjectKey(streamHash, seg.segment_index);
50
- }
51
-
52
- function readRangeFromFile(path: string, start: number, end: number): Uint8Array {
53
- const len = end - start + 1;
54
- const fd = openSync(path, "r");
219
+ function parseCompanionSections(value: string): Set<string> {
55
220
  try {
56
- const buf = Buffer.alloc(len);
57
- const bytesRead = readSync(fd, buf, 0, len, start);
58
- if (bytesRead !== len) throw dsError("short read");
59
- return new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength);
60
- } finally {
61
- closeSync(fd);
221
+ const parsed = JSON.parse(value);
222
+ return new Set(Array.isArray(parsed) ? parsed.filter((entry) => typeof entry === "string") : []);
223
+ } catch {
224
+ return new Set();
62
225
  }
63
226
  }
64
227
 
65
- async function readSegmentRange(
228
+ async function loadSegmentBytes(
66
229
  os: ObjectStore,
67
230
  seg: SegmentRow,
68
- start: number,
69
- end: number,
70
231
  diskCache?: SegmentDiskCache,
71
232
  retryOpts?: RetryOptions
72
233
  ): Promise<Uint8Array> {
73
- const local = seg.local_path;
74
- if (existsSync(local)) return readRangeFromFile(local, start, end);
75
-
76
- const objectKey = objectKeyForSegment(seg);
77
- if (diskCache && diskCache.has(objectKey)) {
78
- diskCache.recordHit();
79
- diskCache.touch(objectKey);
80
- return readRangeFromFile(diskCache.getPath(objectKey), start, end);
81
- }
82
- if (diskCache) diskCache.recordMiss();
83
-
84
- const bytes = await retry(
85
- async () => {
86
- const res = await os.get(objectKey, { range: { start, end } });
87
- if (!res) throw dsError(`object store missing segment: ${objectKey}`);
88
- return res;
89
- },
90
- retryOpts ?? { retries: 0, baseDelayMs: 0, maxDelayMs: 0, timeoutMs: 0 }
91
- );
92
- if (diskCache && start === 0 && end === seg.size_bytes - 1) {
93
- diskCache.put(objectKey, bytes);
94
- }
95
- return bytes;
234
+ return loadSegmentBytesCached(os, seg, diskCache, retryOpts);
96
235
  }
97
236
 
98
- async function loadSegmentBytes(
99
- os: ObjectStore,
100
- seg: SegmentRow,
101
- diskCache?: SegmentDiskCache,
102
- retryOpts?: RetryOptions
103
- ): Promise<Uint8Array> {
104
- return readSegmentRange(os, seg, 0, seg.size_bytes - 1, diskCache, retryOpts);
237
+ function loadSegmentDataLimitFromSource(seg: SegmentRow, source: SegmentReadSource): number {
238
+ if (seg.size_bytes < 8) return seg.size_bytes;
239
+ const tail = readRangeFromSource(source, seg.size_bytes - 8, seg.size_bytes - 1);
240
+ const magic = String.fromCharCode(tail[4], tail[5], tail[6], tail[7]);
241
+ if (magic !== "DSF1") return seg.size_bytes;
242
+ const footerLen = readU32BE(tail, 0);
243
+ const footerStart = seg.size_bytes - 8 - footerLen;
244
+ return footerStart >= 0 ? footerStart : seg.size_bytes;
105
245
  }
106
246
 
107
- async function loadSegmentFooter(
108
- os: ObjectStore,
109
- seg: SegmentRow,
110
- diskCache?: SegmentDiskCache,
111
- retryOpts?: RetryOptions,
112
- footerCache?: LruCache<string, FooterCacheEntry>
113
- ): Promise<{ footer: SegmentFooter | null; footerStart: number } | null> {
114
- const cacheKey = seg.segment_id;
115
- if (footerCache) {
116
- const cached = footerCache.get(cacheKey);
117
- if (cached) return cached;
247
+ function findFirstRelevantBlockIndex(blocks: BlockIndexEntry[], seq: bigint): number {
248
+ if (blocks.length <= 1) return 0;
249
+ let lo = 0;
250
+ let hi = blocks.length - 1;
251
+ let best = 0;
252
+ while (lo <= hi) {
253
+ const mid = (lo + hi) >>> 1;
254
+ if (blocks[mid]!.firstOffset <= seq) {
255
+ best = mid;
256
+ lo = mid + 1;
257
+ } else {
258
+ hi = mid - 1;
259
+ }
118
260
  }
261
+ return best;
262
+ }
263
+
264
+ function loadSegmentFooterBlocksFromSource(seg: SegmentRow, source: SegmentReadSource): BlockIndexEntry[] | null {
119
265
  if (seg.size_bytes < 8) return null;
120
- const tail = await readSegmentRange(os, seg, seg.size_bytes - 8, seg.size_bytes - 1, diskCache, retryOpts);
266
+ const tail = readRangeFromSource(source, seg.size_bytes - 8, seg.size_bytes - 1);
121
267
  const magic = String.fromCharCode(tail[4], tail[5], tail[6], tail[7]);
122
268
  if (magic !== "DSF1") return null;
123
269
  const footerLen = readU32BE(tail, 0);
124
270
  const footerStart = seg.size_bytes - 8 - footerLen;
125
271
  if (footerStart < 0) return null;
126
- const footerBytes = await readSegmentRange(os, seg, footerStart, footerStart + footerLen - 1, diskCache, retryOpts);
272
+ const footerBytes = readRangeFromSource(source, footerStart, footerStart + footerLen - 1);
127
273
  const footer = parseFooterBytes(footerBytes);
128
- const result = { footer, footerStart };
129
- if (footerCache) footerCache.set(cacheKey, result);
130
- return result;
274
+ return footer?.blocks ?? null;
131
275
  }
132
276
 
133
277
  export class StreamReader {
134
278
  private readonly config: Config;
135
279
  private readonly db: SqliteDurableStore;
136
280
  private readonly os: ObjectStore;
281
+ private readonly registry: SchemaRegistryStore;
137
282
  private readonly diskCache?: SegmentDiskCache;
138
- private readonly footerCache?: LruCache<string, FooterCacheEntry>;
139
- private readonly index?: IndexManager;
283
+ private readonly index?: StreamIndexLookup;
284
+ private readonly memorySampler?: RuntimeMemorySampler;
285
+ private readonly memory?: MemoryPressureMonitor;
140
286
 
141
- constructor(config: Config, db: SqliteDurableStore, os: ObjectStore, diskCache?: SegmentDiskCache, index?: IndexManager) {
287
+ constructor(
288
+ config: Config,
289
+ db: SqliteDurableStore,
290
+ os: ObjectStore,
291
+ registry: SchemaRegistryStore,
292
+ diskCache?: SegmentDiskCache,
293
+ index?: StreamIndexLookup,
294
+ memorySampler?: RuntimeMemorySampler,
295
+ memory?: MemoryPressureMonitor
296
+ ) {
142
297
  this.config = config;
143
298
  this.db = db;
144
299
  this.os = os;
300
+ this.registry = registry;
145
301
  this.diskCache = diskCache;
146
302
  this.index = index;
147
- if (config.segmentFooterCacheEntries > 0) {
148
- this.footerCache = new LruCache(config.segmentFooterCacheEntries);
303
+ this.memorySampler = memorySampler;
304
+ this.memory = memory;
305
+ }
306
+
307
+ private planSealedReadSegments(
308
+ stream: string,
309
+ startSeq: bigint,
310
+ sealedEndSeq: bigint,
311
+ candidateSegments: Set<number> | null,
312
+ indexedThrough: number,
313
+ order: PlannedReadOrder = "asc"
314
+ ): PlannedReadSegments | null {
315
+ if (startSeq > sealedEndSeq) return { segments: [], sealedEndSeq };
316
+ if (candidateSegments == null) return null;
317
+
318
+ const startSeg = this.db.findSegmentForOffset(stream, startSeq);
319
+ const endSeg = this.db.findSegmentForOffset(stream, sealedEndSeq);
320
+ if (!startSeg || !endSeg) return null;
321
+
322
+ const startIndex = startSeg.segment_index;
323
+ const endIndex = endSeg.segment_index;
324
+ const plannedIndexes: number[] = [];
325
+ const seenIndexes = new Set<number>();
326
+ const indexedPrefixEnd = Math.min(endIndex, indexedThrough - 1);
327
+
328
+ if (order === "asc") {
329
+ if (startIndex <= indexedPrefixEnd) {
330
+ const sortedCandidateIndexes = Array.from(candidateSegments)
331
+ .filter((segmentIndex) => segmentIndex >= startIndex && segmentIndex <= indexedPrefixEnd)
332
+ .sort((a, b) => a - b);
333
+ for (const segmentIndex of sortedCandidateIndexes) {
334
+ if (seenIndexes.has(segmentIndex)) continue;
335
+ plannedIndexes.push(segmentIndex);
336
+ seenIndexes.add(segmentIndex);
337
+ }
338
+ }
339
+
340
+ const tailStartIndex = Math.max(startIndex, indexedThrough);
341
+ for (let segmentIndex = tailStartIndex; segmentIndex <= endIndex; segmentIndex++) {
342
+ if (seenIndexes.has(segmentIndex)) continue;
343
+ plannedIndexes.push(segmentIndex);
344
+ seenIndexes.add(segmentIndex);
345
+ }
346
+ } else {
347
+ for (let segmentIndex = endIndex; segmentIndex >= Math.max(startIndex, indexedThrough); segmentIndex--) {
348
+ if (seenIndexes.has(segmentIndex)) continue;
349
+ plannedIndexes.push(segmentIndex);
350
+ seenIndexes.add(segmentIndex);
351
+ }
352
+ if (startIndex <= indexedPrefixEnd) {
353
+ const sortedCandidateIndexes = Array.from(candidateSegments)
354
+ .filter((segmentIndex) => segmentIndex >= startIndex && segmentIndex <= indexedPrefixEnd)
355
+ .sort((a, b) => b - a);
356
+ for (const segmentIndex of sortedCandidateIndexes) {
357
+ if (seenIndexes.has(segmentIndex)) continue;
358
+ plannedIndexes.push(segmentIndex);
359
+ seenIndexes.add(segmentIndex);
360
+ }
361
+ }
362
+ }
363
+
364
+ const plannedSegments: SegmentRow[] = [];
365
+ for (const segmentIndex of plannedIndexes) {
366
+ const seg = this.db.getSegmentByIndex(stream, segmentIndex);
367
+ if (!seg) return null;
368
+ plannedSegments.push(seg);
149
369
  }
370
+ return { segments: plannedSegments, sealedEndSeq };
150
371
  }
151
372
 
152
373
  cacheStats(): SegmentCacheStats | null {
@@ -162,6 +383,149 @@ export class StreamReader {
162
383
  };
163
384
  }
164
385
 
386
+ private isoTimestampFromMs(value: bigint | null): string | null {
387
+ if (value == null) return null;
388
+ const ms = Number(value);
389
+ if (!Number.isFinite(ms) || ms < 0) return null;
390
+ return new Date(ms).toISOString();
391
+ }
392
+
393
+ private shouldSearchWalTail(
394
+ srow: { pending_rows: bigint; pending_bytes: bigint; last_append_ms: bigint; segment_in_progress: number },
395
+ hasOutstandingPublishedSegments: boolean,
396
+ hasOutstandingCompanions: boolean
397
+ ): boolean {
398
+ if (srow.pending_rows <= 0n) return false;
399
+ if (hasOutstandingPublishedSegments || hasOutstandingCompanions) return false;
400
+ if (srow.segment_in_progress !== 0) return false;
401
+ const quietPeriodMs = Math.max(0, this.config.searchWalOverlayQuietPeriodMs);
402
+ const quietForMs = Number(this.db.nowMs() - srow.last_append_ms);
403
+ if (!Number.isFinite(quietForMs) || quietForMs < quietPeriodMs) return false;
404
+ if (srow.pending_bytes > BigInt(this.config.searchWalOverlayMaxBytes)) return false;
405
+ if (srow.pending_rows > BigInt(this.config.segmentTargetRows)) return false;
406
+ return true;
407
+ }
408
+
409
+ private computeOldestOmittedAppendAt(
410
+ stream: string,
411
+ srow: { uploaded_through: bigint; sealed_through: bigint; pending_rows: bigint },
412
+ visiblePublishedSegmentCount: number,
413
+ publishedSegmentCount: number,
414
+ shouldSearchWalTail: boolean
415
+ ): string | null {
416
+ if (visiblePublishedSegmentCount < publishedSegmentCount) {
417
+ const firstOmittedSegment = this.db.getSegmentByIndex(stream, visiblePublishedSegmentCount);
418
+ return this.isoTimestampFromMs(firstOmittedSegment?.last_append_ms ?? null);
419
+ }
420
+ if (srow.sealed_through > srow.uploaded_through) {
421
+ const firstSealedOmitted = this.db.findSegmentForOffset(stream, srow.uploaded_through + 1n);
422
+ return this.isoTimestampFromMs(firstSealedOmitted?.last_append_ms ?? null);
423
+ }
424
+ if (srow.pending_rows > 0n && !shouldSearchWalTail) {
425
+ return this.isoTimestampFromMs(this.db.getWalOldestTimestampMs(stream));
426
+ }
427
+ return null;
428
+ }
429
+
430
+ private computePublishedCoverageState(
431
+ stream: string,
432
+ srow: {
433
+ epoch: number;
434
+ next_offset: bigint;
435
+ sealed_through: bigint;
436
+ uploaded_through: bigint;
437
+ pending_rows: bigint;
438
+ pending_bytes: bigint;
439
+ last_append_ms: bigint;
440
+ segment_in_progress: number;
441
+ },
442
+ registry: { search?: { fields: Record<string, unknown> } }
443
+ ): PublishedCoverageState {
444
+ const totalSegmentCount = this.db.countSegmentsForStream(stream);
445
+ const publishedSegmentCount =
446
+ srow.uploaded_through >= 0n
447
+ ? ((this.db.findSegmentForOffset(stream, srow.uploaded_through)?.segment_index ?? -1) + 1)
448
+ : 0;
449
+
450
+ const desiredPlan = buildDesiredSearchCompanionPlan(registry as any);
451
+ const planHasFamilies = Object.values(desiredPlan.families).some(Boolean);
452
+ let visiblePublishedSegmentCount = publishedSegmentCount;
453
+ let visibleThroughPrimaryTimestampMax: string | null = null;
454
+ if (planHasFamilies) {
455
+ const desiredHash = hashSearchCompanionPlan(desiredPlan);
456
+ const companionPlanRow = this.db.getSearchCompanionPlan(stream);
457
+ const desiredGeneration =
458
+ companionPlanRow == null
459
+ ? 1
460
+ : companionPlanRow.plan_hash === desiredHash
461
+ ? companionPlanRow.generation
462
+ : companionPlanRow.generation + 1;
463
+ const currentCompanions = this.db
464
+ .listSearchSegmentCompanions(stream)
465
+ .filter((row) => row.plan_generation === desiredGeneration);
466
+ const currentSegments = new Set<number>();
467
+ for (const row of currentCompanions) {
468
+ const sections = parseCompanionSections(row.sections_json);
469
+ const hasEnabledFamily = Object.entries(desiredPlan.families).some(([family, enabled]) => enabled && sections.has(family));
470
+ if (hasEnabledFamily) currentSegments.add(row.segment_index);
471
+ }
472
+ visiblePublishedSegmentCount = 0;
473
+ while (visiblePublishedSegmentCount < publishedSegmentCount && currentSegments.has(visiblePublishedSegmentCount)) {
474
+ visiblePublishedSegmentCount += 1;
475
+ }
476
+ if (visiblePublishedSegmentCount > 0) {
477
+ const visibleCompanionRow = currentCompanions.find((row) => row.segment_index === visiblePublishedSegmentCount - 1) ?? null;
478
+ visibleThroughPrimaryTimestampMax = this.isoTimestampFromMs(visibleCompanionRow?.primary_timestamp_max_ms ?? null);
479
+ }
480
+ }
481
+
482
+ const hasOutstandingPublishedSegments = publishedSegmentCount < totalSegmentCount;
483
+ const hasOutstandingCompanions = planHasFamilies && visiblePublishedSegmentCount < publishedSegmentCount;
484
+ const canSearchWalTail = this.shouldSearchWalTail(srow, hasOutstandingPublishedSegments, hasOutstandingCompanions);
485
+ const omitWalTail = srow.pending_rows > 0n && !canSearchWalTail;
486
+
487
+ let visibleThroughSeq = srow.next_offset - 1n;
488
+ if (hasOutstandingPublishedSegments || hasOutstandingCompanions || omitWalTail) {
489
+ if (visiblePublishedSegmentCount > 0) {
490
+ visibleThroughSeq = this.db.getSegmentByIndex(stream, visiblePublishedSegmentCount - 1)?.end_offset ?? -1n;
491
+ } else {
492
+ visibleThroughSeq = -1n;
493
+ }
494
+ }
495
+
496
+ const possibleMissingUploadedSegments = Math.max(0, publishedSegmentCount - visiblePublishedSegmentCount);
497
+ const hasOmittedPublishedSuffix = hasOutstandingPublishedSegments || hasOutstandingCompanions;
498
+ const possibleMissingUploadedRows = hasOmittedPublishedSuffix && srow.uploaded_through > visibleThroughSeq ? Number(srow.uploaded_through - visibleThroughSeq) : 0;
499
+ const possibleMissingSealedRows = hasOmittedPublishedSuffix && srow.sealed_through > srow.uploaded_through ? Number(srow.sealed_through - srow.uploaded_through) : 0;
500
+ const possibleMissingWalRows = omitWalTail ? Number(srow.pending_rows) : 0;
501
+ const possibleMissingEventsUpperBound = possibleMissingUploadedRows + possibleMissingSealedRows + possibleMissingWalRows;
502
+ const streamHeadOffset = encodeOffset(srow.epoch, srow.next_offset - 1n);
503
+ const oldestOmittedAppendAt = this.computeOldestOmittedAppendAt(
504
+ stream,
505
+ srow,
506
+ visiblePublishedSegmentCount,
507
+ publishedSegmentCount,
508
+ canSearchWalTail
509
+ );
510
+
511
+ return {
512
+ mode: possibleMissingEventsUpperBound === 0 ? "complete" : "published",
513
+ complete: possibleMissingEventsUpperBound === 0,
514
+ canSearchWalTail,
515
+ publishedSegmentCount,
516
+ visiblePublishedSegmentCount,
517
+ streamHeadOffset,
518
+ visibleThroughSeq,
519
+ visibleThroughOffset: encodeOffset(srow.epoch, visibleThroughSeq),
520
+ visibleThroughPrimaryTimestampMax,
521
+ oldestOmittedAppendAt,
522
+ possibleMissingEventsUpperBound,
523
+ possibleMissingUploadedSegments,
524
+ possibleMissingSealedRows,
525
+ possibleMissingWalRows,
526
+ };
527
+ }
528
+
165
529
  async seekOffsetByTimestampResult(stream: string, sinceMs: bigint, key: string | null): Promise<Result<string, ReaderError>> {
166
530
  const srow = this.db.getStream(stream);
167
531
  if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
@@ -171,10 +535,17 @@ export class StreamReader {
171
535
  try {
172
536
  const sinceNs = sinceMs * 1_000_000n;
173
537
  const keyBytes = key ? utf8Bytes(key) : null;
538
+ const candidateInfo = await this.resolveCandidateSegments(stream, keyBytes, null);
539
+ const plannedSealedSegments = this.planSealedReadSegments(
540
+ stream,
541
+ 0n,
542
+ srow.sealed_through,
543
+ candidateInfo.segments,
544
+ candidateInfo.indexedThrough,
545
+ "asc"
546
+ );
174
547
 
175
- // Scan segments in order.
176
- const segments = this.db.listSegmentsForStream(stream);
177
- for (const seg of segments) {
548
+ for (const seg of plannedSealedSegments?.segments ?? this.db.listSegmentsForStream(stream)) {
178
549
  const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
179
550
  let curOffset = seg.start_offset;
180
551
  for (const blockRes of iterateBlocksResult(segBytes)) {
@@ -224,8 +595,14 @@ export class StreamReader {
224
595
  return res.value;
225
596
  }
226
597
 
227
- async readResult(args: { stream: string; offset: string; key: string | null; format: ReadFormat }): Promise<Result<ReadBatch, ReaderError>> {
228
- const { stream, offset, key, format } = args;
598
+ async readResult(args: {
599
+ stream: string;
600
+ offset: string;
601
+ key: string | null;
602
+ format: ReadFormat;
603
+ filter?: CompiledReadFilter | null;
604
+ }): Promise<Result<ReadBatch, ReaderError>> {
605
+ const { stream, offset, key, format, filter = null } = args;
229
606
  const srow = this.db.getStream(stream);
230
607
  if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
231
608
  if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
@@ -246,6 +623,8 @@ export class StreamReader {
246
623
 
247
624
  const results: Array<{ offset: bigint; payload: Uint8Array }> = [];
248
625
  let bytesOut = 0;
626
+ let filterScannedBytes = 0;
627
+ let filterScanLimitReached = false;
249
628
 
250
629
  // Nothing to read.
251
630
  if (desiredOffset > endOffsetNum) {
@@ -259,16 +638,113 @@ export class StreamReader {
259
638
  endOffsetSeq: endOffsetNum,
260
639
  nextOffsetSeq: startOffsetExclusive,
261
640
  records: [],
641
+ ...(filter
642
+ ? {
643
+ filterScannedBytes,
644
+ filterScanLimitBytes: READ_FILTER_SCAN_LIMIT_BYTES,
645
+ filterScanLimitReached,
646
+ }
647
+ : {}),
262
648
  });
263
649
  }
264
650
 
265
651
  let seq = desiredOffset;
266
652
  const keyBytes = key ? utf8Bytes(key) : null;
267
- const indexInfo = keyBytes && this.index ? await this.index.candidateSegments(stream, keyBytes) : null;
268
- const candidateSegments = indexInfo?.segments ?? null;
269
- const indexedThrough = indexInfo?.indexedThrough ?? 0;
653
+ const candidateInfo = await this.resolveCandidateSegments(stream, keyBytes, filter);
654
+ const candidateSegments = candidateInfo.segments;
655
+ const indexedThrough = candidateInfo.indexedThrough;
656
+ const columnClauses = filter ? collectPositiveColumnFilterClauses(filter) : [];
657
+ const filterRegistryRes = filter ? this.registry.getRegistryResult(stream) : Result.ok(null);
658
+ if (Result.isError(filterRegistryRes)) return Result.err({ kind: "internal", message: filterRegistryRes.error.message });
659
+ const filterRegistry = filterRegistryRes.value;
660
+
661
+ const evaluateRecordResult = (
662
+ offset: bigint,
663
+ routingKey: Uint8Array | null | undefined,
664
+ payload: Uint8Array
665
+ ): Result<{ matched: boolean; stop: boolean }, ReaderError> => {
666
+ if (filter) {
667
+ filterScannedBytes += payload.byteLength;
668
+ }
669
+ if (keyBytes && (!routingKey || !bytesEqual(routingKey, keyBytes))) {
670
+ return Result.ok({
671
+ matched: false,
672
+ stop: !!filter && filterScannedBytes >= READ_FILTER_SCAN_LIMIT_BYTES,
673
+ });
674
+ }
675
+ if (!filter) return Result.ok({ matched: true, stop: false });
676
+ const valueRes = decodeJsonPayloadResult(this.registry, stream, offset, payload);
677
+ if (Result.isError(valueRes)) {
678
+ return Result.err({ kind: "internal", message: valueRes.error.message });
679
+ }
680
+ const matchesRes = evaluateReadFilterResult(filterRegistry!, offset, filter, valueRes.value);
681
+ if (Result.isError(matchesRes)) return Result.err({ kind: "internal", message: matchesRes.error.message });
682
+ return Result.ok({
683
+ matched: matchesRes.value,
684
+ stop: filterScannedBytes >= READ_FILTER_SCAN_LIMIT_BYTES,
685
+ });
686
+ };
687
+
688
+ const scanSegmentBytes = async (
689
+ segBytes: Uint8Array,
690
+ seg: SegmentRow,
691
+ allowedDocIds: Set<number> | null
692
+ ): Promise<Result<void, ReaderError>> => {
693
+ const footer = parseFooter(segBytes)?.footer;
694
+ if (footer) {
695
+ for (let blockIndex = findFirstRelevantBlockIndex(footer.blocks, seq); blockIndex < footer.blocks.length; blockIndex++) {
696
+ const block = footer.blocks[blockIndex]!;
697
+ const blockStart = block.firstOffset;
698
+ const blockEnd = blockStart + BigInt(block.recordCount) - 1n;
699
+ if (blockEnd < seq) continue;
700
+ if (blockStart > endOffsetNum) break;
701
+
702
+ if (keyBytes) {
703
+ const headerBytes = segBytes.subarray(block.blockOffset, block.blockOffset + DSB3_HEADER_BYTES);
704
+ const headerRes = parseBlockHeaderResult(headerBytes);
705
+ if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
706
+ const bloom = new Bloom256(headerRes.value.bloom);
707
+ if (!bloom.maybeHas(keyBytes)) continue;
708
+ }
709
+
710
+ const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
711
+ const blockBytes = segBytes.subarray(block.blockOffset, block.blockOffset + totalLen);
712
+ const decodedRes = decodeBlockResult(blockBytes);
713
+ if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
714
+ const decoded = decodedRes.value;
715
+ let curOffset = blockStart;
716
+ for (const r of decoded.records) {
717
+ if (curOffset < seq) {
718
+ curOffset += 1n;
719
+ continue;
720
+ }
721
+ if (curOffset > endOffsetNum) break;
722
+ const localDocId = Number(curOffset - seg.start_offset);
723
+ if (allowedDocIds && !allowedDocIds.has(localDocId)) {
724
+ curOffset += 1n;
725
+ continue;
726
+ }
727
+ const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
728
+ if (Result.isError(matchRes)) return matchRes;
729
+ if (matchRes.value.matched) {
730
+ results.push({ offset: curOffset, payload: r.payload });
731
+ bytesOut += r.payload.byteLength;
732
+ }
733
+ curOffset += 1n;
734
+ if (matchRes.value.stop) {
735
+ filterScanLimitReached = true;
736
+ seq = curOffset;
737
+ return Result.ok(undefined);
738
+ }
739
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
740
+ seq = curOffset;
741
+ return Result.ok(undefined);
742
+ }
743
+ }
744
+ }
745
+ return Result.ok(undefined);
746
+ }
270
747
 
271
- const scanSegmentBytes = async (segBytes: Uint8Array, seg: SegmentRow): Promise<Result<void, ReaderError>> => {
272
748
  let curOffset = seg.start_offset;
273
749
  for (const blockRes of iterateBlocksResult(segBytes)) {
274
750
  if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
@@ -286,13 +762,23 @@ export class StreamReader {
286
762
  continue;
287
763
  }
288
764
  if (curOffset > endOffsetNum) break;
289
- if (keyBytes && !bytesEqual(r.routingKey, keyBytes)) {
765
+ const localDocId = Number(curOffset - seg.start_offset);
766
+ if (allowedDocIds && !allowedDocIds.has(localDocId)) {
290
767
  curOffset += 1n;
291
768
  continue;
292
769
  }
293
- results.push({ offset: curOffset, payload: r.payload });
294
- bytesOut += r.payload.byteLength;
770
+ const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
771
+ if (Result.isError(matchRes)) return matchRes;
772
+ if (matchRes.value.matched) {
773
+ results.push({ offset: curOffset, payload: r.payload });
774
+ bytesOut += r.payload.byteLength;
775
+ }
295
776
  curOffset += 1n;
777
+ if (matchRes.value.stop) {
778
+ filterScanLimitReached = true;
779
+ seq = curOffset;
780
+ return Result.ok(undefined);
781
+ }
296
782
  if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
297
783
  seq = curOffset;
298
784
  return Result.ok(undefined);
@@ -302,91 +788,219 @@ export class StreamReader {
302
788
  return Result.ok(undefined);
303
789
  };
304
790
 
305
- // 1) Read from sealed segments.
306
- while (seq <= endOffsetNum && seq <= srow.sealed_through) {
307
- const seg = this.db.findSegmentForOffset(stream, seq);
308
- if (!seg) {
309
- // Corruption in local metadata: sealed_through points past segments table.
310
- break;
791
+ const scanSegmentSource = async (
792
+ source: SegmentReadSource,
793
+ seg: SegmentRow,
794
+ allowedDocIds: Set<number> | null
795
+ ): Promise<Result<void, ReaderError>> => {
796
+ const footerBlocks = loadSegmentFooterBlocksFromSource(seg, source);
797
+ if (footerBlocks) {
798
+ for (let blockIndex = findFirstRelevantBlockIndex(footerBlocks, seq); blockIndex < footerBlocks.length; blockIndex++) {
799
+ const block = footerBlocks[blockIndex]!;
800
+ const blockStart = block.firstOffset;
801
+ const blockEnd = blockStart + BigInt(block.recordCount) - 1n;
802
+ if (blockEnd < seq) continue;
803
+ if (blockStart > endOffsetNum) break;
804
+
805
+ const headerBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + DSB3_HEADER_BYTES - 1);
806
+ const headerRes = parseBlockHeaderResult(headerBytes);
807
+ if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
808
+ if (keyBytes) {
809
+ const bloom = new Bloom256(headerRes.value.bloom);
810
+ if (!bloom.maybeHas(keyBytes)) continue;
811
+ }
812
+
813
+ const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
814
+ const blockBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + totalLen - 1);
815
+ const decodedRes = decodeBlockResult(blockBytes);
816
+ if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
817
+ const decoded = decodedRes.value;
818
+ let curOffset = blockStart;
819
+ for (const r of decoded.records) {
820
+ if (curOffset < seq) {
821
+ curOffset += 1n;
822
+ continue;
823
+ }
824
+ if (curOffset > endOffsetNum) break;
825
+ const localDocId = Number(curOffset - seg.start_offset);
826
+ if (allowedDocIds && !allowedDocIds.has(localDocId)) {
827
+ curOffset += 1n;
828
+ continue;
829
+ }
830
+ const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
831
+ if (Result.isError(matchRes)) return matchRes;
832
+ if (matchRes.value.matched) {
833
+ results.push({ offset: curOffset, payload: r.payload });
834
+ bytesOut += r.payload.byteLength;
835
+ }
836
+ curOffset += 1n;
837
+ if (matchRes.value.stop) {
838
+ filterScanLimitReached = true;
839
+ seq = curOffset;
840
+ return Result.ok(undefined);
841
+ }
842
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
843
+ seq = curOffset;
844
+ return Result.ok(undefined);
845
+ }
846
+ }
847
+ }
848
+ return Result.ok(undefined);
311
849
  }
312
- if (keyBytes && candidateSegments && seg.segment_index < indexedThrough && !candidateSegments.has(seg.segment_index)) {
850
+
851
+ const limit = loadSegmentDataLimitFromSource(seg, source);
852
+ let blockOffset = 0;
853
+ let blockFirstOffset = seg.start_offset;
854
+ while (blockOffset < limit) {
855
+ const headerBytes = readRangeFromSource(source, blockOffset, blockOffset + DSB3_HEADER_BYTES - 1);
856
+ const headerRes = parseBlockHeaderResult(headerBytes);
857
+ if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
858
+ const header = headerRes.value;
859
+ const totalLen = DSB3_HEADER_BYTES + header.compressedLen;
860
+ const blockStart = blockFirstOffset;
861
+ const blockEnd = blockStart + BigInt(header.recordCount) - 1n;
862
+ if (blockEnd < seq) {
863
+ blockOffset += totalLen;
864
+ blockFirstOffset = blockEnd + 1n;
865
+ continue;
866
+ }
867
+ if (blockStart > endOffsetNum) break;
868
+
869
+ if (keyBytes) {
870
+ const bloom = new Bloom256(header.bloom);
871
+ if (!bloom.maybeHas(keyBytes)) {
872
+ blockOffset += totalLen;
873
+ blockFirstOffset = blockEnd + 1n;
874
+ continue;
875
+ }
876
+ }
877
+
878
+ const blockBytes = readRangeFromSource(source, blockOffset, blockOffset + totalLen - 1);
879
+ const decodedRes = decodeBlockResult(blockBytes);
880
+ if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
881
+ const decoded = decodedRes.value;
882
+ let curOffset = blockStart;
883
+ for (const r of decoded.records) {
884
+ if (curOffset < seq) {
885
+ curOffset += 1n;
886
+ continue;
887
+ }
888
+ if (curOffset > endOffsetNum) break;
889
+ const localDocId = Number(curOffset - seg.start_offset);
890
+ if (allowedDocIds && !allowedDocIds.has(localDocId)) {
891
+ curOffset += 1n;
892
+ continue;
893
+ }
894
+ const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
895
+ if (Result.isError(matchRes)) return matchRes;
896
+ if (matchRes.value.matched) {
897
+ results.push({ offset: curOffset, payload: r.payload });
898
+ bytesOut += r.payload.byteLength;
899
+ }
900
+ curOffset += 1n;
901
+ if (matchRes.value.stop) {
902
+ filterScanLimitReached = true;
903
+ seq = curOffset;
904
+ return Result.ok(undefined);
905
+ }
906
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
907
+ seq = curOffset;
908
+ return Result.ok(undefined);
909
+ }
910
+ }
911
+ blockOffset += totalLen;
912
+ blockFirstOffset = blockEnd + 1n;
913
+ }
914
+ return Result.ok(undefined);
915
+ };
916
+
917
+ const sealedEndSeq = endOffsetNum < srow.sealed_through ? endOffsetNum : srow.sealed_through;
918
+ const plannedSealedSegments = this.planSealedReadSegments(
919
+ stream,
920
+ seq,
921
+ sealedEndSeq,
922
+ candidateSegments,
923
+ indexedThrough,
924
+ "asc"
925
+ );
926
+
927
+ // 1) Read from sealed segments.
928
+ if (plannedSealedSegments) {
929
+ for (const seg of plannedSealedSegments.segments) {
930
+ if (seg.end_offset < seq) continue;
931
+ if (seg.start_offset > sealedEndSeq) break;
932
+ let allowedDocIds: Set<number> | null = null;
933
+ if (columnClauses.length > 0) {
934
+ const docIdsRes = await this.resolveColumnCandidateDocIdsResult(stream, seg.segment_index, columnClauses);
935
+ if (Result.isError(docIdsRes)) return Result.err({ kind: "internal", message: docIdsRes.error.message });
936
+ if (docIdsRes.value) {
937
+ allowedDocIds = docIdsRes.value;
938
+ if (allowedDocIds.size === 0) {
939
+ seq = seg.end_offset + 1n;
940
+ continue;
941
+ }
942
+ }
943
+ }
944
+ const preferFull = !keyBytes && this.config.readMaxBytes >= seg.size_bytes;
945
+ if (preferFull) {
946
+ const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
947
+ const scanRes = await scanSegmentBytes(segBytes, seg, allowedDocIds);
948
+ if (Result.isError(scanRes)) return scanRes;
949
+ if (filterScanLimitReached) return Result.ok(finalize());
950
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
951
+ } else {
952
+ const source = await loadSegmentSource(this.os, seg, this.diskCache, this.retryOpts());
953
+ const scanRes = await scanSegmentSource(source, seg, allowedDocIds);
954
+ if (Result.isError(scanRes)) return scanRes;
955
+ if (filterScanLimitReached) return Result.ok(finalize());
956
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
957
+ }
313
958
  seq = seg.end_offset + 1n;
314
- continue;
959
+ }
960
+ if (seq <= plannedSealedSegments.sealedEndSeq) {
961
+ seq = plannedSealedSegments.sealedEndSeq + 1n;
962
+ }
963
+ } else {
964
+ while (seq <= endOffsetNum && seq <= srow.sealed_through) {
965
+ const seg = this.db.findSegmentForOffset(stream, seq);
966
+ if (!seg) {
967
+ // Corruption in local metadata: sealed_through points past segments table.
968
+ break;
969
+ }
970
+ if (candidateSegments && seg.segment_index < indexedThrough && !candidateSegments.has(seg.segment_index)) {
971
+ seq = seg.end_offset + 1n;
972
+ continue;
973
+ }
974
+ let allowedDocIds: Set<number> | null = null;
975
+ if (columnClauses.length > 0) {
976
+ const docIdsRes = await this.resolveColumnCandidateDocIdsResult(stream, seg.segment_index, columnClauses);
977
+ if (Result.isError(docIdsRes)) return Result.err({ kind: "internal", message: docIdsRes.error.message });
978
+ if (docIdsRes.value) {
979
+ allowedDocIds = docIdsRes.value;
980
+ if (allowedDocIds.size === 0) {
981
+ seq = seg.end_offset + 1n;
982
+ continue;
983
+ }
984
+ }
315
985
  }
316
986
  const preferFull = !keyBytes && this.config.readMaxBytes >= seg.size_bytes;
317
987
  if (preferFull) {
318
988
  const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
319
- const scanRes = await scanSegmentBytes(segBytes, seg);
989
+ const scanRes = await scanSegmentBytes(segBytes, seg, allowedDocIds);
320
990
  if (Result.isError(scanRes)) return scanRes;
991
+ if (filterScanLimitReached) return Result.ok(finalize());
321
992
  if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
322
993
  } else {
323
- const footerInfo = await loadSegmentFooter(this.os, seg, this.diskCache, this.retryOpts(), this.footerCache);
324
- if (!footerInfo || !footerInfo.footer) {
325
- const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
326
- const scanRes = await scanSegmentBytes(segBytes, seg);
327
- if (Result.isError(scanRes)) return scanRes;
328
- if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
329
- } else {
330
- const footer = footerInfo.footer;
331
- for (const entry of footer.blocks) {
332
- const blockStart = entry.firstOffset;
333
- const blockEnd = entry.firstOffset + BigInt(entry.recordCount) - 1n;
334
- if (blockEnd < seq) continue;
335
- if (blockStart > endOffsetNum) break;
336
-
337
- if (keyBytes) {
338
- const headerBytes = await readSegmentRange(
339
- this.os,
340
- seg,
341
- entry.blockOffset,
342
- entry.blockOffset + DSB3_HEADER_BYTES - 1,
343
- this.diskCache,
344
- this.retryOpts()
345
- );
346
- const headerRes = parseBlockHeaderResult(headerBytes);
347
- if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
348
- const header = headerRes.value;
349
- const bloom = new Bloom256(header.bloom);
350
- if (!bloom.maybeHas(keyBytes)) continue;
351
- }
352
-
353
- const totalLen = DSB3_HEADER_BYTES + entry.compressedLen;
354
- const blockBytes = await readSegmentRange(
355
- this.os,
356
- seg,
357
- entry.blockOffset,
358
- entry.blockOffset + totalLen - 1,
359
- this.diskCache,
360
- this.retryOpts()
361
- );
362
- const decodedRes = decodeBlockResult(blockBytes);
363
- if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
364
- const decoded = decodedRes.value;
365
- let curOffset = entry.firstOffset;
366
- for (const r of decoded.records) {
367
- if (curOffset < seq) {
368
- curOffset += 1n;
369
- continue;
370
- }
371
- if (curOffset > endOffsetNum) break;
372
- if (keyBytes && !bytesEqual(r.routingKey, keyBytes)) {
373
- curOffset += 1n;
374
- continue;
375
- }
376
- results.push({ offset: curOffset, payload: r.payload });
377
- bytesOut += r.payload.byteLength;
378
- curOffset += 1n;
379
- if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
380
- seq = curOffset;
381
- return Result.ok(finalize());
382
- }
383
- }
384
- }
385
- }
994
+ const source = await loadSegmentSource(this.os, seg, this.diskCache, this.retryOpts());
995
+ const scanRes = await scanSegmentSource(source, seg, allowedDocIds);
996
+ if (Result.isError(scanRes)) return scanRes;
997
+ if (filterScanLimitReached) return Result.ok(finalize());
998
+ if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
386
999
  }
387
1000
 
388
- // Move to next segment.
389
- seq = seg.end_offset + 1n;
1001
+ // Move to next segment.
1002
+ seq = seg.end_offset + 1n;
1003
+ }
390
1004
  }
391
1005
 
392
1006
  // 2) Read remaining from WAL tail.
@@ -395,8 +1009,24 @@ export class StreamReader {
395
1009
  for (const rec of this.db.iterWalRange(stream, seq, endOffsetNum, keyBytes ?? undefined)) {
396
1010
  const s = BigInt(rec.offset);
397
1011
  const payload: Uint8Array = rec.payload;
398
- results.push({ offset: s, payload });
399
- bytesOut += payload.byteLength;
1012
+ const routingKey =
1013
+ rec.routing_key == null
1014
+ ? null
1015
+ : rec.routing_key instanceof Uint8Array
1016
+ ? rec.routing_key
1017
+ : new Uint8Array(rec.routing_key);
1018
+ const matchRes = evaluateRecordResult(s, routingKey, payload);
1019
+ if (Result.isError(matchRes)) return matchRes;
1020
+ if (matchRes.value.matched) {
1021
+ results.push({ offset: s, payload });
1022
+ bytesOut += payload.byteLength;
1023
+ }
1024
+ if (matchRes.value.stop) {
1025
+ filterScanLimitReached = true;
1026
+ hitLimit = true;
1027
+ seq = s + 1n;
1028
+ break;
1029
+ }
400
1030
  if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
401
1031
  hitLimit = true;
402
1032
  // We only emitted payloads up through this offset (key-filtered reads
@@ -433,6 +1063,13 @@ export class StreamReader {
433
1063
  endOffsetSeq: endOffsetNum,
434
1064
  nextOffsetSeq: scannedThrough,
435
1065
  records: results,
1066
+ ...(filter
1067
+ ? {
1068
+ filterScannedBytes,
1069
+ filterScanLimitBytes: READ_FILTER_SCAN_LIMIT_BYTES,
1070
+ filterScanLimitReached,
1071
+ }
1072
+ : {}),
436
1073
  };
437
1074
  }
438
1075
  } catch (e: unknown) {
@@ -440,11 +1077,1218 @@ export class StreamReader {
440
1077
  }
441
1078
  }
442
1079
 
443
- async read(args: { stream: string; offset: string; key: string | null; format: ReadFormat }): Promise<ReadBatch> {
1080
+ async read(args: {
1081
+ stream: string;
1082
+ offset: string;
1083
+ key: string | null;
1084
+ format: ReadFormat;
1085
+ filter?: CompiledReadFilter | null;
1086
+ }): Promise<ReadBatch> {
444
1087
  const res = await this.readResult(args);
445
1088
  if (Result.isError(res)) throw dsError(res.error.message);
446
1089
  return res.value;
447
1090
  }
1091
+
1092
+ async searchResult(args: { stream: string; request: SearchRequest }): Promise<Result<SearchResultBatch, ReaderError>> {
1093
+ const startedAt = Date.now();
1094
+ const { stream, request } = args;
1095
+ const leaveSearchPhase = this.memorySampler?.enter("search", {
1096
+ stream,
1097
+ has_query: request.q != null,
1098
+ over_limit: this.memory?.isOverLimit() === true,
1099
+ });
1100
+ const srow = this.db.getStream(stream);
1101
+ try {
1102
+ if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
1103
+ if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
1104
+ return Result.err({ kind: "gone", message: "stream expired" });
1105
+ }
1106
+
1107
+ const regRes = this.registry.getRegistryResult(stream);
1108
+ if (Result.isError(regRes)) return Result.err({ kind: "internal", message: regRes.error.message });
1109
+ const registry = regRes.value;
1110
+ if (!registry.search) return Result.err({ kind: "internal", message: "search is not configured for this stream" });
1111
+
1112
+ const snapshotEndSeq = srow.next_offset - 1n;
1113
+ const snapshotEndOffset = encodeOffset(srow.epoch, snapshotEndSeq);
1114
+ const coverageState = this.computePublishedCoverageState(stream, srow, registry);
1115
+ const visibleSnapshotEndSeq = coverageState.canSearchWalTail
1116
+ ? snapshotEndSeq
1117
+ : (coverageState.visibleThroughSeq < snapshotEndSeq ? coverageState.visibleThroughSeq : snapshotEndSeq);
1118
+ const visibleSealedThrough = coverageState.canSearchWalTail
1119
+ ? srow.sealed_through
1120
+ : (coverageState.visibleThroughSeq < srow.sealed_through ? coverageState.visibleThroughSeq : srow.sealed_through);
1121
+ const deadline = request.timeoutMs == null ? null : Date.now() + request.timeoutMs;
1122
+ const leadingSort = request.sort[0] ?? null;
1123
+ const offsetSearchAfter =
1124
+ request.searchAfter && leadingSort?.kind === "offset" ? normalizeSearchAfterValue(leadingSort, request.searchAfter[0]) : null;
1125
+ const cursorFieldBound = resolveSearchCursorFieldBound(request);
1126
+
1127
+ const hits: SearchHitInternal[] = [];
1128
+ let timedOut = false;
1129
+ const markTimedOutIfNeeded = (): boolean => {
1130
+ if (deadline == null || Date.now() < deadline) return false;
1131
+ timedOut = true;
1132
+ return true;
1133
+ };
1134
+ let indexedSegments = 0;
1135
+ let indexedSegmentTimeMs = 0;
1136
+ let ftsSectionGetMs = 0;
1137
+ let ftsDecodeMs = 0;
1138
+ let ftsClauseEstimateMs = 0;
1139
+ let scannedSegments = 0;
1140
+ let scannedSegmentTimeMs = 0;
1141
+ let scannedTailDocs = 0;
1142
+ let scannedTailTimeMs = 0;
1143
+ const indexFamiliesUsed = new Set<string>();
1144
+ const columnClauses = collectPositiveSearchColumnClauses(request.q);
1145
+ const ftsClauses = collectPositiveSearchFtsClauses(request.q);
1146
+ let exactCandidateInfo: SegmentCandidateInfo = { segments: null, indexedThrough: 0 };
1147
+ let exactCandidateTimeMs = 0;
1148
+ if (!markTimedOutIfNeeded()) {
1149
+ const exactCandidateStartedAt = Date.now();
1150
+ exactCandidateInfo = await this.resolveSearchExactCandidateSegments(stream, request.q);
1151
+ exactCandidateTimeMs = Date.now() - exactCandidateStartedAt;
1152
+ markTimedOutIfNeeded();
1153
+ }
1154
+
1155
+ const collectSearchMatchResult = (
1156
+ offsetSeq: bigint,
1157
+ payload: Uint8Array
1158
+ ): Result<void, ReaderError> => {
1159
+ const parsedRes = decodeJsonPayloadResult(this.registry, stream, offsetSeq, payload);
1160
+ if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
1161
+ const evalRes = evaluateSearchQueryResult(registry, offsetSeq, request.q, parsedRes.value);
1162
+ if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
1163
+ if (!evalRes.value.matched) return Result.ok(undefined);
1164
+ const fieldsRes = extractSearchHitFieldsResult(registry, offsetSeq, parsedRes.value);
1165
+ if (Result.isError(fieldsRes)) return Result.err({ kind: "internal", message: fieldsRes.error.message });
1166
+ const sortInternal = buildSearchSortInternalValues(request.sort, fieldsRes.value, evalRes.value, offsetSeq);
1167
+ if (request.searchAfter && compareSearchAfterValues(sortInternal, request.sort, request.searchAfter) <= 0) {
1168
+ return Result.ok(undefined);
1169
+ }
1170
+ hits.push({
1171
+ offsetSeq,
1172
+ offset: encodeOffset(srow.epoch, offsetSeq),
1173
+ score: evalRes.value.score,
1174
+ sortInternal,
1175
+ sortResponse: buildSearchSortResponseValues(request.sort, sortInternal, encodeOffset(srow.epoch, offsetSeq)),
1176
+ fields: fieldsRes.value,
1177
+ source: parsedRes.value,
1178
+ });
1179
+ return Result.ok(undefined);
1180
+ };
1181
+
1182
+ const scanSegmentForSearchResult = async (
1183
+ seg: SegmentRow,
1184
+ allowedDocIds: Set<number> | null,
1185
+ rangeStartSeq: bigint,
1186
+ rangeEndSeq: bigint
1187
+ ): Promise<Result<void, ReaderError>> => {
1188
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1189
+ const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
1190
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1191
+ let curOffset = seg.start_offset;
1192
+ for (const blockRes of iterateBlocksResult(segBytes)) {
1193
+ if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
1194
+ for (const record of blockRes.value.decoded.records) {
1195
+ if (curOffset > rangeEndSeq) return Result.ok(undefined);
1196
+ if (curOffset < rangeStartSeq) {
1197
+ curOffset += 1n;
1198
+ continue;
1199
+ }
1200
+ const localDocId = Number(curOffset - seg.start_offset);
1201
+ if (!allowedDocIds || allowedDocIds.has(localDocId)) {
1202
+ const matchRes = collectSearchMatchResult(curOffset, record.payload);
1203
+ if (Result.isError(matchRes)) return matchRes;
1204
+ }
1205
+ curOffset += 1n;
1206
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1207
+ }
1208
+ }
1209
+ return Result.ok(undefined);
1210
+ };
1211
+
1212
+ const scanSegmentWithFamiliesResult = async (
1213
+ seg: SegmentRow,
1214
+ rangeStartSeq: bigint,
1215
+ rangeEndSeq: bigint
1216
+ ): Promise<Result<void, ReaderError>> => {
1217
+ const segmentStartedAt = Date.now();
1218
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1219
+ if (
1220
+ exactCandidateInfo.segments &&
1221
+ seg.segment_index < exactCandidateInfo.indexedThrough &&
1222
+ !exactCandidateInfo.segments.has(seg.segment_index)
1223
+ ) {
1224
+ return Result.ok(undefined);
1225
+ }
1226
+ if (cursorFieldBound) {
1227
+ const overlapsCursor = await this.segmentMayOverlapSearchCursor(stream, seg.segment_index, cursorFieldBound);
1228
+ if (!overlapsCursor) {
1229
+ indexFamiliesUsed.add("col");
1230
+ indexedSegments += 1;
1231
+ indexedSegmentTimeMs += Date.now() - segmentStartedAt;
1232
+ return Result.ok(undefined);
1233
+ }
1234
+ }
1235
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1236
+
1237
+ const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult(
1238
+ stream,
1239
+ seg.segment_index,
1240
+ columnClauses,
1241
+ ftsClauses,
1242
+ {
1243
+ addFtsSectionGetMs: (deltaMs) => {
1244
+ ftsSectionGetMs += deltaMs;
1245
+ },
1246
+ addFtsDecodeMs: (deltaMs) => {
1247
+ ftsDecodeMs += deltaMs;
1248
+ },
1249
+ addFtsClauseEstimateMs: (deltaMs) => {
1250
+ ftsClauseEstimateMs += deltaMs;
1251
+ },
1252
+ }
1253
+ );
1254
+ if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message });
1255
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1256
+ const familyCandidates = familyCandidatesRes.value;
1257
+ if (familyCandidates.docIds && familyCandidates.docIds.size === 0) {
1258
+ indexedSegments += familyCandidates.usedFamilies.size > 0 ? 1 : 0;
1259
+ for (const family of familyCandidates.usedFamilies) indexFamiliesUsed.add(family);
1260
+ if (familyCandidates.usedFamilies.size > 0) indexedSegmentTimeMs += Date.now() - segmentStartedAt;
1261
+ return Result.ok(undefined);
1262
+ }
1263
+ const usedIndexedFamilies = familyCandidates.usedFamilies.size > 0;
1264
+ if (familyCandidates.usedFamilies.size > 0) {
1265
+ indexedSegments += 1;
1266
+ for (const family of familyCandidates.usedFamilies) indexFamiliesUsed.add(family);
1267
+ } else {
1268
+ scannedSegments += 1;
1269
+ }
1270
+
1271
+ const scanRes = await scanSegmentForSearchResult(seg, familyCandidates.docIds, rangeStartSeq, rangeEndSeq);
1272
+ if (Result.isError(scanRes)) return scanRes;
1273
+ if (usedIndexedFamilies) indexedSegmentTimeMs += Date.now() - segmentStartedAt;
1274
+ else scannedSegmentTimeMs += Date.now() - segmentStartedAt;
1275
+ return Result.ok(undefined);
1276
+ };
1277
+
1278
+ const stopIfPageComplete = (): boolean => hits.length >= request.size;
1279
+
1280
+ if (leadingSort?.kind === "offset") {
1281
+ const descending = leadingSort.direction === "desc";
1282
+ const rangeStartSeq = descending ? 0n : typeof offsetSearchAfter === "bigint" ? offsetSearchAfter + 1n : 0n;
1283
+ const requestedRangeEndSeq = descending ? (typeof offsetSearchAfter === "bigint" ? offsetSearchAfter - 1n : snapshotEndSeq) : snapshotEndSeq;
1284
+ const rangeEndSeq = requestedRangeEndSeq < visibleSnapshotEndSeq ? requestedRangeEndSeq : visibleSnapshotEndSeq;
1285
+
1286
+ if (rangeStartSeq <= rangeEndSeq) {
1287
+ if (descending) {
1288
+ const tailStart = srow.sealed_through + 1n;
1289
+ if (coverageState.canSearchWalTail && tailStart <= rangeEndSeq) {
1290
+ const walStart = rangeStartSeq > tailStart ? rangeStartSeq : tailStart;
1291
+ const walEnd = rangeEndSeq;
1292
+ if (walStart <= walEnd) {
1293
+ const tailStartedAt = Date.now();
1294
+ for (const record of this.db.iterWalRangeDesc(stream, walStart, walEnd)) {
1295
+ scannedTailDocs += 1;
1296
+ const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload);
1297
+ if (Result.isError(matchRes)) return matchRes;
1298
+ if (markTimedOutIfNeeded()) break;
1299
+ if (stopIfPageComplete()) break;
1300
+ }
1301
+ scannedTailTimeMs += Date.now() - tailStartedAt;
1302
+ }
1303
+ }
1304
+ if (!timedOut && !stopIfPageComplete()) {
1305
+ const sealedEnd = rangeEndSeq < visibleSealedThrough ? rangeEndSeq : visibleSealedThrough;
1306
+ if (sealedEnd >= rangeStartSeq) {
1307
+ const plannedSealedSegments = this.planSealedReadSegments(
1308
+ stream,
1309
+ rangeStartSeq,
1310
+ sealedEnd,
1311
+ exactCandidateInfo.segments,
1312
+ exactCandidateInfo.indexedThrough,
1313
+ "desc"
1314
+ );
1315
+ if (plannedSealedSegments) {
1316
+ for (const seg of plannedSealedSegments.segments) {
1317
+ const scanRes = await this.scanSegmentReverseForSearchResult(
1318
+ stream,
1319
+ seg,
1320
+ exactCandidateInfo,
1321
+ cursorFieldBound,
1322
+ columnClauses,
1323
+ ftsClauses,
1324
+ rangeStartSeq,
1325
+ sealedEnd,
1326
+ {
1327
+ indexFamiliesUsed,
1328
+ collectSearchMatchResult,
1329
+ deadline,
1330
+ isTimedOut: () => timedOut,
1331
+ setTimedOut: (next) => {
1332
+ timedOut = next;
1333
+ },
1334
+ stopIfPageComplete,
1335
+ addIndexedSegment: () => {
1336
+ indexedSegments += 1;
1337
+ },
1338
+ addScannedSegment: () => {
1339
+ scannedSegments += 1;
1340
+ },
1341
+ addIndexedSegmentTimeMs: (deltaMs) => {
1342
+ indexedSegmentTimeMs += deltaMs;
1343
+ },
1344
+ addFtsSectionGetMs: (deltaMs) => {
1345
+ ftsSectionGetMs += deltaMs;
1346
+ },
1347
+ addFtsDecodeMs: (deltaMs) => {
1348
+ ftsDecodeMs += deltaMs;
1349
+ },
1350
+ addFtsClauseEstimateMs: (deltaMs) => {
1351
+ ftsClauseEstimateMs += deltaMs;
1352
+ },
1353
+ addScannedSegmentTimeMs: (deltaMs) => {
1354
+ scannedSegmentTimeMs += deltaMs;
1355
+ },
1356
+ }
1357
+ );
1358
+ if (Result.isError(scanRes)) return scanRes;
1359
+ if (timedOut || stopIfPageComplete()) break;
1360
+ }
1361
+ } else {
1362
+ const startSeg = this.db.findSegmentForOffset(stream, sealedEnd);
1363
+ let segmentIndex = startSeg?.segment_index ?? this.db.countSegmentsForStream(stream) - 1;
1364
+ while (segmentIndex >= 0) {
1365
+ const seg = this.db.getSegmentByIndex(stream, segmentIndex);
1366
+ if (!seg) {
1367
+ segmentIndex -= 1;
1368
+ continue;
1369
+ }
1370
+ if (seg.end_offset < rangeStartSeq) break;
1371
+ if (seg.start_offset > sealedEnd) {
1372
+ segmentIndex -= 1;
1373
+ continue;
1374
+ }
1375
+ const scanRes = await this.scanSegmentReverseForSearchResult(
1376
+ stream,
1377
+ seg,
1378
+ exactCandidateInfo,
1379
+ cursorFieldBound,
1380
+ columnClauses,
1381
+ ftsClauses,
1382
+ rangeStartSeq,
1383
+ sealedEnd,
1384
+ {
1385
+ indexFamiliesUsed,
1386
+ collectSearchMatchResult,
1387
+ deadline,
1388
+ isTimedOut: () => timedOut,
1389
+ setTimedOut: (next) => {
1390
+ timedOut = next;
1391
+ },
1392
+ stopIfPageComplete,
1393
+ addIndexedSegment: () => {
1394
+ indexedSegments += 1;
1395
+ },
1396
+ addScannedSegment: () => {
1397
+ scannedSegments += 1;
1398
+ },
1399
+ addIndexedSegmentTimeMs: (deltaMs) => {
1400
+ indexedSegmentTimeMs += deltaMs;
1401
+ },
1402
+ addFtsSectionGetMs: (deltaMs) => {
1403
+ ftsSectionGetMs += deltaMs;
1404
+ },
1405
+ addFtsDecodeMs: (deltaMs) => {
1406
+ ftsDecodeMs += deltaMs;
1407
+ },
1408
+ addFtsClauseEstimateMs: (deltaMs) => {
1409
+ ftsClauseEstimateMs += deltaMs;
1410
+ },
1411
+ addScannedSegmentTimeMs: (deltaMs) => {
1412
+ scannedSegmentTimeMs += deltaMs;
1413
+ },
1414
+ }
1415
+ );
1416
+ if (Result.isError(scanRes)) return scanRes;
1417
+ if (timedOut || stopIfPageComplete()) break;
1418
+ segmentIndex -= 1;
1419
+ }
1420
+ }
1421
+ }
1422
+ }
1423
+ } else {
1424
+ let seq = rangeStartSeq;
1425
+ const sealedEnd = rangeEndSeq < visibleSealedThrough ? rangeEndSeq : visibleSealedThrough;
1426
+ const plannedSealedSegments = this.planSealedReadSegments(
1427
+ stream,
1428
+ rangeStartSeq,
1429
+ sealedEnd,
1430
+ exactCandidateInfo.segments,
1431
+ exactCandidateInfo.indexedThrough,
1432
+ "asc"
1433
+ );
1434
+ if (plannedSealedSegments) {
1435
+ for (const seg of plannedSealedSegments.segments) {
1436
+ const scanRes = await scanSegmentWithFamiliesResult(seg, rangeStartSeq, rangeEndSeq);
1437
+ if (Result.isError(scanRes)) return scanRes;
1438
+ seq = seg.end_offset + 1n;
1439
+ if (timedOut || stopIfPageComplete()) break;
1440
+ }
1441
+ if (seq <= plannedSealedSegments.sealedEndSeq) seq = plannedSealedSegments.sealedEndSeq + 1n;
1442
+ } else {
1443
+ while (seq <= rangeEndSeq && seq <= visibleSealedThrough) {
1444
+ const seg = this.db.findSegmentForOffset(stream, seq);
1445
+ if (!seg) break;
1446
+ const scanRes = await scanSegmentWithFamiliesResult(seg, rangeStartSeq, rangeEndSeq);
1447
+ if (Result.isError(scanRes)) return scanRes;
1448
+ seq = seg.end_offset + 1n;
1449
+ if (timedOut || stopIfPageComplete()) break;
1450
+ }
1451
+ }
1452
+ if (!timedOut && !stopIfPageComplete() && coverageState.canSearchWalTail && seq <= rangeEndSeq) {
1453
+ const tailStartedAt = Date.now();
1454
+ for (const record of this.db.iterWalRange(stream, seq, rangeEndSeq)) {
1455
+ scannedTailDocs += 1;
1456
+ const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload);
1457
+ if (Result.isError(matchRes)) return matchRes;
1458
+ if (markTimedOutIfNeeded()) break;
1459
+ if (stopIfPageComplete()) break;
1460
+ }
1461
+ scannedTailTimeMs += Date.now() - tailStartedAt;
1462
+ }
1463
+ }
1464
+ }
1465
+
1466
+ const pageHits = hits.slice(0, request.size);
1467
+ const nextSearchAfter = pageHits.length === request.size ? pageHits[pageHits.length - 1].sortResponse : null;
1468
+ const exactTotalKnown = !timedOut && coverageState.complete && nextSearchAfter == null;
1469
+ return Result.ok({
1470
+ stream,
1471
+ snapshotEndOffset,
1472
+ tookMs: Date.now() - startedAt,
1473
+ timedOut,
1474
+ timeoutMs: request.timeoutMs,
1475
+ coverage: {
1476
+ mode: coverageState.mode,
1477
+ complete: coverageState.complete && !timedOut,
1478
+ streamHeadOffset: coverageState.streamHeadOffset,
1479
+ visibleThroughOffset: coverageState.visibleThroughOffset,
1480
+ visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
1481
+ oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
1482
+ possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
1483
+ possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
1484
+ possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
1485
+ possibleMissingWalRows: coverageState.possibleMissingWalRows,
1486
+ indexedSegments,
1487
+ indexedSegmentTimeMs,
1488
+ ftsSectionGetMs,
1489
+ ftsDecodeMs,
1490
+ ftsClauseEstimateMs,
1491
+ scannedSegments,
1492
+ scannedSegmentTimeMs,
1493
+ scannedTailDocs,
1494
+ scannedTailTimeMs,
1495
+ exactCandidateTimeMs,
1496
+ indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
1497
+ },
1498
+ total: {
1499
+ value: pageHits.length,
1500
+ relation: exactTotalKnown ? "eq" : "gte",
1501
+ },
1502
+ hits: pageHits.map((hit) => ({
1503
+ offset: hit.offset,
1504
+ score: hit.score,
1505
+ sort: hit.sortResponse,
1506
+ fields: hit.fields,
1507
+ source: hit.source,
1508
+ })),
1509
+ nextSearchAfter,
1510
+ });
1511
+ }
1512
+
1513
+ let seq = 0n;
1514
+ const sealedEnd = visibleSnapshotEndSeq < visibleSealedThrough ? visibleSnapshotEndSeq : visibleSealedThrough;
1515
+ const plannedSealedSegments = this.planSealedReadSegments(
1516
+ stream,
1517
+ 0n,
1518
+ sealedEnd,
1519
+ exactCandidateInfo.segments,
1520
+ exactCandidateInfo.indexedThrough,
1521
+ "asc"
1522
+ );
1523
+ if (plannedSealedSegments) {
1524
+ for (const seg of plannedSealedSegments.segments) {
1525
+ const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq);
1526
+ if (Result.isError(scanRes)) return scanRes;
1527
+ seq = seg.end_offset + 1n;
1528
+ if (timedOut) break;
1529
+ }
1530
+ if (seq <= plannedSealedSegments.sealedEndSeq) seq = plannedSealedSegments.sealedEndSeq + 1n;
1531
+ } else {
1532
+ while (seq <= visibleSnapshotEndSeq && seq <= visibleSealedThrough) {
1533
+ const seg = this.db.findSegmentForOffset(stream, seq);
1534
+ if (!seg) break;
1535
+ const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq);
1536
+ if (Result.isError(scanRes)) return scanRes;
1537
+ seq = seg.end_offset + 1n;
1538
+ if (timedOut) break;
1539
+ }
1540
+ }
1541
+
1542
+ if (!timedOut && coverageState.canSearchWalTail && seq <= snapshotEndSeq) {
1543
+ const tailStartedAt = Date.now();
1544
+ for (const record of this.db.iterWalRange(stream, seq, snapshotEndSeq)) {
1545
+ scannedTailDocs += 1;
1546
+ const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload);
1547
+ if (Result.isError(matchRes)) return matchRes;
1548
+ if (markTimedOutIfNeeded()) break;
1549
+ }
1550
+ scannedTailTimeMs += Date.now() - tailStartedAt;
1551
+ }
1552
+
1553
+ hits.sort((left, right) => compareSearchHits(left, right, request.sort));
1554
+ const pageHits = hits.slice(0, request.size);
1555
+ const nextSearchAfter = pageHits.length === request.size ? pageHits[pageHits.length - 1].sortResponse : null;
1556
+ const exactTotalKnown = !timedOut && coverageState.complete && nextSearchAfter == null;
1557
+
1558
+ return Result.ok({
1559
+ stream,
1560
+ snapshotEndOffset,
1561
+ tookMs: Date.now() - startedAt,
1562
+ timedOut,
1563
+ timeoutMs: request.timeoutMs,
1564
+ coverage: {
1565
+ mode: coverageState.mode,
1566
+ complete: coverageState.complete && !timedOut,
1567
+ streamHeadOffset: coverageState.streamHeadOffset,
1568
+ visibleThroughOffset: coverageState.visibleThroughOffset,
1569
+ visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
1570
+ oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
1571
+ possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
1572
+ possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
1573
+ possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
1574
+ possibleMissingWalRows: coverageState.possibleMissingWalRows,
1575
+ indexedSegments,
1576
+ indexedSegmentTimeMs,
1577
+ ftsSectionGetMs,
1578
+ ftsDecodeMs,
1579
+ ftsClauseEstimateMs,
1580
+ scannedSegments,
1581
+ scannedSegmentTimeMs,
1582
+ scannedTailDocs,
1583
+ scannedTailTimeMs,
1584
+ exactCandidateTimeMs,
1585
+ indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
1586
+ },
1587
+ total: {
1588
+ value: pageHits.length,
1589
+ relation: exactTotalKnown ? "eq" : "gte",
1590
+ },
1591
+ hits: pageHits.map((hit) => ({
1592
+ offset: hit.offset,
1593
+ score: hit.score,
1594
+ sort: hit.sortResponse,
1595
+ fields: hit.fields,
1596
+ source: hit.source,
1597
+ })),
1598
+ nextSearchAfter,
1599
+ });
1600
+ } catch (e: unknown) {
1601
+ return Result.err({ kind: "internal", message: errorMessage(e) });
1602
+ } finally {
1603
+ leaveSearchPhase?.();
1604
+ }
1605
+ }
1606
+
1607
+ async search(args: { stream: string; request: SearchRequest }): Promise<SearchResultBatch> {
1608
+ const res = await this.searchResult(args);
1609
+ if (Result.isError(res)) throw dsError(res.error.message);
1610
+ return res.value;
1611
+ }
1612
+
1613
/**
 * Aggregates events of a stream into time buckets for a configured rollup.
 *
 * Sources, in order: (1) pre-computed per-segment rollup companions ("agg"
 * index family) for fully interval-aligned windows when the query is
 * eligible, (2) raw segment scans (or metrics-block companions for the
 * metrics profile) for any remaining ranges, and (3) the unsealed WAL tail.
 *
 * @param args.stream  stream name
 * @param args.request aggregate request (rollup name, time range, interval,
 *                     optional query, groupBy, measures)
 * @returns Result with bucketed groups/measures plus coverage diagnostics,
 *          or a ReaderError (`not_found`, `gone`, `internal`).
 */
async aggregateResult(args: { stream: string; request: AggregateRequest }): Promise<Result<AggregateResultBatch, ReaderError>> {
  const { stream, request } = args;
  // Memory-phase bookkeeping; leaveAggregatePhase is invoked in `finally`.
  const leaveAggregatePhase = this.memorySampler?.enter("aggregate", {
    stream,
    rollup: request.rollup,
    over_limit: this.memory?.isOverLimit() === true,
  });
  const srow = this.db.getStream(stream);
  try {
    // Stream existence / lifecycle guards.
    if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
    if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
      return Result.err({ kind: "gone", message: "stream expired" });
    }

    // The rollup must be declared under the stream's search registry config.
    const regRes = this.registry.getRegistryResult(stream);
    if (Result.isError(regRes)) return Result.err({ kind: "internal", message: regRes.error.message });
    const registry = regRes.value;
    const rollup = registry.search?.rollups?.[request.rollup];
    if (!registry.search || !rollup) {
      return Result.err({ kind: "internal", message: "rollup is not configured for this stream" });
    }

    const coverageState = this.computePublishedCoverageState(stream, srow, registry);
    const intervalMs = request.intervalMs;
    const intervalBig = BigInt(intervalMs);
    const fromMs = Number(request.fromMs);
    const toMs = Number(request.toMs);
    // [fullStartMs, fullEndMs) is the interval-aligned sub-range of
    // [fromMs, toMs): fromMs rounded UP and toMs rounded DOWN to a multiple
    // of intervalMs (bigint arithmetic to avoid float rounding).
    const fullStartMs = Number(((request.fromMs + intervalBig - 1n) / intervalBig) * intervalBig);
    const fullEndMs = Number((request.toMs / intervalBig) * intervalBig);
    const hasFullWindows = fullEndMs > fullStartMs;
    const dimensions = new Set(rollup.dimensions ?? []);
    // Eligibility decides whether pre-computed rollup companions may answer
    // the query (e.g. the filter only touches rollup dimensions).
    const eligibility = extractRollupEligibility(request.q, dimensions);
    const selectedMeasures = new Set(request.measures ?? Object.keys(rollup.measures));
    const timestampField = rollup.timestampField ?? registry.search.primaryTimestampField;
    const primaryTimestampField = registry.search.primaryTimestampField;
    // When the rollup uses the primary timestamp, segment pruning can use the
    // cheap per-segment primary-timestamp bounds stored in the DB.
    const usesPrimaryTimestampBounds = timestampField === primaryTimestampField;

    // bucketStartMs -> (serialized group key -> accumulated group).
    const buckets = new Map<number, Map<string, AggregateGroupInternal>>();
    const indexedSegmentSet = new Set<number>();
    const scannedSegmentSet = new Set<number>();
    let scannedTailDocs = 0;
    const indexFamiliesUsed = new Set<string>();
    const metricsProfile = registry.search.profile === "metrics";
    let usedRollups = false;

    // Merge a contribution's measures into its (bucket, group) accumulator,
    // projecting dimensions down to the requested groupBy fields.
    const mergeBucketMeasures = (bucketStartMs: number, dimensionsKey: Record<string, string | null>, measures: Record<string, AggMeasureState>): void => {
      let groups = buckets.get(bucketStartMs);
      if (!groups) {
        groups = new Map();
        buckets.set(bucketStartMs, groups);
      }
      const projectedKey: Record<string, string | null> = {};
      for (const field of request.groupBy) projectedKey[field] = dimensionsKey[field] ?? null;
      // JSON string of the projected key serves as the map key.
      const groupKey = JSON.stringify(projectedKey);
      let group = groups.get(groupKey);
      if (!group) {
        group = { key: projectedKey, measures: {} };
        groups.set(groupKey, group);
      }
      for (const [measureName, state] of Object.entries(measures)) {
        if (!selectedMeasures.has(measureName)) continue;
        const existing = group.measures[measureName];
        if (!existing) {
          // Clone so later merges don't mutate the source companion state.
          group.measures[measureName] = cloneAggMeasureState(state);
          continue;
        }
        group.measures[measureName] = mergeAggMeasureState(existing, state);
      }
    };

    // Exact-match dimension filters extracted from the query, applied when
    // reading pre-aggregated rollup groups (raw scans re-evaluate request.q).
    const matchesExactFilters = (dimensionsKey: Record<string, string | null>): boolean => {
      for (const [field, value] of Object.entries(eligibility.exactFilters)) {
        if ((dimensionsKey[field] ?? null) !== value) return false;
      }
      return true;
    };

    // Ranges that cannot be served by aligned rollup windows and must be
    // scanned raw: the whole range when ineligible / no full windows,
    // otherwise only the unaligned head and tail slivers.
    const partialRanges: Array<{ startMs: number; endMs: number }> = [];
    if (!eligibility.eligible || !hasFullWindows) {
      partialRanges.push({ startMs: fromMs, endMs: toMs });
    } else {
      if (fromMs < fullStartMs) partialRanges.push({ startMs: fromMs, endMs: fullStartMs });
      if (fullEndMs < toMs) partialRanges.push({ startMs: fullEndMs, endMs: toMs });
    }

    // Raw scan: decode every record of a sealed segment, filter by time
    // range and query, and fold contributions into the buckets.
    const scanSegmentForAggregateResult = async (
      seg: SegmentRow,
      scanRanges: Array<{ startMs: number; endMs: number }>
    ): Promise<Result<void, ReaderError>> => {
      const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
      // curOffset tracks the absolute offset of the current record; it must
      // advance on every record, including all the skip paths below.
      let curOffset = seg.start_offset;
      for (const blockRes of iterateBlocksResult(segBytes)) {
        if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
        for (const record of blockRes.value.decoded.records) {
          const parsedRes = decodeJsonPayloadResult(this.registry, stream, curOffset, record.payload);
          if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
          const contributionRes = extractRollupContributionResult(registry, rollup, curOffset, parsedRes.value);
          if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
          const contribution = contributionRes.value;
          if (!contribution) {
            curOffset += 1n;
            continue;
          }
          // Half-open range semantics: startMs inclusive, endMs exclusive.
          const inRange = scanRanges.some((range) => contribution.timestampMs >= range.startMs && contribution.timestampMs < range.endMs);
          if (!inRange) {
            curOffset += 1n;
            continue;
          }
          if (request.q) {
            const evalRes = evaluateSearchQueryResult(registry, curOffset, request.q, parsedRes.value);
            if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
            if (!evalRes.value.matched) {
              curOffset += 1n;
              continue;
            }
          }
          const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
          mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
          curOffset += 1n;
        }
      }
      scannedSegmentSet.add(seg.segment_index);
      return Result.ok(undefined);
    };

    // Cheap overlap test used for segment pruning. Prefers the DB-stored
    // primary-timestamp bounds; falls back to index companions.
    const segmentMayOverlapAggregateRange = async (
      seg: SegmentRow,
      startMs: number,
      endMs: number
    ): Promise<boolean> => {
      if (usesPrimaryTimestampBounds) {
        const companionRow = this.db.getSearchSegmentCompanion(stream, seg.segment_index);
        if (companionRow?.primary_timestamp_min_ms != null && companionRow.primary_timestamp_max_ms != null) {
          return companionRow.primary_timestamp_max_ms >= BigInt(startMs) && companionRow.primary_timestamp_min_ms < BigInt(endMs);
        }
      }
      return this.segmentMayOverlapTimeRange(stream, seg.segment_index, startMs, endMs, timestampField);
    };

    // Metrics-profile fast path: read materialized metrics-block records
    // from the "mblk" companion instead of decoding raw payloads.
    const scanMetricsBlockForAggregateResult = async (
      seg: SegmentRow,
      companion: MetricsBlockSectionView,
      scanRanges: Array<{ startMs: number; endMs: number }>
    ): Promise<Result<void, ReaderError>> => {
      for (const record of companion.records()) {
        const offsetSeq = seg.start_offset + BigInt(record.doc_id);
        const timestampMs = record.windowStartMs;
        const inRange = scanRanges.some((range) => timestampMs >= range.startMs && timestampMs < range.endMs);
        if (!inRange) continue;
        const materialized = materializeMetricsBlockRecord(record);
        if (request.q) {
          const evalRes = evaluateSearchQueryResult(registry, offsetSeq, request.q, materialized);
          if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
          if (!evalRes.value.matched) continue;
        }
        const contributionRes = extractRollupContributionResult(registry, rollup, offsetSeq, materialized);
        if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
        const contribution = contributionRes.value;
        if (!contribution) continue;
        const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
        mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
      }
      indexedSegmentSet.add(seg.segment_index);
      indexFamiliesUsed.add("mblk");
      return Result.ok(undefined);
    };

    // Main segment walk, limited to the published/visible segment prefix.
    for (const seg of this.db.listSegmentsForStream(stream)) {
      if (seg.segment_index >= coverageState.visiblePublishedSegmentCount) break;
      let coveredAlignedWindows = false;
      // Try the pre-aggregated "agg" companion first for aligned windows.
      if (eligibility.eligible && this.index && hasFullWindows) {
        const overlapsAlignedWindow = await segmentMayOverlapAggregateRange(seg, fullStartMs, fullEndMs);
        if (overlapsAlignedWindow) {
          const companion = await this.index.getAggSegmentCompanion(stream, seg.segment_index);
          const intervalCompanion = companion?.getInterval(request.rollup, intervalMs);
          if (intervalCompanion) {
            coveredAlignedWindows = true;
            indexedSegmentSet.add(seg.segment_index);
            indexFamiliesUsed.add("agg");
            usedRollups = true;
            intervalCompanion.forEachGroupInRange(fullStartMs, fullEndMs, (windowStartMs, group) => {
              if (!matchesExactFilters(group.dimensions)) return;
              mergeBucketMeasures(windowStartMs, group.dimensions, group.measures);
            });
          }
        }
      }

      // Decide what still needs a raw scan for this segment: only the
      // unaligned slivers if the companion covered the aligned windows,
      // otherwise the full requested range.
      const scanRanges =
        !eligibility.eligible || !hasFullWindows
          ? [{ startMs: fromMs, endMs: toMs }]
          : coveredAlignedWindows
            ? partialRanges
            : [{ startMs: fromMs, endMs: toMs }];
      if (scanRanges.length === 0) continue;
      let overlaps = false;
      for (const range of scanRanges) {
        if (await segmentMayOverlapAggregateRange(seg, range.startMs, range.endMs)) {
          overlaps = true;
          break;
        }
      }
      if (!overlaps) continue;
      let scanRes: Result<void, ReaderError>;
      if (metricsProfile && this.index) {
        const companion = await this.index.getMetricsBlockSegmentCompanion(stream, seg.segment_index);
        if (companion) {
          scanRes = await scanMetricsBlockForAggregateResult(seg, companion, scanRanges);
        } else {
          scanRes = await scanSegmentForAggregateResult(seg, scanRanges);
        }
      } else {
        scanRes = await scanSegmentForAggregateResult(seg, scanRanges);
      }
      if (Result.isError(scanRes)) return scanRes;
    }

    // Unsealed WAL tail: offsets after sealed_through up to the head.
    const tailStart = srow.sealed_through + 1n;
    const tailEnd = srow.next_offset - 1n;
    if (coverageState.canSearchWalTail && tailStart <= tailEnd) {
      for (const record of this.db.iterWalRange(stream, tailStart, tailEnd)) {
        scannedTailDocs += 1;
        const parsedRes = decodeJsonPayloadResult(this.registry, stream, BigInt(record.offset), record.payload);
        if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
        const contributionRes = extractRollupContributionResult(registry, rollup, BigInt(record.offset), parsedRes.value);
        if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
        const contribution = contributionRes.value;
        if (!contribution || contribution.timestampMs < fromMs || contribution.timestampMs >= toMs) continue;
        if (request.q) {
          const evalRes = evaluateSearchQueryResult(registry, BigInt(record.offset), request.q, parsedRes.value);
          if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
          if (!evalRes.value.matched) continue;
        }
        const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
        mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
      }
    }

    // Deterministic response ordering: buckets by start time, groups by
    // serialized key, measures by name.
    const bucketList = Array.from(buckets.entries())
      .sort((a, b) => a[0] - b[0])
      .map(([startMs, groups]) => ({
        start: new Date(startMs).toISOString(),
        end: new Date(startMs + intervalMs).toISOString(),
        groups: Array.from(groups.values())
          .sort((a, b) => JSON.stringify(a.key).localeCompare(JSON.stringify(b.key)))
          .map((group) => ({
            key: group.key,
            measures: Object.fromEntries(
              Object.entries(group.measures)
                .sort((a, b) => a[0].localeCompare(b[0]))
                .map(([name, state]) => [name, formatAggMeasureState(state)])
            ),
          })),
      }));

    return Result.ok({
      stream,
      rollup: request.rollup,
      from: new Date(fromMs).toISOString(),
      to: new Date(toMs).toISOString(),
      interval: request.interval,
      coverage: {
        mode: coverageState.mode,
        complete: coverageState.complete,
        streamHeadOffset: coverageState.streamHeadOffset,
        visibleThroughOffset: coverageState.visibleThroughOffset,
        visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
        oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
        possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
        possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
        possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
        possibleMissingWalRows: coverageState.possibleMissingWalRows,
        usedRollups,
        indexedSegments: indexedSegmentSet.size,
        scannedSegments: scannedSegmentSet.size,
        scannedTailDocs,
        indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
      },
      buckets: bucketList,
    });
  } catch (e: unknown) {
    return Result.err({ kind: "internal", message: errorMessage(e) });
  } finally {
    leaveAggregatePhase?.();
  }
}
1899
+
1900
+ async aggregate(args: { stream: string; request: AggregateRequest }): Promise<AggregateResultBatch> {
1901
+ const res = await this.aggregateResult(args);
1902
+ if (Result.isError(res)) throw dsError(res.error.message);
1903
+ return res.value;
1904
+ }
1905
+
1906
/**
 * Scans one sealed segment in reverse offset order for search matches,
 * feeding each candidate payload to `state.collectSearchMatchResult`.
 *
 * Pruning, in order: candidate-segment set (skip segments the secondary
 * index proves empty), cursor-bound column stats (skip segments wholly past
 * the search-after cursor), then per-segment index families (FTS/columns)
 * which may narrow to an explicit doc-id set. Remaining records are decoded
 * and visited newest-first.
 *
 * Time spent on the segment is attributed to either indexed or scanned
 * accounting depending on whether any index family was used; the deadline
 * is re-checked between each potentially expensive step.
 *
 * @returns ok(undefined) on success (including timeout/early-stop exits);
 *          an error Result only on decode/index failures or collector errors.
 */
private async scanSegmentReverseForSearchResult(
  stream: string,
  seg: SegmentRow,
  exactCandidateInfo: SegmentCandidateInfo,
  cursorFieldBound: SearchCursorFieldBound | null,
  columnClauses: SearchColumnClause[],
  ftsClauses: SearchFtsClause[],
  rangeStartSeq: bigint,
  rangeEndSeq: bigint,
  state: {
    indexFamiliesUsed: Set<string>;
    collectSearchMatchResult: (offsetSeq: bigint, payload: Uint8Array) => Result<void, ReaderError>;
    deadline: number | null;
    isTimedOut: () => boolean;
    setTimedOut: (next: boolean) => void;
    stopIfPageComplete: () => boolean;
    addIndexedSegment: () => void;
    addScannedSegment: () => void;
    addIndexedSegmentTimeMs: (deltaMs: number) => void;
    addFtsSectionGetMs: (deltaMs: number) => void;
    addFtsDecodeMs: (deltaMs: number) => void;
    addFtsClauseEstimateMs: (deltaMs: number) => void;
    addScannedSegmentTimeMs: (deltaMs: number) => void;
  }
): Promise<Result<void, ReaderError>> {
  const segmentStartedAt = Date.now();
  // Flips the shared timed-out flag (via state) once the deadline passes.
  const markTimedOutIfNeeded = (): boolean => {
    if (state.deadline == null || Date.now() < state.deadline) return false;
    state.setTimedOut(true);
    return true;
  };
  if (markTimedOutIfNeeded()) return Result.ok(undefined);
  // Secondary-index pruning: below indexedThrough the candidate set is
  // authoritative, so an absent segment cannot contain matches.
  if (
    exactCandidateInfo.segments &&
    seg.segment_index < exactCandidateInfo.indexedThrough &&
    !exactCandidateInfo.segments.has(seg.segment_index)
  ) {
    return Result.ok(undefined);
  }
  // Cursor-bound pruning via column min/max stats ("col" family).
  if (cursorFieldBound) {
    const overlapsCursor = await this.segmentMayOverlapSearchCursor(stream, seg.segment_index, cursorFieldBound);
    if (!overlapsCursor) {
      state.indexFamiliesUsed.add("col");
      state.addIndexedSegment();
      state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
      return Result.ok(undefined);
    }
  }
  if (markTimedOutIfNeeded()) return Result.ok(undefined);

  // Resolve per-segment index-family candidates (column + FTS clauses);
  // FTS timing is streamed into the caller's accumulators.
  const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult(
    stream,
    seg.segment_index,
    columnClauses,
    ftsClauses,
    {
      addFtsSectionGetMs: state.addFtsSectionGetMs,
      addFtsDecodeMs: state.addFtsDecodeMs,
      addFtsClauseEstimateMs: state.addFtsClauseEstimateMs,
    }
  );
  if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message });
  if (markTimedOutIfNeeded()) return Result.ok(undefined);
  const familyCandidates = familyCandidatesRes.value;
  // Empty candidate doc-id set: the indexes prove no match; record index
  // usage (only if a family actually contributed) and skip the scan.
  if (familyCandidates.docIds && familyCandidates.docIds.size === 0) {
    if (familyCandidates.usedFamilies.size > 0) state.addIndexedSegment();
    for (const family of familyCandidates.usedFamilies) state.indexFamiliesUsed.add(family);
    if (familyCandidates.usedFamilies.size > 0) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
    return Result.ok(undefined);
  }
  // Decides which time accumulator this segment's work is charged to.
  const usedIndexedFamilies = familyCandidates.usedFamilies.size > 0;
  if (familyCandidates.usedFamilies.size > 0) {
    state.addIndexedSegment();
    for (const family of familyCandidates.usedFamilies) state.indexFamiliesUsed.add(family);
  } else {
    state.addScannedSegment();
  }

  if (markTimedOutIfNeeded()) return Result.ok(undefined);
  const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
  if (markTimedOutIfNeeded()) return Result.ok(undefined);
  // Decode all blocks up front (forward order) so they can be walked in
  // reverse; record counts are needed to compute per-block start offsets.
  const decodedBlocks: Array<{ records: Array<{ payload: Uint8Array }> }> = [];
  for (const blockRes of iterateBlocksResult(segBytes)) {
    if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
    decodedBlocks.push({ records: blockRes.value.decoded.records });
    if (markTimedOutIfNeeded()) {
      if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
      else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
      return Result.ok(undefined);
    }
  }

  // Reverse walk: last block to first, last record to first. Block start
  // offsets are derived backwards from seg.end_offset and record counts.
  let blockEndOffset = seg.end_offset;
  for (let blockIndex = decodedBlocks.length - 1; blockIndex >= 0; blockIndex--) {
    const decoded = decodedBlocks[blockIndex]!;
    const blockStartOffset = blockEndOffset - BigInt(decoded.records.length) + 1n;
    for (let recordIndex = decoded.records.length - 1; recordIndex >= 0; recordIndex--) {
      const offsetSeq = blockStartOffset + BigInt(recordIndex);
      if (offsetSeq > rangeEndSeq) continue;
      // Walking backwards, once below rangeStartSeq nothing further can
      // be in range — finish the segment.
      if (offsetSeq < rangeStartSeq) {
        if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
        else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
        return Result.ok(undefined);
      }
      const localDocId = Number(offsetSeq - seg.start_offset);
      // docIds == null means "no narrowing": every record is a candidate.
      if (!familyCandidates.docIds || familyCandidates.docIds.has(localDocId)) {
        const matchRes = state.collectSearchMatchResult(offsetSeq, decoded.records[recordIndex]!.payload);
        if (Result.isError(matchRes)) return matchRes;
      }
      if (markTimedOutIfNeeded()) {
        if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
        else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
        return Result.ok(undefined);
      }
      if (state.stopIfPageComplete()) {
        if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
        else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
        return Result.ok(undefined);
      }
    }
    blockEndOffset = blockStartOffset - 1n;
  }

  if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
  else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
  return Result.ok(undefined);
}
2033
+
2034
+ private async segmentMayOverlapSearchCursor(
2035
+ stream: string,
2036
+ segmentIndex: number,
2037
+ bound: SearchCursorFieldBound
2038
+ ): Promise<boolean> {
2039
+ if (!this.index || bound.encoded == null) return true;
2040
+ const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
2041
+ if (!companion) return true;
2042
+
2043
+ if (companion.primaryTimestampField === bound.sort.field && companion.minTimestampMs() != null && companion.maxTimestampMs() != null) {
2044
+ const target = bound.after;
2045
+ if (typeof target !== "bigint") return true;
2046
+ const minMs = companion.minTimestampMs()!;
2047
+ const maxMs = companion.maxTimestampMs()!;
2048
+ return bound.sort.direction === "desc" ? minMs <= target : maxMs >= target;
2049
+ }
2050
+
2051
+ const field = companion.getField(bound.sort.field);
2052
+ if (!field) return true;
2053
+ const minValue = field.minValue();
2054
+ const maxValue = field.maxValue();
2055
+ if (minValue == null || maxValue == null) return true;
2056
+ const boundValue = bound.after;
2057
+ const cmpMin = compareComparableValues(minValue, boundValue);
2058
+ const cmpMax = compareComparableValues(maxValue, boundValue);
2059
+ return bound.sort.direction === "desc" ? cmpMin <= 0 : cmpMax >= 0;
2060
+ }
2061
+
2062
+ private async segmentMayOverlapTimeRange(
2063
+ stream: string,
2064
+ segmentIndex: number,
2065
+ startMs: number,
2066
+ endMs: number,
2067
+ timestampField: string
2068
+ ): Promise<boolean> {
2069
+ if (!this.index) return true;
2070
+ const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
2071
+ if (companion && companion.primaryTimestampField === timestampField) {
2072
+ const minMs = companion.minTimestampMs() == null ? null : Number(companion.minTimestampMs());
2073
+ const maxMs = companion.maxTimestampMs() == null ? null : Number(companion.maxTimestampMs());
2074
+ if (Number.isFinite(minMs) && Number.isFinite(maxMs)) {
2075
+ return (maxMs as number) >= startMs && (minMs as number) < endMs;
2076
+ }
2077
+ }
2078
+ const metricsBlock = await this.index.getMetricsBlockSegmentCompanion(stream, segmentIndex);
2079
+ if (!metricsBlock) return true;
2080
+ const minMs = metricsBlock.minWindowStartMs;
2081
+ const maxMs = metricsBlock.maxWindowEndMs;
2082
+ if (!Number.isFinite(minMs) || !Number.isFinite(maxMs)) return true;
2083
+ return (maxMs as number) >= startMs && (minMs as number) < endMs;
2084
+ }
2085
+
2086
+ private async resolveCandidateSegments(
2087
+ stream: string,
2088
+ keyBytes: Uint8Array | null,
2089
+ filter: CompiledReadFilter | null
2090
+ ): Promise<SegmentCandidateInfo> {
2091
+ if (!this.index) return { segments: null, indexedThrough: 0 };
2092
+
2093
+ const candidates: IndexCandidate[] = [];
2094
+ if (keyBytes) {
2095
+ const keyCandidate = await this.index.candidateSegmentsForRoutingKey(stream, keyBytes);
2096
+ if (keyCandidate) candidates.push(keyCandidate);
2097
+ }
2098
+ if (filter) {
2099
+ for (const clause of collectPositiveExactFilterClauses(filter)) {
2100
+ const filterCandidate = await this.index.candidateSegmentsForSecondaryIndex(
2101
+ stream,
2102
+ clause.field,
2103
+ utf8Bytes(clause.canonicalValue)
2104
+ );
2105
+ if (filterCandidate) candidates.push(filterCandidate);
2106
+ }
2107
+ }
2108
+ if (candidates.length === 0) return { segments: null, indexedThrough: 0 };
2109
+
2110
+ const indexedThrough = candidates.reduce((min, candidate) => Math.min(min, candidate.indexedThrough), Number.MAX_SAFE_INTEGER);
2111
+ if (!Number.isFinite(indexedThrough) || indexedThrough <= 0) {
2112
+ return { segments: null, indexedThrough: 0 };
2113
+ }
2114
+
2115
+ let intersection: Set<number> | null = null;
2116
+ for (const candidate of candidates) {
2117
+ const covered = new Set<number>();
2118
+ for (const segmentIndex of candidate.segments) {
2119
+ if (segmentIndex < indexedThrough) covered.add(segmentIndex);
2120
+ }
2121
+ if (intersection == null) {
2122
+ intersection = covered;
2123
+ continue;
2124
+ }
2125
+ for (const segmentIndex of Array.from(intersection)) {
2126
+ if (!covered.has(segmentIndex)) intersection.delete(segmentIndex);
2127
+ }
2128
+ }
2129
+ return { segments: intersection ?? new Set<number>(), indexedThrough };
2130
+ }
2131
+
2132
+ private async resolveSearchExactCandidateSegments(stream: string, query: CompiledSearchQuery): Promise<SegmentCandidateInfo> {
2133
+ if (!this.index) return { segments: null, indexedThrough: 0 };
2134
+ const clauses = collectPositiveSearchExactClauses(query);
2135
+ if (clauses.length === 0) return { segments: null, indexedThrough: 0 };
2136
+
2137
+ const candidates: IndexCandidate[] = [];
2138
+ for (const clause of clauses) {
2139
+ const candidate = await this.index.candidateSegmentsForSecondaryIndex(stream, clause.field, utf8Bytes(clause.canonicalValue));
2140
+ if (candidate) candidates.push(candidate);
2141
+ }
2142
+ if (candidates.length === 0) return { segments: null, indexedThrough: 0 };
2143
+
2144
+ const indexedThrough = candidates.reduce((min, candidate) => Math.min(min, candidate.indexedThrough), Number.MAX_SAFE_INTEGER);
2145
+ if (!Number.isFinite(indexedThrough) || indexedThrough <= 0) return { segments: null, indexedThrough: 0 };
2146
+
2147
+ let intersection: Set<number> | null = null;
2148
+ for (const candidate of candidates) {
2149
+ const covered = new Set<number>();
2150
+ for (const segmentIndex of candidate.segments) {
2151
+ if (segmentIndex < indexedThrough) covered.add(segmentIndex);
2152
+ }
2153
+ if (intersection == null) {
2154
+ intersection = covered;
2155
+ continue;
2156
+ }
2157
+ for (const segmentIndex of Array.from(intersection)) {
2158
+ if (!covered.has(segmentIndex)) intersection.delete(segmentIndex);
2159
+ }
2160
+ }
2161
+ return { segments: intersection ?? new Set<number>(), indexedThrough };
2162
+ }
2163
+
2164
+ private async resolveColumnCandidateDocIdsResult(
2165
+ stream: string,
2166
+ segmentIndex: number,
2167
+ clauses: ReadFilterColumnClause[]
2168
+ ): Promise<Result<Set<number> | null, { message: string }>> {
2169
+ if (!this.index || clauses.length === 0) return Result.ok(null);
2170
+ const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
2171
+ if (!companion) return Result.ok(null);
2172
+
2173
+ let intersection: Set<number> | null = null;
2174
+ for (const clause of clauses) {
2175
+ const clauseRes = filterDocIdsByColumnResult({
2176
+ companion,
2177
+ field: clause.field,
2178
+ op: clause.op,
2179
+ value: clause.compareValue,
2180
+ });
2181
+ if (Result.isError(clauseRes)) return Result.ok(null);
2182
+ if (intersection == null) {
2183
+ intersection = clauseRes.value;
2184
+ continue;
2185
+ }
2186
+ for (const docId of Array.from(intersection)) {
2187
+ if (!clauseRes.value.has(docId)) intersection.delete(docId);
2188
+ }
2189
+ if (intersection.size === 0) break;
2190
+ }
2191
+ return Result.ok(intersection ?? new Set<number>());
2192
+ }
2193
+
2194
+ private async resolveSearchColumnCandidateDocIdsResult(
2195
+ stream: string,
2196
+ segmentIndex: number,
2197
+ clauses: SearchColumnClause[]
2198
+ ): Promise<Result<Set<number> | null, { message: string }>> {
2199
+ if (!this.index || clauses.length === 0) return Result.ok(null);
2200
+ const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
2201
+ if (!companion) return Result.ok(null);
2202
+
2203
+ let intersection: Set<number> | null = null;
2204
+ for (const clause of clauses) {
2205
+ const clauseRes = filterDocIdsByColumnResult({
2206
+ companion,
2207
+ field: clause.field,
2208
+ op: clause.op,
2209
+ value: clause.compareValue,
2210
+ });
2211
+ if (Result.isError(clauseRes)) return Result.ok(null);
2212
+ if (intersection == null) {
2213
+ intersection = clauseRes.value;
2214
+ continue;
2215
+ }
2216
+ for (const docId of Array.from(intersection)) {
2217
+ if (!clauseRes.value.has(docId)) intersection.delete(docId);
2218
+ }
2219
+ if (intersection.size === 0) break;
2220
+ }
2221
+ return Result.ok(intersection ?? new Set<number>());
2222
+ }
2223
+
2224
+ private async resolveSearchFtsCandidateDocIdsResult(
2225
+ stream: string,
2226
+ segmentIndex: number,
2227
+ clauses: SearchFtsClause[],
2228
+ stats?: {
2229
+ addFtsSectionGetMs?: (deltaMs: number) => void;
2230
+ addFtsDecodeMs?: (deltaMs: number) => void;
2231
+ addFtsClauseEstimateMs?: (deltaMs: number) => void;
2232
+ }
2233
+ ): Promise<Result<Set<number> | null, { message: string }>> {
2234
+ if (!this.index || clauses.length === 0) return Result.ok(null);
2235
+ const companionRes = this.index.getFtsSegmentCompanionWithStats
2236
+ ? await this.index.getFtsSegmentCompanionWithStats(stream, segmentIndex)
2237
+ : { companion: await this.index.getFtsSegmentCompanion(stream, segmentIndex), stats: { sectionGetMs: 0, decodeMs: 0 } };
2238
+ stats?.addFtsSectionGetMs?.(companionRes.stats.sectionGetMs);
2239
+ stats?.addFtsDecodeMs?.(companionRes.stats.decodeMs);
2240
+ const companion = companionRes.companion;
2241
+ if (!companion) return Result.ok(null);
2242
+ const clausesRes = filterDocIdsByFtsClausesResult({
2243
+ companion,
2244
+ clauses,
2245
+ onEstimateMs: (deltaMs) => {
2246
+ stats?.addFtsClauseEstimateMs?.(deltaMs);
2247
+ },
2248
+ });
2249
+ if (Result.isError(clausesRes)) return clausesRes;
2250
+ return Result.ok(clausesRes.value);
2251
+ }
2252
+
2253
+ private async resolveSearchFamilyCandidatesResult(
2254
+ stream: string,
2255
+ segmentIndex: number,
2256
+ columnClauses: SearchColumnClause[],
2257
+ ftsClauses: SearchFtsClause[],
2258
+ stats?: {
2259
+ addFtsSectionGetMs?: (deltaMs: number) => void;
2260
+ addFtsDecodeMs?: (deltaMs: number) => void;
2261
+ addFtsClauseEstimateMs?: (deltaMs: number) => void;
2262
+ }
2263
+ ): Promise<Result<SearchFamilyCandidateInfo, { message: string }>> {
2264
+ let intersection: Set<number> | null = null;
2265
+ const usedFamilies = new Set<string>();
2266
+
2267
+ if (columnClauses.length > 0) {
2268
+ const columnRes = await this.resolveSearchColumnCandidateDocIdsResult(stream, segmentIndex, columnClauses);
2269
+ if (Result.isError(columnRes)) return columnRes;
2270
+ if (columnRes.value) {
2271
+ intersection = columnRes.value;
2272
+ usedFamilies.add("col");
2273
+ }
2274
+ }
2275
+
2276
+ if (ftsClauses.length > 0) {
2277
+ const ftsRes = await this.resolveSearchFtsCandidateDocIdsResult(stream, segmentIndex, ftsClauses, stats);
2278
+ if (Result.isError(ftsRes)) return ftsRes;
2279
+ if (ftsRes.value) {
2280
+ if (intersection == null) intersection = ftsRes.value;
2281
+ else {
2282
+ for (const docId of Array.from(intersection)) {
2283
+ if (!ftsRes.value.has(docId)) intersection.delete(docId);
2284
+ }
2285
+ }
2286
+ usedFamilies.add("fts");
2287
+ }
2288
+ }
2289
+
2290
+ return Result.ok({ docIds: intersection, usedFamilies });
2291
+ }
448
2292
  }
449
2293
 
450
2294
  function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
@@ -452,3 +2296,146 @@ function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
452
2296
  for (let i = 0; i < a.byteLength; i++) if (a[i] !== b[i]) return false;
453
2297
  return true;
454
2298
  }
2299
+
2300
+ function buildSearchSortInternalValues(
2301
+ sorts: SearchSortSpec[],
2302
+ fields: Record<string, unknown>,
2303
+ evaluation: SearchEvaluation,
2304
+ offsetSeq: bigint
2305
+ ): Array<bigint | number | string | boolean | null> {
2306
+ return sorts.map((sort) => {
2307
+ if (sort.kind === "score") return evaluation.score;
2308
+ if (sort.kind === "offset") return offsetSeq;
2309
+ const rawValue = fields[sort.field];
2310
+ const scalar = Array.isArray(rawValue) ? rawValue[0] : rawValue;
2311
+ if (scalar == null) return null;
2312
+ if (sort.config.kind === "integer" || sort.config.kind === "float" || sort.config.kind === "date" || sort.config.kind === "bool") {
2313
+ return canonicalizeColumnValue(sort.config, scalar);
2314
+ }
2315
+ return canonicalizeExactValue(sort.config, scalar);
2316
+ });
2317
+ }
2318
+
2319
+ function buildSearchSortResponseValues(
2320
+ sorts: SearchSortSpec[],
2321
+ sortInternal: Array<bigint | number | string | boolean | null>,
2322
+ offset: string
2323
+ ): unknown[] {
2324
+ return sorts.map((sort, index) => {
2325
+ const value = sortInternal[index];
2326
+ if (sort.kind === "offset") return offset;
2327
+ if (typeof value === "bigint") return Number(value);
2328
+ return value;
2329
+ });
2330
+ }
2331
+
2332
+ function compareComparableValues(left: bigint | number | string | boolean | null, right: bigint | number | string | boolean | null): number {
2333
+ if (left == null && right == null) return 0;
2334
+ if (left == null) return 1;
2335
+ if (right == null) return -1;
2336
+ if (typeof left === "bigint" && typeof right === "bigint") return left < right ? -1 : left > right ? 1 : 0;
2337
+ if (typeof left === "number" && typeof right === "number") return left < right ? -1 : left > right ? 1 : 0;
2338
+ if (typeof left === "boolean" && typeof right === "boolean") return left === right ? 0 : left ? 1 : -1;
2339
+ const ls = String(left);
2340
+ const rs = String(right);
2341
+ return ls < rs ? -1 : ls > rs ? 1 : 0;
2342
+ }
2343
+
2344
+ function compareSearchHits(left: SearchHitInternal, right: SearchHitInternal, sorts: SearchSortSpec[]): number {
2345
+ for (let i = 0; i < sorts.length; i++) {
2346
+ const cmp = compareComparableValues(left.sortInternal[i] ?? null, right.sortInternal[i] ?? null);
2347
+ if (cmp === 0) continue;
2348
+ return sorts[i].direction === "asc" ? cmp : -cmp;
2349
+ }
2350
+ return 0;
2351
+ }
2352
+
2353
+ function compareSearchAfterValues(
2354
+ sortInternal: Array<bigint | number | string | boolean | null>,
2355
+ sorts: SearchSortSpec[],
2356
+ searchAfter: unknown[]
2357
+ ): number {
2358
+ for (let i = 0; i < sorts.length; i++) {
2359
+ const after = normalizeSearchAfterValue(sorts[i], searchAfter[i]);
2360
+ const cmp = compareComparableValues(sortInternal[i] ?? null, after);
2361
+ if (cmp === 0) continue;
2362
+ return sorts[i].direction === "asc" ? cmp : -cmp;
2363
+ }
2364
+ return 0;
2365
+ }
2366
+
2367
+ function compareEncodedValues(left: Uint8Array, right: Uint8Array): number {
2368
+ const length = Math.min(left.byteLength, right.byteLength);
2369
+ for (let i = 0; i < length; i++) {
2370
+ if (left[i] === right[i]) continue;
2371
+ return left[i]! < right[i]! ? -1 : 1;
2372
+ }
2373
+ if (left.byteLength === right.byteLength) return 0;
2374
+ return left.byteLength < right.byteLength ? -1 : 1;
2375
+ }
2376
+
2377
+ function encodeSearchCursorValue(sort: Extract<SearchSortSpec, { kind: "field" }>, value: bigint | number | string | boolean | null): Uint8Array | null {
2378
+ if (value == null) return null;
2379
+ if (sort.config.kind === "integer" || sort.config.kind === "date") {
2380
+ return typeof value === "bigint" ? encodeSortableInt64(value) : null;
2381
+ }
2382
+ if (sort.config.kind === "float") {
2383
+ return typeof value === "number" ? encodeSortableFloat64(value) : null;
2384
+ }
2385
+ if (sort.config.kind === "bool") {
2386
+ return typeof value === "boolean" ? encodeSortableBool(value) : null;
2387
+ }
2388
+ return null;
2389
+ }
2390
+
2391
+ function resolveSearchCursorFieldBound(request: SearchRequest): SearchCursorFieldBound | null {
2392
+ if (!request.searchAfter || request.searchAfter.length === 0) return null;
2393
+ const leadingSort = request.sort[0];
2394
+ if (!leadingSort || leadingSort.kind !== "field") return null;
2395
+ if (
2396
+ leadingSort.config.kind !== "integer" &&
2397
+ leadingSort.config.kind !== "float" &&
2398
+ leadingSort.config.kind !== "date" &&
2399
+ leadingSort.config.kind !== "bool"
2400
+ ) {
2401
+ return null;
2402
+ }
2403
+ const after = normalizeSearchAfterValue(leadingSort, request.searchAfter[0]);
2404
+ return {
2405
+ kind: "field",
2406
+ sort: leadingSort,
2407
+ after,
2408
+ encoded: encodeSearchCursorValue(leadingSort, after),
2409
+ };
2410
+ }
2411
+
2412
+ function normalizeSearchAfterValue(sort: SearchSortSpec, raw: unknown): bigint | number | string | boolean | null {
2413
+ if (raw == null) return null;
2414
+ if (sort.kind === "offset") {
2415
+ if (typeof raw !== "string") return null;
2416
+ const parsed = parseOffsetResult(raw);
2417
+ if (Result.isError(parsed)) return null;
2418
+ return offsetToSeqOrNeg1(parsed.value);
2419
+ }
2420
+ if (sort.kind === "score") {
2421
+ return typeof raw === "number" && Number.isFinite(raw) ? raw : null;
2422
+ }
2423
+ if (sort.config.kind === "integer" || sort.config.kind === "date") {
2424
+ if (typeof raw === "number" && Number.isFinite(raw)) return BigInt(Math.trunc(raw));
2425
+ if (typeof raw === "string" && raw.trim() !== "") {
2426
+ try {
2427
+ return BigInt(raw.trim());
2428
+ } catch {
2429
+ return null;
2430
+ }
2431
+ }
2432
+ return null;
2433
+ }
2434
+ if (sort.config.kind === "float") return typeof raw === "number" && Number.isFinite(raw) ? raw : null;
2435
+ if (sort.config.kind === "bool") return typeof raw === "boolean" ? raw : null;
2436
+ return typeof raw === "string" ? raw : null;
2437
+ }
2438
+
2439
+ function compareSearchAfter(hit: SearchHitInternal, sorts: SearchSortSpec[], searchAfter: unknown[]): number {
2440
+ return compareSearchAfterValues(hit.sortInternal, sorts, searchAfter);
2441
+ }