@tungthedev/streams-server 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/CODE_OF_CONDUCT.md +45 -0
  2. package/CONTRIBUTING.md +76 -0
  3. package/LICENSE +201 -0
  4. package/README.md +58 -0
  5. package/SECURITY.md +42 -0
  6. package/bin/prisma-streams-server +2 -0
  7. package/package.json +46 -0
  8. package/src/app.ts +583 -0
  9. package/src/app_core.ts +3144 -0
  10. package/src/app_local.ts +206 -0
  11. package/src/auth.ts +124 -0
  12. package/src/auto_tune.ts +69 -0
  13. package/src/backpressure.ts +66 -0
  14. package/src/bootstrap.ts +613 -0
  15. package/src/compute/demo_entry.ts +415 -0
  16. package/src/compute/demo_site.ts +1242 -0
  17. package/src/compute/entry.ts +19 -0
  18. package/src/compute/package_entry.ts +4 -0
  19. package/src/compute/virtual-modules.d.ts +15 -0
  20. package/src/compute/worker_module_url.ts +9 -0
  21. package/src/concurrency_gate.ts +108 -0
  22. package/src/config.ts +402 -0
  23. package/src/db/bootstrap_store.ts +9 -0
  24. package/src/db/db.ts +2424 -0
  25. package/src/db/schema.ts +925 -0
  26. package/src/db/sqlite_manifest_snapshot.ts +81 -0
  27. package/src/db/sqlite_touch_store.ts +491 -0
  28. package/src/db/sqlite_wal_store.ts +472 -0
  29. package/src/details/full_mode_details.ts +568 -0
  30. package/src/expiry_sweeper.ts +47 -0
  31. package/src/foreground_activity.ts +55 -0
  32. package/src/hist.ts +169 -0
  33. package/src/index/binary_fuse.ts +379 -0
  34. package/src/index/indexer.ts +947 -0
  35. package/src/index/lexicon_file_cache.ts +261 -0
  36. package/src/index/lexicon_format.ts +93 -0
  37. package/src/index/lexicon_indexer.ts +863 -0
  38. package/src/index/run_cache.ts +84 -0
  39. package/src/index/run_format.ts +213 -0
  40. package/src/index/schedule.ts +28 -0
  41. package/src/index/secondary_indexer.ts +901 -0
  42. package/src/index/secondary_schema.ts +105 -0
  43. package/src/ingest.ts +309 -0
  44. package/src/lens/lens.ts +501 -0
  45. package/src/manifest.ts +249 -0
  46. package/src/memory.ts +334 -0
  47. package/src/metrics.ts +147 -0
  48. package/src/metrics_emitter.ts +83 -0
  49. package/src/notifier.ts +180 -0
  50. package/src/objectstore/accounting.ts +151 -0
  51. package/src/objectstore/interface.ts +13 -0
  52. package/src/objectstore/mock_r2.ts +269 -0
  53. package/src/objectstore/null.ts +32 -0
  54. package/src/objectstore/r2.ts +318 -0
  55. package/src/observe/pairing.ts +61 -0
  56. package/src/observe/request.ts +772 -0
  57. package/src/offset.ts +70 -0
  58. package/src/postgres/bootstrap.ts +269 -0
  59. package/src/postgres/companions.ts +197 -0
  60. package/src/postgres/control_restore.ts +109 -0
  61. package/src/postgres/details.ts +189 -0
  62. package/src/postgres/lexicon_index.ts +260 -0
  63. package/src/postgres/routing_index.ts +189 -0
  64. package/src/postgres/rows.ts +132 -0
  65. package/src/postgres/schema.ts +355 -0
  66. package/src/postgres/secondary_index.ts +238 -0
  67. package/src/postgres/segments.ts +900 -0
  68. package/src/postgres/stats.ts +103 -0
  69. package/src/postgres/store.ts +947 -0
  70. package/src/postgres/touch.ts +591 -0
  71. package/src/postgres/types.ts +32 -0
  72. package/src/profiles/evlog/schema.ts +234 -0
  73. package/src/profiles/evlog.ts +473 -0
  74. package/src/profiles/generic.ts +51 -0
  75. package/src/profiles/index.ts +237 -0
  76. package/src/profiles/metrics/block_format.ts +109 -0
  77. package/src/profiles/metrics/normalize.ts +366 -0
  78. package/src/profiles/metrics/schema.ts +319 -0
  79. package/src/profiles/metrics.ts +83 -0
  80. package/src/profiles/otelTraces/normalize.ts +955 -0
  81. package/src/profiles/otelTraces/otlp.ts +1002 -0
  82. package/src/profiles/otelTraces/schema.ts +408 -0
  83. package/src/profiles/otelTraces.ts +390 -0
  84. package/src/profiles/profile.ts +284 -0
  85. package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
  86. package/src/profiles/stateProtocol/changes.ts +24 -0
  87. package/src/profiles/stateProtocol/ingest.ts +115 -0
  88. package/src/profiles/stateProtocol/routes.ts +511 -0
  89. package/src/profiles/stateProtocol/types.ts +6 -0
  90. package/src/profiles/stateProtocol/validation.ts +51 -0
  91. package/src/profiles/stateProtocol.ts +107 -0
  92. package/src/read_filter.ts +468 -0
  93. package/src/reader.ts +2986 -0
  94. package/src/runtime/hash.ts +156 -0
  95. package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
  96. package/src/runtime/hash_vendor/NOTICE.md +8 -0
  97. package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
  98. package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
  99. package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
  100. package/src/runtime/host_runtime.ts +5 -0
  101. package/src/runtime_memory.ts +200 -0
  102. package/src/runtime_memory_sampler.ts +237 -0
  103. package/src/schema/lens_schema.ts +290 -0
  104. package/src/schema/proof.ts +547 -0
  105. package/src/schema/read_json.ts +51 -0
  106. package/src/schema/registry.ts +966 -0
  107. package/src/search/agg_format.ts +638 -0
  108. package/src/search/aggregate.ts +409 -0
  109. package/src/search/binary/codec.ts +162 -0
  110. package/src/search/binary/docset.ts +67 -0
  111. package/src/search/binary/restart_strings.ts +181 -0
  112. package/src/search/binary/varint.ts +34 -0
  113. package/src/search/bitset.ts +19 -0
  114. package/src/search/col_format.ts +382 -0
  115. package/src/search/col_runtime.ts +59 -0
  116. package/src/search/column_encoding.ts +43 -0
  117. package/src/search/companion_file_cache.ts +319 -0
  118. package/src/search/companion_format.ts +327 -0
  119. package/src/search/companion_manager.ts +1305 -0
  120. package/src/search/companion_plan.ts +229 -0
  121. package/src/search/exact_format.ts +281 -0
  122. package/src/search/exact_runtime.ts +55 -0
  123. package/src/search/fts_format.ts +423 -0
  124. package/src/search/fts_runtime.ts +333 -0
  125. package/src/search/query.ts +875 -0
  126. package/src/search/schema.ts +245 -0
  127. package/src/segment/cache.ts +270 -0
  128. package/src/segment/cached_segment.ts +89 -0
  129. package/src/segment/format.ts +403 -0
  130. package/src/segment/segmenter.ts +412 -0
  131. package/src/segment/segmenter_worker.ts +72 -0
  132. package/src/segment/segmenter_workers.ts +130 -0
  133. package/src/server.ts +264 -0
  134. package/src/server_auto_tune.ts +158 -0
  135. package/src/sqlite/adapter.ts +335 -0
  136. package/src/sqlite/runtime_stats.ts +163 -0
  137. package/src/stats.ts +205 -0
  138. package/src/store/append.ts +50 -0
  139. package/src/store/bootstrap_restore_store.ts +71 -0
  140. package/src/store/capabilities.ts +86 -0
  141. package/src/store/full_mode_details_store.ts +71 -0
  142. package/src/store/index_store.ts +104 -0
  143. package/src/store/profile_touch_store.ts +1 -0
  144. package/src/store/rows.ts +144 -0
  145. package/src/store/schema_profile_store.ts +73 -0
  146. package/src/store/schema_publication.ts +6 -0
  147. package/src/store/segment_manifest_store.ts +129 -0
  148. package/src/store/segment_read_store.ts +22 -0
  149. package/src/store/stats_accounting_store.ts +83 -0
  150. package/src/store/touch_store.ts +98 -0
  151. package/src/store/wal_store.ts +21 -0
  152. package/src/stream_size_reconciler.ts +100 -0
  153. package/src/touch/canonical_change.ts +7 -0
  154. package/src/touch/live_keys.ts +158 -0
  155. package/src/touch/live_metrics.ts +841 -0
  156. package/src/touch/live_templates.ts +449 -0
  157. package/src/touch/manager.ts +1292 -0
  158. package/src/touch/process_batch.ts +576 -0
  159. package/src/touch/processor_worker.ts +85 -0
  160. package/src/touch/spec.ts +459 -0
  161. package/src/touch/touch_journal.ts +771 -0
  162. package/src/touch/touch_key_id.ts +20 -0
  163. package/src/touch/worker_pool.ts +191 -0
  164. package/src/touch/worker_protocol.ts +57 -0
  165. package/src/types/proper-lockfile.d.ts +1 -0
  166. package/src/uploader.ts +358 -0
  167. package/src/util/base32_crockford.ts +81 -0
  168. package/src/util/bloom256.ts +67 -0
  169. package/src/util/byte_lru.ts +73 -0
  170. package/src/util/cleanup.ts +22 -0
  171. package/src/util/crc32c.ts +29 -0
  172. package/src/util/ds_error.ts +15 -0
  173. package/src/util/duration.ts +17 -0
  174. package/src/util/endian.ts +53 -0
  175. package/src/util/json_pointer.ts +148 -0
  176. package/src/util/log.ts +25 -0
  177. package/src/util/lru.ts +53 -0
  178. package/src/util/retry.ts +35 -0
  179. package/src/util/siphash.ts +71 -0
  180. package/src/util/stream_paths.ts +50 -0
  181. package/src/util/time.ts +14 -0
  182. package/src/util/yield.ts +3 -0
  183. package/src/util/zstd.ts +24 -0
package/src/reader.ts ADDED
@@ -0,0 +1,2986 @@
1
+ import type { Config } from "./config";
2
+ import type { ObjectStore } from "./objectstore/interface";
3
+ import type {
4
+ SearchSegmentCompanionReadRow as SearchSegmentCompanionRow,
5
+ SegmentReadRow as SegmentRow,
6
+ SegmentReadStore,
7
+ StreamReadStore,
8
+ } from "./store/segment_read_store";
9
+ import type { WalReadStore } from "./store/wal_store";
10
+ import {
11
+ type CompiledReadFilter,
12
+ type ReadFilterColumnClause,
13
+ collectPositiveColumnFilterClauses,
14
+ collectPositiveExactFilterClauses,
15
+ evaluateReadFilterResult,
16
+ } from "./read_filter";
17
+ import { decodeJsonPayloadWithRegistryResult } from "./schema/read_json";
18
+ import { SchemaRegistryStore } from "./schema/registry";
19
+ import { parseOffsetResult, offsetToSeqOrNeg1, encodeOffset } from "./offset";
20
+ import {
21
+ type BlockIndexEntry,
22
+ decodeBlockResult,
23
+ iterateBlocksResult,
24
+ parseBlockHeaderResult,
25
+ parseFooter,
26
+ parseFooterBytes,
27
+ DSB3_HEADER_BYTES,
28
+ } from "./segment/format";
29
+ import { SegmentDiskCache, type SegmentCacheStats } from "./segment/cache";
30
+ import { loadSegmentBytesCached, loadSegmentSource, readRangeFromSource, type SegmentReadSource } from "./segment/cached_segment";
31
+ import { Bloom256 } from "./util/bloom256";
32
+ import { readU32BE } from "./util/endian";
33
+ import { type RetryOptions } from "./util/retry";
34
+ import { retry } from "./util/retry";
35
+ import type { IndexCandidate, StreamIndexLookup } from "./index/indexer";
36
+ import { segmentObjectKey, streamHash16Hex } from "./util/stream_paths";
37
+ import { dsError } from "./util/ds_error.ts";
38
+ import { Result } from "better-result";
39
+ import { filterDocIdsByColumnResult } from "./search/col_runtime";
40
+ import { filterDocIdsByExactClausesResult } from "./search/exact_runtime";
41
+ import {
42
+ type AggregateRequest,
43
+ cloneAggMeasureState,
44
+ extractRollupContributionResult,
45
+ extractRollupEligibility,
46
+ formatAggMeasureState,
47
+ mergeAggMeasureState,
48
+ } from "./search/aggregate";
49
+ import {
50
+ type CompiledSearchQuery,
51
+ type SearchColumnClause,
52
+ type SearchEvaluation,
53
+ type SearchExactClause,
54
+ type SearchFtsClause,
55
+ type SearchRequest,
56
+ type SearchSortSpec,
57
+ buildSearchDocumentResult,
58
+ collectPositiveSearchColumnClauses,
59
+ collectPositiveSearchExactClauses,
60
+ collectPositiveSearchFtsClauses,
61
+ evaluateSearchQueryResult,
62
+ extractSearchHitFieldsResult,
63
+ } from "./search/query";
64
+ import { filterDocIdsByFtsClausesResult } from "./search/fts_runtime";
65
+ import { canonicalizeColumnValue, canonicalizeExactValue } from "./search/schema";
66
+ import { encodeSortableBool, encodeSortableFloat64, encodeSortableInt64 } from "./search/column_encoding";
67
+ import type { SchemaRegistry, SearchRollupConfig } from "./schema/registry";
68
+ import type { AggMeasureState } from "./search/agg_format";
69
+ import type { MetricsBlockSectionView } from "./profiles/metrics/block_format";
70
+ import { materializeMetricsBlockRecord } from "./profiles/metrics/normalize";
71
+ import { buildDesiredSearchCompanionPlan, hashSearchCompanionPlan } from "./search/companion_plan";
72
+ import { RuntimeMemorySampler } from "./runtime_memory_sampler";
73
+ import type { MemoryPressureMonitor } from "./memory";
74
+
75
/** Format selector carried on a {@link ReadBatch}. */
export type ReadFormat =
  | "raw"
  | "json";
76
+
77
/** Result of one read request: the returned records plus the offsets bracketing them. */
export type ReadBatch = {
  stream: string;
  format: ReadFormat;
  key: string | null;
  requestOffset: string;
  /** Checkpoint at end of stream. */
  endOffset: string;
  /** Checkpoint after this response. */
  nextOffset: string;
  endOffsetSeq: bigint;
  nextOffsetSeq: bigint;
  /** Payload bytes are in wire order. */
  records: { offset: bigint; payload: Uint8Array }[];
  filterScannedBytes?: number;
  filterScanLimitBytes?: number;
  filterScanLimitReached?: boolean;
};
91
+
92
/** A single search hit as exposed in {@link SearchResultBatch.hits}. */
export type SearchHit = {
  offset: string;
  score: number;
  sort: unknown[];
  fields: Record<string, unknown>;
  source: unknown;
};
99
+
100
/**
 * Response shape for a search request: the hits, a total estimate, and a
 * `coverage` section holding visibility bounds plus per-phase counters and timings.
 */
export type SearchResultBatch = {
  stream: string;
  snapshotEndOffset: string;
  tookMs: number;
  timedOut: boolean;
  timeoutMs: number | null;
  coverage: {
    // Visibility / completeness of the searched range.
    mode: "complete" | "published";
    complete: boolean;
    streamHeadOffset: string;
    visibleThroughOffset: string;
    visibleThroughPrimaryTimestampMax: string | null;
    oldestOmittedAppendAt: string | null;
    possibleMissingEventsUpperBound: number;
    possibleMissingUploadedSegments: number;
    possibleMissingSealedRows: number;
    possibleMissingWalRows: number;

    // Execution counters and timings.
    indexedSegments: number;
    indexedSegmentTimeMs: number;
    ftsSectionGetMs: number;
    ftsDecodeMs: number;
    ftsClauseEstimateMs: number;
    scannedSegments: number;
    scannedSegmentTimeMs: number;
    scannedTailDocs: number;
    scannedTailTimeMs: number;
    exactCandidateTimeMs: number;
    candidateDocIds: number;
    decodedRecords: number;
    jsonParseTimeMs: number;
    segmentPayloadBytesFetched: number;
    sortTimeMs: number;
    peakHitsHeld: number;
    indexFamiliesUsed: string[];
  };
  total: {
    value: number;
    relation: "eq" | "gte";
  };
  hits: SearchHit[];
  nextSearchAfter: unknown[] | null;
};
142
+
143
/**
 * Response shape for an aggregate request: time buckets, each holding groups of
 * measures, plus a `coverage` section describing what was visible to the query.
 */
export type AggregateResultBatch = {
  stream: string;
  rollup: string;
  from: string;
  to: string;
  interval: string;
  coverage: {
    // Visibility / completeness of the aggregated range.
    mode: "complete" | "published";
    complete: boolean;
    streamHeadOffset: string;
    visibleThroughOffset: string;
    visibleThroughPrimaryTimestampMax: string | null;
    oldestOmittedAppendAt: string | null;
    possibleMissingEventsUpperBound: number;
    possibleMissingUploadedSegments: number;
    possibleMissingSealedRows: number;
    possibleMissingWalRows: number;

    // Execution counters.
    usedRollups: boolean;
    indexedSegments: number;
    scannedSegments: number;
    scannedTailDocs: number;
    indexFamiliesUsed: string[];
  };
  buckets: {
    start: string;
    end: string;
    groups: {
      key: Record<string, string | null>;
      measures: Record<string, unknown>;
    }[];
  }[];
};
175
+
176
/** Error variants produced by the reader, discriminated by `kind`. */
export type ReaderError =
  | { kind: "not_found"; message: string }
  | { kind: "gone"; message: string }
  | { kind: "invalid_offset"; message: string }
  | { kind: "internal"; message: string };
181
+
182
/** 100 MiB, expressed in bytes. */
const READ_FILTER_SCAN_LIMIT_BYTES = 100 * 1024 * 1024;

/** Candidate segment indexes (`null` when no candidate set is available) and the indexed-through marker. */
type SegmentCandidateInfo = {
  segments: Set<number> | null;
  indexedThrough: number;
};

/** Candidate doc ids (`null` when no candidate set is available) and the index families that produced them. */
type SearchFamilyCandidateInfo = {
  docIds: Set<number> | null;
  usedFamilies: Set<string>;
};

/** Cached exact-match values for a WAL sequence range, tagged with a schema key. */
type HotWalExactCache = {
  startSeq: bigint;
  endSeq: bigint;
  schemaKey: string;
  values: Map<string, Map<string, bigint[]>>;
};
191
/** Block index of a segment plus an accessor that reads one block's bytes. */
type SegmentRangeBlockReader = {
  blocks: BlockIndexEntry[];
  readBlock: (block: BlockIndexEntry) => Promise<Result<Uint8Array, ReaderError>>;
  fetchedBytes: () => number;
};

/** Internal search hit: carries both the internal and the response form of the sort key. */
type SearchHitInternal = {
  offsetSeq: bigint;
  offset: string;
  score: number;
  sortInternal: (bigint | number | string | boolean | null)[];
  sortResponse: unknown[];
  fields: Record<string, unknown>;
  source: unknown;
};

/** Internal aggregate group holding measure states rather than formatted values. */
type AggregateGroupInternal = {
  key: Record<string, string | null>;
  measures: Record<string, AggMeasureState>;
};

/** Search-after bound for a field sort. */
type SearchCursorFieldBound = {
  kind: "field";
  sort: Extract<SearchSortSpec, { kind: "field" }>;
  after: bigint | number | string | boolean | null;
  encoded: Uint8Array | null;
};
215
/** Coverage snapshot returned by `computePublishedCoverageState`. */
type PublishedCoverageState = {
  mode: "complete" | "published";
  complete: boolean;
  canSearchWalTail: boolean;

  // Segment counts.
  publishedSegmentCount: number;
  visiblePublishedSegmentCount: number;

  // Visibility bounds.
  streamHeadOffset: string;
  visibleThroughSeq: bigint;
  visibleThroughOffset: string;
  visibleThroughPrimaryTimestampMax: string | null;
  oldestOmittedAppendAt: string | null;

  // Upper bounds on data that may be missing from the visible range.
  possibleMissingEventsUpperBound: number;
  possibleMissingUploadedSegments: number;
  possibleMissingSealedRows: number;
  possibleMissingWalRows: number;
};
231
+
232
/** Segment rows chosen for a read, in visiting order, plus the sealed end sequence they were planned against. */
type PlannedReadSegments = {
  segments: SegmentRow[];
  sealedEndSeq: bigint;
};

/** Visiting order for planned segments. */
type PlannedReadOrder = "asc" | "desc";

/** The `field` variant of {@link SearchSortSpec}. */
type PrimaryTimestampTopKSort = Extract<SearchSortSpec, { kind: "field" }>;

/** Store capabilities the reader always requires: stream reads and WAL reads. */
type ReaderStore = StreamReadStore & WalReadStore;

/** Optional capabilities for reading segment data; the disk cache and index are optional within it. */
export type SegmentReadBundle = {
  segmentReads: SegmentReadStore;
  objectStore: ObjectStore;
  diskCache?: SegmentDiskCache;
  index?: StreamIndexLookup;
};
245
+
246
/**
 * Produces a printable message for an arbitrary thrown value.
 *
 * Uses the value's `message` property when it is neither `null` nor `undefined`
 * (it is stringified, so non-string messages are accepted); otherwise falls back
 * to `String(e)`.
 *
 * @param e - Any caught value; not required to be an `Error`.
 * @returns The stringified message.
 */
function errorMessage(e: unknown): string {
  // A narrow structural assertion replaces the former `as any`; optional chaining
  // keeps null/undefined inputs safe, and primitives simply yield `undefined`.
  const message = (e as { message?: unknown } | null | undefined)?.message;
  return String(message ?? e);
}
249
+
250
/** Encodes `s` as UTF-8 and returns the resulting bytes. */
function utf8Bytes(s: string): Uint8Array {
  const encoder = new TextEncoder();
  return encoder.encode(s);
}
253
+
254
/**
 * Parses a JSON-encoded list of section names into a set.
 *
 * Best-effort by design: invalid JSON or a non-array value yields an empty set,
 * and non-string array entries are dropped.
 *
 * @param value - JSON text expected to hold an array of strings.
 * @returns The string entries of the array, in first-seen order.
 */
function parseCompanionSections(value: string): Set<string> {
  const sections = new Set<string>();
  let decoded: unknown;
  try {
    decoded = JSON.parse(value);
  } catch {
    return sections;
  }
  if (!Array.isArray(decoded)) return sections;
  for (const entry of decoded) {
    if (typeof entry === "string") sections.add(entry);
  }
  return sections;
}
262
+
263
/**
 * Loads the bytes of a segment by delegating to `loadSegmentBytesCached`,
 * forwarding all arguments unchanged.
 *
 * @param os - Object store passed through to the cached loader.
 * @param seg - Segment row to load.
 * @param diskCache - Optional disk cache passed through to the cached loader.
 * @param retryOpts - Optional retry settings passed through to the cached loader.
 */
async function loadSegmentBytes(
  os: ObjectStore,
  seg: SegmentRow,
  diskCache?: SegmentDiskCache,
  retryOpts?: RetryOptions
): Promise<Uint8Array> {
  const bytes = await loadSegmentBytesCached(os, seg, diskCache, retryOpts);
  return bytes;
}
271
+
272
/**
 * Returns the byte position where a segment's data ends.
 *
 * The final 8 bytes are read as a trailer: a big-endian u32 footer length in
 * bytes 0-3, then the magic `DSF1` in bytes 4-7. When the magic matches and the
 * computed footer start is non-negative, that start is returned; in every other
 * case the full `seg.size_bytes` is returned.
 *
 * @param seg - Segment row; only `size_bytes` is used.
 * @param source - Source the trailer bytes are read from.
 */
function loadSegmentDataLimitFromSource(seg: SegmentRow, source: SegmentReadSource): number {
  const totalBytes = seg.size_bytes;
  if (totalBytes < 8) return totalBytes;

  const trailer = readRangeFromSource(source, totalBytes - 8, totalBytes - 1);
  const hasFooterMagic = String.fromCharCode(trailer[4], trailer[5], trailer[6], trailer[7]) === "DSF1";
  if (!hasFooterMagic) return totalBytes;

  const footerStart = totalBytes - 8 - readU32BE(trailer, 0);
  if (footerStart >= 0) return footerStart;
  return totalBytes;
}
281
+
282
/**
 * Binary-searches `blocks` for the last entry whose `firstOffset` is `<= seq`.
 *
 * Returns 0 when there are fewer than two blocks or when no probed entry
 * qualifies. The search presumes `blocks` is ordered by ascending `firstOffset`.
 *
 * @param blocks - Block index entries to search.
 * @param seq - Sequence number to locate.
 * @returns Index of the block to start reading from.
 */
function findFirstRelevantBlockIndex(blocks: BlockIndexEntry[], seq: bigint): number {
  if (blocks.length <= 1) return 0;

  let candidate = 0;
  let left = 0;
  let right = blocks.length - 1;
  while (left <= right) {
    const probe = (left + right) >>> 1;
    if (blocks[probe]!.firstOffset > seq) {
      // Probe starts after `seq`: the answer lies strictly to the left.
      right = probe - 1;
      continue;
    }
    // Probe qualifies; remember it and look for a later qualifying block.
    candidate = probe;
    left = probe + 1;
  }
  return candidate;
}
298
+
299
/**
 * Reads a segment's footer and returns its block index, or `null` when there is none.
 *
 * The final 8 bytes are read as a trailer: a big-endian u32 footer length in
 * bytes 0-3, then the magic `DSF1` in bytes 4-7. `null` is returned when the
 * segment is shorter than the trailer, the magic does not match, the footer
 * would start before byte 0, or `parseFooterBytes` yields no blocks.
 *
 * @param seg - Segment row; only `size_bytes` is used.
 * @param source - Source the trailer and footer bytes are read from.
 */
function loadSegmentFooterBlocksFromSource(seg: SegmentRow, source: SegmentReadSource): BlockIndexEntry[] | null {
  const totalBytes = seg.size_bytes;
  if (totalBytes < 8) return null;

  const trailer = readRangeFromSource(source, totalBytes - 8, totalBytes - 1);
  const hasFooterMagic = String.fromCharCode(trailer[4], trailer[5], trailer[6], trailer[7]) === "DSF1";
  if (!hasFooterMagic) return null;

  const footerLen = readU32BE(trailer, 0);
  const footerStart = totalBytes - 8 - footerLen;
  if (footerStart < 0) return null;

  const footerEnd = footerStart + footerLen - 1;
  const parsedFooter = parseFooterBytes(readRangeFromSource(source, footerStart, footerEnd));
  return parsedFooter?.blocks ?? null;
}
311
+
312
+ export class StreamReader {
313
+ private readonly config: Config;
314
+ private readonly store: ReaderStore;
315
+ private readonly segmentReads?: SegmentReadStore;
316
+ private readonly os?: ObjectStore;
317
+ private readonly registry: SchemaRegistryStore;
318
+ private readonly diskCache?: SegmentDiskCache;
319
+ private readonly index?: StreamIndexLookup;
320
+ private readonly memorySampler?: RuntimeMemorySampler;
321
+ private readonly memory?: MemoryPressureMonitor;
322
+ private readonly hotWalExact = new Map<string, HotWalExactCache>();
323
+
324
+ constructor(
325
+ config: Config,
326
+ store: ReaderStore,
327
+ registry: SchemaRegistryStore,
328
+ segmentBundle?: SegmentReadBundle,
329
+ memorySampler?: RuntimeMemorySampler,
330
+ memory?: MemoryPressureMonitor
331
+ ) {
332
+ this.config = config;
333
+ this.store = store;
334
+ this.segmentReads = segmentBundle?.segmentReads;
335
+ this.os = segmentBundle?.objectStore;
336
+ this.registry = registry;
337
+ this.diskCache = segmentBundle?.diskCache;
338
+ this.index = segmentBundle?.index;
339
+ this.memorySampler = memorySampler;
340
+ this.memory = memory;
341
+ }
342
+
343
/** Returns the configured object store, or an `internal` error when none was provided. */
private requireObjectStore(): Result<ObjectStore, ReaderError> {
  if (this.os) return Result.ok(this.os);
  return Result.err({ kind: "internal", message: "object store capability required for segment reads" });
}
346
+
347
/** Delegates to `segmentReads.listSegmentsForRead`; resolves to `[]` when that yields nothing (e.g. no segment store). */
private listSegmentsForStream(stream: string): Promise<SegmentRow[]> {
  const delegated = this.segmentReads?.listSegmentsForRead(stream);
  return delegated ?? Promise.resolve([]);
}
350
+
351
/** Delegates to `segmentReads.getSegmentByIndexForRead`; resolves to `null` when that yields nothing (e.g. no segment store). */
private getSegmentByIndex(stream: string, segmentIndex: number): Promise<SegmentRow | null> {
  const delegated = this.segmentReads?.getSegmentByIndexForRead(stream, segmentIndex);
  return delegated ?? Promise.resolve(null);
}
354
+
355
/** Delegates to `segmentReads.findSegmentForOffsetForRead`; resolves to `null` when that yields nothing (e.g. no segment store). */
private findSegmentForOffset(stream: string, offset: bigint): Promise<SegmentRow | null> {
  const delegated = this.segmentReads?.findSegmentForOffsetForRead(stream, offset);
  return delegated ?? Promise.resolve(null);
}
358
+
359
/** Delegates to `segmentReads.countSegmentsForRead`; resolves to `0` when that yields nothing (e.g. no segment store). */
private countSegmentsForStream(stream: string): Promise<number> {
  const delegated = this.segmentReads?.countSegmentsForRead(stream);
  return delegated ?? Promise.resolve(0);
}
362
+
363
/** Delegates to `segmentReads.getSearchCompanionPlanForRead`; falls back to a promise of `null` when that yields nothing. */
private getSearchCompanionPlan(stream: string) {
  const delegated = this.segmentReads?.getSearchCompanionPlanForRead(stream);
  return delegated ?? Promise.resolve(null);
}
366
+
367
/** Delegates to `segmentReads.listSearchSegmentCompanionsForRead`; falls back to a promise of `[]` when that yields nothing. */
private listSearchSegmentCompanions(stream: string) {
  const delegated = this.segmentReads?.listSearchSegmentCompanionsForRead(stream);
  return delegated ?? Promise.resolve([]);
}
370
+
371
/** Delegates to `segmentReads.getSearchSegmentCompanionForRead`; falls back to a promise of `null` when that yields nothing. */
private getSearchSegmentCompanion(stream: string, segmentIndex: number) {
  const delegated = this.segmentReads?.getSearchSegmentCompanionForRead(stream, segmentIndex);
  return delegated ?? Promise.resolve(null);
}
374
+
375
/**
 * Reports whether the stream row references sealed or uploaded data that this
 * reader cannot serve because it has no segment read store.
 *
 * @param srow - Stream row; a negative `sealed_through` / `uploaded_through` is treated as "none".
 * @returns An `internal` error when the capability is missing but needed, otherwise `null`.
 */
private missingSegmentCapabilityError(srow: { sealed_through: bigint; uploaded_through: bigint }): ReaderError | null {
  const referencesSegmentData = srow.sealed_through >= 0n || srow.uploaded_through >= 0n;
  if (this.segmentReads || !referencesSegmentData) return null;
  return { kind: "internal", message: "segment read capability required for sealed stream data" };
}
380
+
381
/**
 * Plans which sealed segments to visit for `[startSeq, sealedEndSeq]` using an
 * index-derived candidate set.
 *
 * Segment indexes below `indexedThrough` are visited only if they appear in
 * `candidateSegments`; segment indexes at or above `indexedThrough` (the tail)
 * are always visited. In `"asc"` order the candidate prefix precedes the tail,
 * both ascending; in `"desc"` order the tail precedes the prefix, both descending.
 *
 * @returns The planned rows, an empty plan when `startSeq > sealedEndSeq`, or
 *   `null` when there is no candidate set or any needed segment row cannot be found.
 */
private async planSealedReadSegments(
  stream: string,
  startSeq: bigint,
  sealedEndSeq: bigint,
  candidateSegments: Set<number> | null,
  indexedThrough: number,
  order: PlannedReadOrder = "asc"
): Promise<PlannedReadSegments | null> {
  if (startSeq > sealedEndSeq) return { segments: [], sealedEndSeq };
  if (candidateSegments == null) return null;

  const firstSegment = await this.findSegmentForOffset(stream, startSeq);
  const lastSegment = await this.findSegmentForOffset(stream, sealedEndSeq);
  if (!firstSegment || !lastSegment) return null;

  const lowIndex = firstSegment.segment_index;
  const highIndex = lastSegment.segment_index;
  const prefixHigh = Math.min(highIndex, indexedThrough - 1);
  const tailLow = Math.max(lowIndex, indexedThrough);
  const ascending = order === "asc";

  // Indexed prefix: only candidates inside [lowIndex, prefixHigh].
  const prefix = Array.from(candidateSegments)
    .filter((segmentIndex) => segmentIndex >= lowIndex && segmentIndex <= prefixHigh)
    .sort(ascending ? (a, b) => a - b : (a, b) => b - a);

  // Unindexed tail: every segment in [tailLow, highIndex].
  const tail: number[] = [];
  if (ascending) {
    for (let segmentIndex = tailLow; segmentIndex <= highIndex; segmentIndex++) tail.push(segmentIndex);
  } else {
    for (let segmentIndex = highIndex; segmentIndex >= tailLow; segmentIndex--) tail.push(segmentIndex);
  }

  // Prefix values are < indexedThrough and tail values are >= indexedThrough, and the
  // prefix comes from a Set, so the combined list cannot contain duplicates.
  const visitOrder = ascending ? prefix.concat(tail) : tail.concat(prefix);

  const segments: SegmentRow[] = [];
  for (const segmentIndex of visitOrder) {
    const row = await this.getSegmentByIndex(stream, segmentIndex);
    if (!row) return null;
    segments.push(row);
  }
  return { segments, sealedEndSeq };
}
446
+
447
/**
 * Plans a visit of every sealed segment between the segment holding `startSeq`
 * and the segment holding `sealedEndSeq`, in the requested order.
 *
 * @returns The planned rows, an empty plan when `startSeq > sealedEndSeq`, or
 *   `null` when a boundary segment or any segment in between cannot be found.
 */
private async planAllSealedReadSegments(
  stream: string,
  startSeq: bigint,
  sealedEndSeq: bigint,
  order: PlannedReadOrder = "asc"
): Promise<PlannedReadSegments | null> {
  if (startSeq > sealedEndSeq) return { segments: [], sealedEndSeq };

  const firstSegment = await this.findSegmentForOffset(stream, startSeq);
  const lastSegment = await this.findSegmentForOffset(stream, sealedEndSeq);
  if (!firstSegment || !lastSegment) return null;

  const ascending = order === "asc";
  const step = ascending ? 1 : -1;
  const fromIndex = ascending ? firstSegment.segment_index : lastSegment.segment_index;
  const toIndex = ascending ? lastSegment.segment_index : firstSegment.segment_index;

  const segments: SegmentRow[] = [];
  for (let segmentIndex = fromIndex; ascending ? segmentIndex <= toIndex : segmentIndex >= toIndex; segmentIndex += step) {
    const row = await this.getSegmentByIndex(stream, segmentIndex);
    if (!row) return null;
    segments.push(row);
  }
  return { segments, sealedEndSeq };
}
473
+
474
/**
 * Collects the search companion rows that belong to the desired plan generation,
 * keyed by segment index.
 *
 * The desired generation is 1 when no plan row is stored, the stored generation
 * when its hash matches the plan derived from `registry`, and the stored
 * generation + 1 otherwise.
 */
private async currentSearchCompanionRowsBySegment(stream: string, registry: SchemaRegistry): Promise<Map<number, SearchSegmentCompanionRow>> {
  const desiredHash = hashSearchCompanionPlan(buildDesiredSearchCompanionPlan(registry));
  const storedPlan = await this.getSearchCompanionPlan(stream);

  let desiredGeneration = 1;
  if (storedPlan != null) {
    desiredGeneration = storedPlan.plan_hash === desiredHash ? storedPlan.generation : storedPlan.generation + 1;
  }

  const companions = await this.listSearchSegmentCompanions(stream);
  const bySegment = new Map<number, SearchSegmentCompanionRow>();
  for (const companion of companions) {
    if (companion.plan_generation !== desiredGeneration) continue;
    bySegment.set(companion.segment_index, companion);
  }
  return bySegment;
}
490
+
491
/** Returns the disk cache's stats, or `null` when no disk cache is configured. */
cacheStats(): SegmentCacheStats | null {
  if (!this.diskCache) return null;
  return this.diskCache.stats();
}
494
+
495
/** Builds retry options from the `objectStore*` settings in the config. */
private retryOpts(): RetryOptions {
  const {
    objectStoreRetries: retries,
    objectStoreBaseDelayMs: baseDelayMs,
    objectStoreMaxDelayMs: maxDelayMs,
    objectStoreTimeoutMs: timeoutMs,
  } = this.config;
  return { retries, baseDelayMs, maxDelayMs, timeoutMs };
}
503
+
504
/**
 * Converts a millisecond timestamp into an ISO-8601 string.
 *
 * @param value - Milliseconds since the Unix epoch, or `null`.
 * @returns The ISO string, or `null` when `value` is null/undefined, negative,
 *   non-finite after conversion, or beyond the range a `Date` can represent.
 */
private isoTimestampFromMs(value: bigint | null): string | null {
  if (value == null) return null;
  const ms = Number(value);
  // A Date holds at most 8.64e15 ms from the epoch. Beyond that the Date is invalid
  // and toISOString() throws RangeError, so such values are treated as "no timestamp".
  const maxDateMs = 8.64e15;
  if (!Number.isFinite(ms) || ms < 0 || ms > maxDateMs) return null;
  return new Date(ms).toISOString();
}
510
+
511
/**
 * Decides whether the pending WAL rows may be included in a search.
 *
 * Requires all of: pending rows exist; no outstanding published segments or
 * companions; no segment in progress; the stream has been quiet for at least
 * `config.searchWalOverlayQuietPeriodMs` (clamped to >= 0) according to the
 * store's clock; pending bytes do not exceed `config.searchWalOverlayMaxBytes`;
 * and pending rows do not exceed `config.segmentTargetRows`.
 */
private async shouldSearchWalTail(
  srow: { pending_rows: bigint; pending_bytes: bigint; last_append_ms: bigint; segment_in_progress: number },
  hasOutstandingPublishedSegments: boolean,
  hasOutstandingCompanions: boolean
): Promise<boolean> {
  const nothingPending = srow.pending_rows <= 0n;
  if (nothingPending) return false;

  const hasOutstandingWork = hasOutstandingPublishedSegments || hasOutstandingCompanions;
  if (hasOutstandingWork) return false;
  if (srow.segment_in_progress !== 0) return false;

  // Quiet-period check; the store's clock is only consulted once the cheap checks above pass.
  const quietPeriodMs = Math.max(0, this.config.searchWalOverlayQuietPeriodMs);
  const nowMs = await this.store.nowMsForRead();
  const quietForMs = Number(nowMs - srow.last_append_ms);
  if (!Number.isFinite(quietForMs) || quietForMs < quietPeriodMs) return false;

  // Size limits, checked bytes first and then rows.
  if (srow.pending_bytes > BigInt(this.config.searchWalOverlayMaxBytes)) return false;
  return srow.pending_rows <= BigInt(this.config.segmentTargetRows);
}
526
+
527
/**
 * Finds the append time (ISO string) of the oldest data omitted from the visible range.
 *
 * Checked in order, first match wins:
 * 1. a published segment that is not visible: `last_append_ms` of the segment at
 *    index `visiblePublishedSegmentCount`;
 * 2. sealed data beyond `uploaded_through`: `last_append_ms` of the segment
 *    containing `uploaded_through + 1`;
 * 3. pending WAL rows that are not being searched: the store's oldest WAL timestamp.
 *
 * @returns The ISO timestamp, or `null` when nothing is omitted or the timestamp is unavailable.
 */
private async computeOldestOmittedAppendAt(
  stream: string,
  srow: { uploaded_through: bigint; sealed_through: bigint; pending_rows: bigint },
  visiblePublishedSegmentCount: number,
  publishedSegmentCount: number,
  shouldSearchWalTail: boolean
): Promise<string | null> {
  const hasHiddenPublishedSegment = visiblePublishedSegmentCount < publishedSegmentCount;
  if (hasHiddenPublishedSegment) {
    const hiddenSegment = await this.getSegmentByIndex(stream, visiblePublishedSegmentCount);
    const appendMs = hiddenSegment?.last_append_ms ?? null;
    return this.isoTimestampFromMs(appendMs);
  }

  const hasSealedNotUploaded = srow.sealed_through > srow.uploaded_through;
  if (hasSealedNotUploaded) {
    const sealedSegment = await this.findSegmentForOffset(stream, srow.uploaded_through + 1n);
    const appendMs = sealedSegment?.last_append_ms ?? null;
    return this.isoTimestampFromMs(appendMs);
  }

  const walTailOmitted = srow.pending_rows > 0n && !shouldSearchWalTail;
  if (!walTailOmitted) return null;

  const oldestWalMs = await this.store.getWalOldestTimestampMsForRead(stream);
  return this.isoTimestampFromMs(oldestWalMs);
}
547
+
548
/**
 * Summarises how much of `stream` is currently visible to search and gives an
 * upper bound on how many events a search could be missing.
 *
 * The published segment count is derived from the segment that contains
 * `srow.uploaded_through`. When the desired search companion plan enables at
 * least one family, the visible count is narrowed to the contiguous prefix of
 * published segments that have a companion row of the desired plan generation
 * containing at least one enabled family section.
 *
 * @param stream - Stream name.
 * @param srow - Stream row snapshot (offsets, pending WAL counters, epoch).
 * @param registry - Registry whose search configuration drives the companion plan.
 * @returns The coverage state; `complete` is true when the missing-events
 *          upper bound is zero.
 */
private async computePublishedCoverageState(
  stream: string,
  srow: {
    epoch: number;
    next_offset: bigint;
    sealed_through: bigint;
    uploaded_through: bigint;
    pending_rows: bigint;
    pending_bytes: bigint;
    last_append_ms: bigint;
    segment_in_progress: number;
  },
  registry: { search?: { fields: Record<string, unknown> } }
): Promise<PublishedCoverageState> {
  const totalSegmentCount = await this.countSegmentsForStream(stream);

  // Published segments are those up to and including the one holding uploaded_through.
  let publishedSegmentCount = 0;
  if (srow.uploaded_through >= 0n) {
    const uploadedSegment = await this.findSegmentForOffset(stream, srow.uploaded_through);
    publishedSegmentCount = (uploadedSegment?.segment_index ?? -1) + 1;
  }

  const desiredPlan = buildDesiredSearchCompanionPlan(registry as any);
  const planHasFamilies = Object.values(desiredPlan.families).some(Boolean);
  let visiblePublishedSegmentCount = publishedSegmentCount;
  let visibleThroughPrimaryTimestampMax: string | null = null;

  if (planHasFamilies) {
    const desiredHash = hashSearchCompanionPlan(desiredPlan);
    const companionPlanRow = await this.getSearchCompanionPlan(stream);

    // No stored plan -> generation 1; matching hash -> stored generation; otherwise the next one.
    let desiredGeneration = 1;
    if (companionPlanRow != null) {
      desiredGeneration =
        companionPlanRow.plan_hash === desiredHash ? companionPlanRow.generation : companionPlanRow.generation + 1;
    }

    const allCompanions = await this.listSearchSegmentCompanions(stream);
    const currentCompanions = allCompanions.filter((row) => row.plan_generation === desiredGeneration);

    // Segments whose companion carries at least one section for an enabled family.
    const currentSegments = new Set<number>();
    for (const row of currentCompanions) {
      const sections = parseCompanionSections(row.sections_json);
      const hasEnabledFamily = Object.entries(desiredPlan.families).some(
        ([family, enabled]) => enabled && sections.has(family)
      );
      if (hasEnabledFamily) currentSegments.add(row.segment_index);
    }

    // Visible prefix: stop at the first published segment without a usable companion.
    let prefixLength = 0;
    while (prefixLength < publishedSegmentCount && currentSegments.has(prefixLength)) {
      prefixLength += 1;
    }
    visiblePublishedSegmentCount = prefixLength;

    if (visiblePublishedSegmentCount > 0) {
      const lastVisibleIndex = visiblePublishedSegmentCount - 1;
      const visibleCompanionRow = currentCompanions.find((row) => row.segment_index === lastVisibleIndex) ?? null;
      visibleThroughPrimaryTimestampMax = this.isoTimestampFromMs(visibleCompanionRow?.primary_timestamp_max_ms ?? null);
    }
  }

  const hasOutstandingPublishedSegments = publishedSegmentCount < totalSegmentCount;
  const hasOutstandingCompanions = planHasFamilies && visiblePublishedSegmentCount < publishedSegmentCount;
  const hasOmittedPublishedSuffix = hasOutstandingPublishedSegments || hasOutstandingCompanions;
  const canSearchWalTail = await this.shouldSearchWalTail(srow, hasOutstandingPublishedSegments, hasOutstandingCompanions);
  const omitWalTail = srow.pending_rows > 0n && !canSearchWalTail;

  // Default to the stream head; fall back to the end of the last visible
  // segment (or -1) when anything is being omitted.
  let visibleThroughSeq = srow.next_offset - 1n;
  if (hasOmittedPublishedSuffix || omitWalTail) {
    visibleThroughSeq =
      visiblePublishedSegmentCount > 0
        ? ((await this.getSegmentByIndex(stream, visiblePublishedSegmentCount - 1))?.end_offset ?? -1n)
        : -1n;
  }

  const possibleMissingUploadedSegments = Math.max(0, publishedSegmentCount - visiblePublishedSegmentCount);

  let possibleMissingUploadedRows = 0;
  if (hasOmittedPublishedSuffix && srow.uploaded_through > visibleThroughSeq) {
    possibleMissingUploadedRows = Number(srow.uploaded_through - visibleThroughSeq);
  }

  let possibleMissingSealedRows = 0;
  if (hasOmittedPublishedSuffix && srow.sealed_through > srow.uploaded_through) {
    possibleMissingSealedRows = Number(srow.sealed_through - srow.uploaded_through);
  }

  const possibleMissingWalRows = omitWalTail ? Number(srow.pending_rows) : 0;
  const possibleMissingEventsUpperBound = possibleMissingUploadedRows + possibleMissingSealedRows + possibleMissingWalRows;
  const streamHeadOffset = encodeOffset(srow.epoch, srow.next_offset - 1n);
  const oldestOmittedAppendAt = await this.computeOldestOmittedAppendAt(
    stream,
    srow,
    visiblePublishedSegmentCount,
    publishedSegmentCount,
    canSearchWalTail
  );

  const complete = possibleMissingEventsUpperBound === 0;
  return {
    mode: complete ? "complete" : "published",
    complete,
    canSearchWalTail,
    publishedSegmentCount,
    visiblePublishedSegmentCount,
    streamHeadOffset,
    visibleThroughSeq,
    visibleThroughOffset: encodeOffset(srow.epoch, visibleThroughSeq),
    visibleThroughPrimaryTimestampMax,
    oldestOmittedAppendAt,
    possibleMissingEventsUpperBound,
    possibleMissingUploadedSegments,
    possibleMissingSealedRows,
    possibleMissingWalRows,
  };
}
646
+
647
/**
 * Finds the offset to resume reading from so that the first record returned
 * was appended at or after `sinceMs` (optionally restricted to one routing key).
 *
 * Sealed segments are scanned block by block first, then the WAL tail. The
 * returned offset is the one immediately before the first qualifying record;
 * if nothing qualifies, the current stream head offset is returned.
 *
 * @param stream - Stream name.
 * @param sinceMs - Lower timestamp bound in milliseconds.
 * @param key - Routing key to match, or null for any record.
 * @returns The encoded offset, or a `not_found` / `gone` / `internal` error
 *          (plus whatever the segment-capability and object-store checks report).
 */
async seekOffsetByTimestampResult(stream: string, sinceMs: bigint, key: string | null): Promise<Result<string, ReaderError>> {
  const srow = await this.store.getStreamForRead(stream);
  if (!srow || this.store.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
  if (srow.expires_at_ms != null && await this.store.nowMsForRead() > srow.expires_at_ms) {
    return Result.err({ kind: "gone", message: "stream expired" });
  }
  const segmentCapabilityError = this.missingSegmentCapabilityError(srow);
  if (segmentCapabilityError) return Result.err(segmentCapabilityError);

  try {
    // Segment and block timestamps are compared in nanoseconds.
    const sinceNs = sinceMs * 1_000_000n;
    const keyBytes = key ? utf8Bytes(key) : null;

    // The object store is only required when something has been sealed.
    const resolveObjectStore = () => {
      const res = this.requireObjectStore();
      if (Result.isError(res)) return res;
      return Result.ok(res.value);
    };
    const objectStore = srow.sealed_through >= 0n ? resolveObjectStore() : Result.ok(null);
    if (Result.isError(objectStore)) return Result.err(objectStore.error);

    const candidateInfo = await this.resolveCandidateSegments(stream, keyBytes, null);
    const plannedSealedSegments = await this.planSealedReadSegments(
      stream,
      0n,
      srow.sealed_through,
      candidateInfo.segments,
      candidateInfo.indexedThrough,
      "asc"
    );

    // Without a plan, fall back to every segment of the stream.
    const sealedSegments = plannedSealedSegments?.segments ?? await this.listSegmentsForStream(stream);
    for (const seg of sealedSegments) {
      const segBytes = await loadSegmentBytes(objectStore.value!, seg, this.diskCache, this.retryOpts());
      let cursor = seg.start_offset;
      for (const blockRes of iterateBlocksResult(segBytes)) {
        if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
        const block = blockRes.value.decoded;

        // The whole block predates the bound: skip it without inspecting records.
        if (block.lastAppendNs < sinceNs) {
          cursor += BigInt(block.recordCount);
          continue;
        }

        for (const record of block.records) {
          const keyMatches = !keyBytes || bytesEqual(record.routingKey, keyBytes);
          if (keyMatches && record.appendNs >= sinceNs) {
            return Result.ok(encodeOffset(srow.epoch, cursor - 1n));
          }
          cursor += 1n;
        }
      }
    }

    // Scan WAL tail.
    const walStart = srow.sealed_through + 1n;
    const headSeq = srow.next_offset - 1n;
    if (walStart <= headSeq) {
      for await (const rec of this.store.readWalRange(stream, walStart, headSeq, keyBytes ?? undefined)) {
        if (rec.tsMs * 1_000_000n >= sinceNs) {
          return Result.ok(encodeOffset(srow.epoch, rec.offset - 1n));
        }
      }
    }

    // Nothing at or after the bound: point at the stream head.
    return Result.ok(encodeOffset(srow.epoch, headSeq));
  } catch (e: unknown) {
    return Result.err({ kind: "internal", message: errorMessage(e) });
  }
}
720
+
721
/**
 * Throwing variant of `seekOffsetByTimestampResult`.
 *
 * @returns The encoded offset on success.
 * @throws The error produced by `dsError` from the failure message.
 */
async seekOffsetByTimestamp(stream: string, sinceMs: bigint, key: string | null): Promise<string> {
  const outcome = await this.seekOffsetByTimestampResult(stream, sinceMs, key);
  if (!Result.isError(outcome)) return outcome.value;
  throw dsError(outcome.error.message);
}
726
+
727
/**
 * Reads one batch of records from `stream`, starting at the record that
 * follows `offset`.
 *
 * Sealed segments are scanned first, then the WAL tail. A segment is loaded
 * whole when the read is not key-filtered and the segment is no larger than
 * `config.readMaxBytes`; otherwise it is read by range through a segment
 * source. Scanning stops once `config.readMaxRecords` or
 * `config.readMaxBytes` is reached, or, for filtered reads, once
 * `READ_FILTER_SCAN_LIMIT_BYTES` of payload has been examined.
 *
 * @param args.stream - Stream name.
 * @param args.offset - Encoded offset; reading starts after it.
 * @param args.key - Routing key to match, or null for every record.
 * @param args.format - Echoed back on the returned batch.
 * @param args.filter - Optional compiled filter; when present the batch also
 *                      carries the filter scan statistics.
 * @returns The batch, or a `not_found` / `gone` / `invalid_offset` /
 *          `internal` error (plus whatever the segment-capability and
 *          object-store checks report).
 */
async readResult(args: {
  stream: string;
  offset: string;
  key: string | null;
  format: ReadFormat;
  filter?: CompiledReadFilter | null;
}): Promise<Result<ReadBatch, ReaderError>> {
  const { stream, offset, key, format, filter = null } = args;
  const srow = await this.store.getStreamForRead(stream);
  if (!srow || this.store.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
  if (srow.expires_at_ms != null && await this.store.nowMsForRead() > srow.expires_at_ms) {
    return Result.err({ kind: "gone", message: "stream expired" });
  }
  const segmentCapabilityError = this.missingSegmentCapabilityError(srow);
  if (segmentCapabilityError) return Result.err(segmentCapabilityError);
  const epoch = srow.epoch;

  try {
    const parsed = parseOffsetResult(offset);
    if (Result.isError(parsed)) {
      return Result.err({ kind: "invalid_offset", message: parsed.error.message });
    }
    const startOffsetExclusive = offsetToSeqOrNeg1(parsed.value);
    const desiredOffset = startOffsetExclusive + 1n;

    const endOffsetNum = srow.next_offset - 1n;
    const endOffset = encodeOffset(srow.epoch, endOffsetNum);

    // The object store is only required when the read starts inside the sealed range.
    const objectStore =
      desiredOffset <= srow.sealed_through
        ? (() => {
            const res = this.requireObjectStore();
            if (Result.isError(res)) return res;
            return Result.ok(res.value);
          })()
        : Result.ok(null);
    if (Result.isError(objectStore)) return Result.err(objectStore.error);

    const results: Array<{ offset: bigint; payload: Uint8Array }> = [];
    let bytesOut = 0;
    let filterScannedBytes = 0;
    let filterScanLimitReached = false;
    // Next stream offset still to be scanned; everything below it is done.
    let seq = desiredOffset;

    const finalize = (): ReadBatch => {
      // nextOffset is a stream cursor, not a "last matching record" cursor. For
      // key-filtered reads, this must still advance past non-matching offsets,
      // otherwise SSE/long-poll can spin forever when the stream advances but no
      // matching keys appear.
      const scannedThrough = seq - 1n;
      const nextOffset = encodeOffset(epoch, scannedThrough);
      return {
        stream,
        format,
        key,
        requestOffset: offset,
        endOffset,
        nextOffset,
        endOffsetSeq: endOffsetNum,
        nextOffsetSeq: scannedThrough,
        records: results,
        ...(filter
          ? {
              filterScannedBytes,
              filterScanLimitBytes: READ_FILTER_SCAN_LIMIT_BYTES,
              filterScanLimitReached,
            }
          : {}),
      };
    };

    // Nothing to read: with seq === desiredOffset this yields an empty batch
    // whose nextOffset equals the requested start.
    if (desiredOffset > endOffsetNum) return Result.ok(finalize());

    const keyBytes = key ? utf8Bytes(key) : null;
    const candidateInfo = await this.resolveCandidateSegments(stream, keyBytes, filter);
    const candidateSegments = candidateInfo.segments;
    const indexedThrough = candidateInfo.indexedThrough;
    const columnClauses = filter ? collectPositiveColumnFilterClauses(filter) : [];
    const filterRegistryRes = filter ? await this.registry.getRegistryResult(stream) : Result.ok(null);
    if (Result.isError(filterRegistryRes)) return Result.err({ kind: "internal", message: filterRegistryRes.error.message });
    const filterRegistry = filterRegistryRes.value;

    const outputLimitReached = (): boolean =>
      results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes;

    // Decides whether one record matches the key and filter, and whether the
    // filter scan budget is exhausted. Every payload examined under a filter
    // counts towards the budget, including key mismatches.
    const evaluateRecordResult = (
      recordOffset: bigint,
      routingKey: Uint8Array | null | undefined,
      payload: Uint8Array
    ): Result<{ matched: boolean; stop: boolean }, ReaderError> => {
      if (filter) {
        filterScannedBytes += payload.byteLength;
      }
      if (keyBytes && (!routingKey || !bytesEqual(routingKey, keyBytes))) {
        return Result.ok({
          matched: false,
          stop: !!filter && filterScannedBytes >= READ_FILTER_SCAN_LIMIT_BYTES,
        });
      }
      if (!filter) return Result.ok({ matched: true, stop: false });
      const valueRes = decodeJsonPayloadWithRegistryResult(this.registry, filterRegistry!, recordOffset, payload);
      if (Result.isError(valueRes)) {
        return Result.err({ kind: "internal", message: valueRes.error.message });
      }
      const matchesRes = evaluateReadFilterResult(filterRegistry!, recordOffset, filter, valueRes.value);
      if (Result.isError(matchesRes)) return Result.err({ kind: "internal", message: matchesRes.error.message });
      return Result.ok({
        matched: matchesRes.value,
        stop: filterScannedBytes >= READ_FILTER_SCAN_LIMIT_BYTES,
      });
    };

    // Walks the records of one decoded block whose first record sits at
    // `firstOffset`. Shared by every segment scan path. When a limit ends the
    // read, `seq` is set to the resume point and `limitHit` is true;
    // `nextOffset` is the offset after the last record visited.
    const scanBlockRecords = (
      records: Iterable<{ routingKey?: Uint8Array | null; payload: Uint8Array }>,
      firstOffset: bigint,
      seg: SegmentRow,
      allowedDocIds: Set<number> | null
    ): Result<{ nextOffset: bigint; limitHit: boolean }, ReaderError> => {
      let curOffset = firstOffset;
      for (const r of records) {
        if (curOffset < seq) {
          curOffset += 1n;
          continue;
        }
        if (curOffset > endOffsetNum) break;
        // Doc ids are segment-local: the record's position relative to the segment start.
        const localDocId = Number(curOffset - seg.start_offset);
        if (allowedDocIds && !allowedDocIds.has(localDocId)) {
          curOffset += 1n;
          continue;
        }
        const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
        if (Result.isError(matchRes)) return matchRes;
        if (matchRes.value.matched) {
          results.push({ offset: curOffset, payload: r.payload });
          bytesOut += r.payload.byteLength;
        }
        curOffset += 1n;
        if (matchRes.value.stop) {
          filterScanLimitReached = true;
          seq = curOffset;
          return Result.ok({ nextOffset: curOffset, limitHit: true });
        }
        if (outputLimitReached()) {
          seq = curOffset;
          return Result.ok({ nextOffset: curOffset, limitHit: true });
        }
      }
      return Result.ok({ nextOffset: curOffset, limitHit: false });
    };

    // Scans a segment that has been loaded fully into memory.
    const scanSegmentBytes = async (
      segBytes: Uint8Array,
      seg: SegmentRow,
      allowedDocIds: Set<number> | null
    ): Promise<Result<void, ReaderError>> => {
      const footer = parseFooter(segBytes)?.footer;
      if (footer) {
        // Footer available: jump straight to the first block that can contain `seq`.
        for (let blockIndex = findFirstRelevantBlockIndex(footer.blocks, seq); blockIndex < footer.blocks.length; blockIndex++) {
          const block = footer.blocks[blockIndex]!;
          const blockStart = block.firstOffset;
          const blockEnd = blockStart + BigInt(block.recordCount) - 1n;
          if (blockEnd < seq) continue;
          if (blockStart > endOffsetNum) break;

          if (keyBytes) {
            // Only the header is parsed to consult the block's bloom filter.
            const headerBytes = segBytes.subarray(block.blockOffset, block.blockOffset + DSB3_HEADER_BYTES);
            const headerRes = parseBlockHeaderResult(headerBytes);
            if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
            const bloom = new Bloom256(headerRes.value.bloom);
            if (!bloom.maybeHas(keyBytes)) continue;
          }

          const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
          const blockBytes = segBytes.subarray(block.blockOffset, block.blockOffset + totalLen);
          const decodedRes = decodeBlockResult(blockBytes);
          if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
          const scanned = scanBlockRecords(decodedRes.value.records, blockStart, seg, allowedDocIds);
          if (Result.isError(scanned)) return scanned;
          if (scanned.value.limitHit) return Result.ok(undefined);
        }
        return Result.ok(undefined);
      }

      // No footer: walk every block in order, tracking the running offset.
      let curOffset = seg.start_offset;
      for (const blockRes of iterateBlocksResult(segBytes)) {
        if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
        const { decoded } = blockRes.value;
        if (keyBytes) {
          const bloom = new Bloom256(decoded.bloom);
          if (!bloom.maybeHas(keyBytes)) {
            curOffset += BigInt(decoded.recordCount);
            continue;
          }
        }
        const scanned = scanBlockRecords(decoded.records, curOffset, seg, allowedDocIds);
        if (Result.isError(scanned)) return scanned;
        if (scanned.value.limitHit) return Result.ok(undefined);
        curOffset = scanned.value.nextOffset;
      }
      return Result.ok(undefined);
    };

    // Scans a segment through ranged reads against `source`.
    const scanSegmentSource = async (
      source: SegmentReadSource,
      seg: SegmentRow,
      allowedDocIds: Set<number> | null
    ): Promise<Result<void, ReaderError>> => {
      const footerBlocks = loadSegmentFooterBlocksFromSource(seg, source);
      if (footerBlocks) {
        for (let blockIndex = findFirstRelevantBlockIndex(footerBlocks, seq); blockIndex < footerBlocks.length; blockIndex++) {
          const block = footerBlocks[blockIndex]!;
          const blockStart = block.firstOffset;
          const blockEnd = blockStart + BigInt(block.recordCount) - 1n;
          if (blockEnd < seq) continue;
          if (blockStart > endOffsetNum) break;

          // readRangeFromSource is called with an inclusive end position here.
          const headerBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + DSB3_HEADER_BYTES - 1);
          const headerRes = parseBlockHeaderResult(headerBytes);
          if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
          if (keyBytes) {
            const bloom = new Bloom256(headerRes.value.bloom);
            if (!bloom.maybeHas(keyBytes)) continue;
          }

          const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
          const blockBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + totalLen - 1);
          const decodedRes = decodeBlockResult(blockBytes);
          if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
          const scanned = scanBlockRecords(decodedRes.value.records, blockStart, seg, allowedDocIds);
          if (Result.isError(scanned)) return scanned;
          if (scanned.value.limitHit) return Result.ok(undefined);
        }
        return Result.ok(undefined);
      }

      // No footer: hop from header to header using each block's compressed length.
      const limit = loadSegmentDataLimitFromSource(seg, source);
      let blockOffset = 0;
      let blockFirstOffset = seg.start_offset;
      while (blockOffset < limit) {
        const headerBytes = readRangeFromSource(source, blockOffset, blockOffset + DSB3_HEADER_BYTES - 1);
        const headerRes = parseBlockHeaderResult(headerBytes);
        if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
        const header = headerRes.value;
        const totalLen = DSB3_HEADER_BYTES + header.compressedLen;
        const blockStart = blockFirstOffset;
        const blockEnd = blockStart + BigInt(header.recordCount) - 1n;
        if (blockEnd < seq) {
          blockOffset += totalLen;
          blockFirstOffset = blockEnd + 1n;
          continue;
        }
        if (blockStart > endOffsetNum) break;

        if (keyBytes) {
          const bloom = new Bloom256(header.bloom);
          if (!bloom.maybeHas(keyBytes)) {
            blockOffset += totalLen;
            blockFirstOffset = blockEnd + 1n;
            continue;
          }
        }

        const blockBytes = readRangeFromSource(source, blockOffset, blockOffset + totalLen - 1);
        const decodedRes = decodeBlockResult(blockBytes);
        if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
        const scanned = scanBlockRecords(decodedRes.value.records, blockStart, seg, allowedDocIds);
        if (Result.isError(scanned)) return scanned;
        if (scanned.value.limitHit) return Result.ok(undefined);
        blockOffset += totalLen;
        blockFirstOffset = blockEnd + 1n;
      }
      return Result.ok(undefined);
    };

    // Reads one sealed segment: applies the column-index prefilter, picks the
    // load strategy, scans. Resolves to true when a limit ended the read and
    // the caller should finalize; false when the caller should move on.
    const readSealedSegment = async (seg: SegmentRow): Promise<Result<boolean, ReaderError>> => {
      let allowedDocIds: Set<number> | null = null;
      if (columnClauses.length > 0) {
        const docIdsRes = await this.resolveColumnCandidateDocIdsResult(stream, seg.segment_index, columnClauses);
        if (Result.isError(docIdsRes)) return Result.err({ kind: "internal", message: docIdsRes.error.message });
        if (docIdsRes.value) {
          allowedDocIds = docIdsRes.value;
          // The column index reports no candidate in this segment: skip it without loading.
          if (allowedDocIds.size === 0) return Result.ok(false);
        }
      }
      const preferFull = !keyBytes && this.config.readMaxBytes >= seg.size_bytes;
      if (preferFull) {
        const segBytes = await loadSegmentBytes(objectStore.value!, seg, this.diskCache, this.retryOpts());
        const scanRes = await scanSegmentBytes(segBytes, seg, allowedDocIds);
        if (Result.isError(scanRes)) return scanRes;
      } else {
        const source = await loadSegmentSource(objectStore.value!, seg, this.diskCache, this.retryOpts());
        const scanRes = await scanSegmentSource(source, seg, allowedDocIds);
        if (Result.isError(scanRes)) return scanRes;
      }
      return Result.ok(filterScanLimitReached || outputLimitReached());
    };

    const sealedEndSeq = endOffsetNum < srow.sealed_through ? endOffsetNum : srow.sealed_through;
    const plannedSealedSegments = await this.planSealedReadSegments(
      stream,
      seq,
      sealedEndSeq,
      candidateSegments,
      indexedThrough,
      "asc"
    );

    // 1) Read from sealed segments.
    if (plannedSealedSegments) {
      for (const seg of plannedSealedSegments.segments) {
        if (seg.end_offset < seq) continue;
        if (seg.start_offset > sealedEndSeq) break;
        const segRes = await readSealedSegment(seg);
        if (Result.isError(segRes)) return segRes;
        if (segRes.value) return Result.ok(finalize());
        seq = seg.end_offset + 1n;
      }
      // Segments left out of the plan still count as scanned.
      if (seq <= plannedSealedSegments.sealedEndSeq) {
        seq = plannedSealedSegments.sealedEndSeq + 1n;
      }
    } else {
      while (seq <= endOffsetNum && seq <= srow.sealed_through) {
        const seg = await this.findSegmentForOffset(stream, seq);
        if (!seg) {
          // Corruption in local metadata: sealed_through points past segments table.
          break;
        }
        // Indexed segments that are not candidates cannot contain a match.
        if (candidateSegments && seg.segment_index < indexedThrough && !candidateSegments.has(seg.segment_index)) {
          seq = seg.end_offset + 1n;
          continue;
        }
        const segRes = await readSealedSegment(seg);
        if (Result.isError(segRes)) return segRes;
        if (segRes.value) return Result.ok(finalize());

        // Move to next segment.
        seq = seg.end_offset + 1n;
      }
    }

    // 2) Read remaining from WAL tail.
    if (seq <= endOffsetNum) {
      let hitLimit = false;
      for await (const rec of this.store.readWalRange(stream, seq, endOffsetNum, keyBytes ?? undefined)) {
        const s = rec.offset;
        const payload: Uint8Array = rec.payload;
        const matchRes = evaluateRecordResult(s, rec.routingKey, payload);
        if (Result.isError(matchRes)) return matchRes;
        if (matchRes.value.matched) {
          results.push({ offset: s, payload });
          bytesOut += payload.byteLength;
        }
        if (matchRes.value.stop) {
          filterScanLimitReached = true;
          hitLimit = true;
          seq = s + 1n;
          break;
        }
        if (outputLimitReached()) {
          hitLimit = true;
          // We only emitted payloads up through this offset (key-filtered reads
          // may skip offsets in SQL). Resume from the next offset.
          seq = s + 1n;
          break;
        }
      }
      if (!hitLimit) {
        // We exhausted the iterator for this [seq, endOffsetNum] range. Even if
        // it yielded zero records (common for key-filtered reads), we have
        // scanned through endOffsetNum and should advance the stream cursor to
        // avoid tight catchup loops.
        seq = endOffsetNum + 1n;
      }
    }

    return Result.ok(finalize());
  } catch (e: unknown) {
    return Result.err({ kind: "internal", message: errorMessage(e) });
  }
}
1213
+
1214
/**
 * Throwing variant of `readResult`.
 *
 * @param args - Same arguments as `readResult`, passed through unchanged.
 * @returns The read batch on success.
 * @throws The error produced by `dsError` from the failure message.
 */
async read(args: {
  stream: string;
  offset: string;
  key: string | null;
  format: ReadFormat;
  filter?: CompiledReadFilter | null;
}): Promise<ReadBatch> {
  const outcome = await this.readResult(args);
  if (!Result.isError(outcome)) return outcome.value;
  throw dsError(outcome.error.message);
}
1225
+
1226
+ async searchResult(args: { stream: string; request: SearchRequest }): Promise<Result<SearchResultBatch, ReaderError>> {
1227
+ const startedAt = Date.now();
1228
+ const { stream, request } = args;
1229
+ const leaveSearchPhase = this.memorySampler?.enter("search", {
1230
+ stream,
1231
+ has_query: request.q != null,
1232
+ over_limit: this.memory?.isOverLimit() === true,
1233
+ });
1234
+ const srow = await this.store.getStreamForRead(stream);
1235
+ try {
1236
+ if (!srow || this.store.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
1237
+ if (srow.expires_at_ms != null && await this.store.nowMsForRead() > srow.expires_at_ms) {
1238
+ return Result.err({ kind: "gone", message: "stream expired" });
1239
+ }
1240
+ const segmentCapabilityError = this.missingSegmentCapabilityError(srow);
1241
+ if (segmentCapabilityError) return Result.err(segmentCapabilityError);
1242
+
1243
+ const regRes = await this.registry.getRegistryResult(stream);
1244
+ if (Result.isError(regRes)) return Result.err({ kind: "internal", message: regRes.error.message });
1245
+ const registry = regRes.value;
1246
+ if (!registry.search) return Result.err({ kind: "internal", message: "search is not configured for this stream" });
1247
+
1248
+ const snapshotEndSeq = srow.next_offset - 1n;
1249
+ const snapshotEndOffset = encodeOffset(srow.epoch, snapshotEndSeq);
1250
+ const coverageState = await this.computePublishedCoverageState(stream, srow, registry);
1251
+ const visibleSnapshotEndSeq = coverageState.canSearchWalTail
1252
+ ? snapshotEndSeq
1253
+ : (coverageState.visibleThroughSeq < snapshotEndSeq ? coverageState.visibleThroughSeq : snapshotEndSeq);
1254
+ const visibleSealedThrough = coverageState.canSearchWalTail
1255
+ ? srow.sealed_through
1256
+ : (coverageState.visibleThroughSeq < srow.sealed_through ? coverageState.visibleThroughSeq : srow.sealed_through);
1257
+ const objectStore =
1258
+ visibleSealedThrough >= 0n
1259
+ ? (() => {
1260
+ const res = this.requireObjectStore();
1261
+ if (Result.isError(res)) return res;
1262
+ return Result.ok(res.value);
1263
+ })()
1264
+ : Result.ok(null);
1265
+ if (Result.isError(objectStore)) return Result.err(objectStore.error);
1266
+ const deadline = request.timeoutMs == null ? null : Date.now() + request.timeoutMs;
1267
+ const leadingSort = request.sort[0] ?? null;
1268
+ const offsetSearchAfter =
1269
+ request.searchAfter && leadingSort?.kind === "offset" ? normalizeSearchAfterValue(leadingSort, request.searchAfter[0]) : null;
1270
+ const cursorFieldBound = resolveSearchCursorFieldBound(request);
1271
+ const primaryTimestampTopKSort = resolvePrimaryTimestampTopKSort(registry, request);
1272
+ const primaryTimestampRowsBySegment =
1273
+ primaryTimestampTopKSort && request.size > 0 ? await this.currentSearchCompanionRowsBySegment(stream, registry) : null;
1274
+
1275
+ const hits: SearchHitInternal[] = [];
1276
+ let timedOut = false;
1277
+ const markTimedOutIfNeeded = (): boolean => {
1278
+ if (deadline == null || Date.now() < deadline) return false;
1279
+ timedOut = true;
1280
+ return true;
1281
+ };
1282
+ let indexedSegments = 0;
1283
+ let indexedSegmentTimeMs = 0;
1284
+ let ftsSectionGetMs = 0;
1285
+ let ftsDecodeMs = 0;
1286
+ let ftsClauseEstimateMs = 0;
1287
+ let scannedSegments = 0;
1288
+ let scannedSegmentTimeMs = 0;
1289
+ let scannedTailDocs = 0;
1290
+ let scannedTailTimeMs = 0;
1291
+ let candidateDocIds = 0;
1292
+ let decodedRecords = 0;
1293
+ let jsonParseTimeMs = 0;
1294
+ let segmentPayloadBytesFetched = 0;
1295
+ let sortTimeMs = 0;
1296
+ let peakHitsHeld = 0;
1297
+ const indexFamiliesUsed = new Set<string>();
1298
+ const exactClauses = collectPositiveSearchExactClauses(request.q);
1299
+ const columnClauses = collectPositiveSearchColumnClauses(request.q);
1300
+ const ftsClauses = collectPositiveSearchFtsClauses(request.q);
1301
+ let exactCandidateInfo: SegmentCandidateInfo = { segments: null, indexedThrough: 0 };
1302
+ let exactCandidateTimeMs = 0;
1303
+ if (!markTimedOutIfNeeded()) {
1304
+ const exactCandidateStartedAt = Date.now();
1305
+ exactCandidateInfo = await this.resolveSearchExactCandidateSegments(stream, request.q);
1306
+ exactCandidateTimeMs = Date.now() - exactCandidateStartedAt;
1307
+ markTimedOutIfNeeded();
1308
+ }
1309
+
1310
+ const collectSearchMatchResult = (
1311
+ offsetSeq: bigint,
1312
+ payload: Uint8Array
1313
+ ): Result<void, ReaderError> => {
1314
+ const parseStartedAt = Date.now();
1315
+ const parsedRes = decodeJsonPayloadWithRegistryResult(this.registry, registry, offsetSeq, payload);
1316
+ jsonParseTimeMs += Date.now() - parseStartedAt;
1317
+ if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
1318
+ const evalRes = evaluateSearchQueryResult(registry, offsetSeq, request.q, parsedRes.value);
1319
+ if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
1320
+ if (!evalRes.value.matched) return Result.ok(undefined);
1321
+ const fieldsRes = extractSearchHitFieldsResult(registry, offsetSeq, parsedRes.value);
1322
+ if (Result.isError(fieldsRes)) return Result.err({ kind: "internal", message: fieldsRes.error.message });
1323
+ const sortInternal = buildSearchSortInternalValues(request.sort, fieldsRes.value, evalRes.value, offsetSeq);
1324
+ if (request.searchAfter && compareSearchAfterValues(sortInternal, request.sort, request.searchAfter) <= 0) {
1325
+ return Result.ok(undefined);
1326
+ }
1327
+ const hit: SearchHitInternal = {
1328
+ offsetSeq,
1329
+ offset: encodeOffset(srow.epoch, offsetSeq),
1330
+ score: evalRes.value.score,
1331
+ sortInternal,
1332
+ sortResponse: buildSearchSortResponseValues(request.sort, sortInternal, encodeOffset(srow.epoch, offsetSeq)),
1333
+ fields: fieldsRes.value,
1334
+ source: parsedRes.value,
1335
+ };
1336
+ hits.push(hit);
1337
+ if (primaryTimestampTopKSort && request.size > 0 && hits.length > request.size) {
1338
+ hits.splice(worstSearchHitIndex(hits, request.sort), 1);
1339
+ }
1340
+ if (hits.length > peakHitsHeld) peakHitsHeld = hits.length;
1341
+ return Result.ok(undefined);
1342
+ };
1343
+
1344
+ const primaryTimestampTopKCutoff = (): bigint | null => {
1345
+ if (!primaryTimestampTopKSort || hits.length < request.size) return null;
1346
+ const worstHit = hits[worstSearchHitIndex(hits, request.sort)];
1347
+ const value = worstHit?.sortInternal[0];
1348
+ return typeof value === "bigint" ? value : null;
1349
+ };
1350
+
1351
+ const primaryTimestampSegmentMayBeatTopK = (seg: SegmentRow): boolean => {
1352
+ if (!primaryTimestampTopKSort || !primaryTimestampRowsBySegment) return true;
1353
+ const cutoff = primaryTimestampTopKCutoff();
1354
+ if (cutoff == null) return true;
1355
+ const row = primaryTimestampRowsBySegment.get(seg.segment_index);
1356
+ if (row?.primary_timestamp_min_ms == null || row.primary_timestamp_max_ms == null) return true;
1357
+ if (primaryTimestampTopKSort.direction === "desc") return row.primary_timestamp_max_ms >= cutoff;
1358
+ return row.primary_timestamp_min_ms <= cutoff;
1359
+ };
1360
+
1361
+ const scanSegmentForSearchResult = async (
1362
+ seg: SegmentRow,
1363
+ allowedDocIds: Set<number> | null,
1364
+ rangeStartSeq: bigint,
1365
+ rangeEndSeq: bigint
1366
+ ): Promise<Result<void, ReaderError>> => {
1367
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1368
+ const segBytes = await loadSegmentBytes(objectStore.value!, seg, this.diskCache, this.retryOpts());
1369
+ segmentPayloadBytesFetched += seg.size_bytes;
1370
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1371
+ let curOffset = seg.start_offset;
1372
+ for (const blockRes of iterateBlocksResult(segBytes)) {
1373
+ if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
1374
+ decodedRecords += blockRes.value.decoded.recordCount;
1375
+ for (const record of blockRes.value.decoded.records) {
1376
+ if (curOffset > rangeEndSeq) return Result.ok(undefined);
1377
+ if (curOffset < rangeStartSeq) {
1378
+ curOffset += 1n;
1379
+ continue;
1380
+ }
1381
+ const localDocId = Number(curOffset - seg.start_offset);
1382
+ if (!allowedDocIds || allowedDocIds.has(localDocId)) {
1383
+ const matchRes = collectSearchMatchResult(curOffset, record.payload);
1384
+ if (Result.isError(matchRes)) return matchRes;
1385
+ }
1386
+ curOffset += 1n;
1387
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1388
+ }
1389
+ }
1390
+ return Result.ok(undefined);
1391
+ };
1392
+
1393
+ const scanSegmentWithFamiliesResult = async (
1394
+ seg: SegmentRow,
1395
+ rangeStartSeq: bigint,
1396
+ rangeEndSeq: bigint
1397
+ ): Promise<Result<void, ReaderError>> => {
1398
+ const segmentStartedAt = Date.now();
1399
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1400
+ if (
1401
+ exactCandidateInfo.segments &&
1402
+ seg.segment_index < exactCandidateInfo.indexedThrough &&
1403
+ !exactCandidateInfo.segments.has(seg.segment_index)
1404
+ ) {
1405
+ return Result.ok(undefined);
1406
+ }
1407
+ if (cursorFieldBound) {
1408
+ const overlapsCursor = await this.segmentMayOverlapSearchCursor(stream, seg.segment_index, cursorFieldBound);
1409
+ if (!overlapsCursor) {
1410
+ indexFamiliesUsed.add("col");
1411
+ indexedSegments += 1;
1412
+ indexedSegmentTimeMs += Date.now() - segmentStartedAt;
1413
+ return Result.ok(undefined);
1414
+ }
1415
+ }
1416
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1417
+
1418
+ const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult(
1419
+ stream,
1420
+ seg.segment_index,
1421
+ exactClauses,
1422
+ columnClauses,
1423
+ ftsClauses,
1424
+ {
1425
+ addFtsSectionGetMs: (deltaMs) => {
1426
+ ftsSectionGetMs += deltaMs;
1427
+ },
1428
+ addFtsDecodeMs: (deltaMs) => {
1429
+ ftsDecodeMs += deltaMs;
1430
+ },
1431
+ addFtsClauseEstimateMs: (deltaMs) => {
1432
+ ftsClauseEstimateMs += deltaMs;
1433
+ },
1434
+ }
1435
+ );
1436
+ if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message });
1437
+ if (markTimedOutIfNeeded()) return Result.ok(undefined);
1438
+ const familyCandidates = familyCandidatesRes.value;
1439
+ if (familyCandidates.docIds) candidateDocIds += familyCandidates.docIds.size;
1440
+ if (familyCandidates.docIds && familyCandidates.docIds.size === 0) {
1441
+ indexedSegments += familyCandidates.usedFamilies.size > 0 ? 1 : 0;
1442
+ for (const family of familyCandidates.usedFamilies) indexFamiliesUsed.add(family);
1443
+ if (familyCandidates.usedFamilies.size > 0) indexedSegmentTimeMs += Date.now() - segmentStartedAt;
1444
+ return Result.ok(undefined);
1445
+ }
1446
+ const usedIndexedFamilies = familyCandidates.usedFamilies.size > 0;
1447
+ if (familyCandidates.usedFamilies.size > 0) {
1448
+ indexedSegments += 1;
1449
+ for (const family of familyCandidates.usedFamilies) indexFamiliesUsed.add(family);
1450
+ } else {
1451
+ scannedSegments += 1;
1452
+ }
1453
+
1454
+ const scanRes = await scanSegmentForSearchResult(seg, familyCandidates.docIds, rangeStartSeq, rangeEndSeq);
1455
+ if (Result.isError(scanRes)) return scanRes;
1456
+ if (usedIndexedFamilies) indexedSegmentTimeMs += Date.now() - segmentStartedAt;
1457
+ else scannedSegmentTimeMs += Date.now() - segmentStartedAt;
1458
+ return Result.ok(undefined);
1459
+ };
1460
+
1461
+ const stopIfPageComplete = (): boolean => hits.length >= request.size;
1462
+ const scanWalTailResult = async (
1463
+ startSeq: bigint,
1464
+ endSeq: bigint,
1465
+ direction: "asc" | "desc",
1466
+ stopOnPageComplete: boolean
1467
+ ): Promise<Result<void, ReaderError>> => {
1468
+ const tailStartedAt = Date.now();
1469
+ const hotOffsetsRes = await this.hotWalExactOffsetsResult(stream, startSeq, endSeq, exactClauses, registry);
1470
+ if (Result.isError(hotOffsetsRes)) return hotOffsetsRes;
1471
+ const hotOffsets = hotOffsetsRes.value;
1472
+ if (hotOffsets) {
1473
+ candidateDocIds += hotOffsets.length;
1474
+ const orderedOffsets = direction === "desc" ? [...hotOffsets].reverse() : hotOffsets;
1475
+ for (const offsetSeq of orderedOffsets) {
1476
+ const record = await this.walRecordAt(stream, offsetSeq);
1477
+ if (!record) continue;
1478
+ scannedTailDocs += 1;
1479
+ const matchRes = collectSearchMatchResult(record.offset, record.payload);
1480
+ if (Result.isError(matchRes)) return matchRes;
1481
+ if (markTimedOutIfNeeded()) break;
1482
+ if (stopOnPageComplete && stopIfPageComplete()) break;
1483
+ }
1484
+ scannedTailTimeMs += Date.now() - tailStartedAt;
1485
+ return Result.ok(undefined);
1486
+ }
1487
+
1488
+ const rows =
1489
+ direction === "desc"
1490
+ ? this.store.readWalRangeDesc(stream, startSeq, endSeq)
1491
+ : this.store.readWalRange(stream, startSeq, endSeq);
1492
+ for await (const record of rows) {
1493
+ scannedTailDocs += 1;
1494
+ const matchRes = collectSearchMatchResult(record.offset, record.payload);
1495
+ if (Result.isError(matchRes)) return matchRes;
1496
+ if (markTimedOutIfNeeded()) break;
1497
+ if (stopOnPageComplete && stopIfPageComplete()) break;
1498
+ }
1499
+ scannedTailTimeMs += Date.now() - tailStartedAt;
1500
+ return Result.ok(undefined);
1501
+ };
1502
+
1503
+ if (leadingSort?.kind === "offset") {
1504
+ const descending = leadingSort.direction === "desc";
1505
+ const rangeStartSeq = descending ? 0n : typeof offsetSearchAfter === "bigint" ? offsetSearchAfter + 1n : 0n;
1506
+ const requestedRangeEndSeq = descending ? (typeof offsetSearchAfter === "bigint" ? offsetSearchAfter - 1n : snapshotEndSeq) : snapshotEndSeq;
1507
+ const rangeEndSeq = requestedRangeEndSeq < visibleSnapshotEndSeq ? requestedRangeEndSeq : visibleSnapshotEndSeq;
1508
+
1509
+ if (rangeStartSeq <= rangeEndSeq) {
1510
+ if (descending) {
1511
+ const tailStart = srow.sealed_through + 1n;
1512
+ if (coverageState.canSearchWalTail && tailStart <= rangeEndSeq) {
1513
+ const walStart = rangeStartSeq > tailStart ? rangeStartSeq : tailStart;
1514
+ const walEnd = rangeEndSeq;
1515
+ if (walStart <= walEnd) {
1516
+ const tailRes = await scanWalTailResult(walStart, walEnd, "desc", true);
1517
+ if (Result.isError(tailRes)) return tailRes;
1518
+ }
1519
+ }
1520
+ if (!timedOut && !stopIfPageComplete()) {
1521
+ const sealedEnd = rangeEndSeq < visibleSealedThrough ? rangeEndSeq : visibleSealedThrough;
1522
+ if (sealedEnd >= rangeStartSeq) {
1523
+ const plannedSealedSegments = await this.planSealedReadSegments(
1524
+ stream,
1525
+ rangeStartSeq,
1526
+ sealedEnd,
1527
+ exactCandidateInfo.segments,
1528
+ exactCandidateInfo.indexedThrough,
1529
+ "desc"
1530
+ );
1531
+ if (plannedSealedSegments) {
1532
+ for (const seg of plannedSealedSegments.segments) {
1533
+ const scanRes = await this.scanSegmentReverseForSearchResult(
1534
+ stream,
1535
+ seg,
1536
+ exactCandidateInfo,
1537
+ cursorFieldBound,
1538
+ exactClauses,
1539
+ columnClauses,
1540
+ ftsClauses,
1541
+ rangeStartSeq,
1542
+ sealedEnd,
1543
+ {
1544
+ indexFamiliesUsed,
1545
+ collectSearchMatchResult,
1546
+ deadline,
1547
+ isTimedOut: () => timedOut,
1548
+ setTimedOut: (next) => {
1549
+ timedOut = next;
1550
+ },
1551
+ stopIfPageComplete,
1552
+ addIndexedSegment: () => {
1553
+ indexedSegments += 1;
1554
+ },
1555
+ addScannedSegment: () => {
1556
+ scannedSegments += 1;
1557
+ },
1558
+ addIndexedSegmentTimeMs: (deltaMs) => {
1559
+ indexedSegmentTimeMs += deltaMs;
1560
+ },
1561
+ addFtsSectionGetMs: (deltaMs) => {
1562
+ ftsSectionGetMs += deltaMs;
1563
+ },
1564
+ addFtsDecodeMs: (deltaMs) => {
1565
+ ftsDecodeMs += deltaMs;
1566
+ },
1567
+ addFtsClauseEstimateMs: (deltaMs) => {
1568
+ ftsClauseEstimateMs += deltaMs;
1569
+ },
1570
+ addScannedSegmentTimeMs: (deltaMs) => {
1571
+ scannedSegmentTimeMs += deltaMs;
1572
+ },
1573
+ addCandidateDocIds: (count) => {
1574
+ candidateDocIds += count;
1575
+ },
1576
+ addDecodedRecords: (count) => {
1577
+ decodedRecords += count;
1578
+ },
1579
+ addSegmentPayloadBytesFetched: (count) => {
1580
+ segmentPayloadBytesFetched += count;
1581
+ },
1582
+ }
1583
+ );
1584
+ if (Result.isError(scanRes)) return scanRes;
1585
+ if (timedOut || stopIfPageComplete()) break;
1586
+ }
1587
+ } else {
1588
+ const startSeg = await this.findSegmentForOffset(stream, sealedEnd);
1589
+ let segmentIndex = startSeg?.segment_index ?? await this.countSegmentsForStream(stream) - 1;
1590
+ while (segmentIndex >= 0) {
1591
+ const seg = await this.getSegmentByIndex(stream, segmentIndex);
1592
+ if (!seg) {
1593
+ segmentIndex -= 1;
1594
+ continue;
1595
+ }
1596
+ if (seg.end_offset < rangeStartSeq) break;
1597
+ if (seg.start_offset > sealedEnd) {
1598
+ segmentIndex -= 1;
1599
+ continue;
1600
+ }
1601
+ const scanRes = await this.scanSegmentReverseForSearchResult(
1602
+ stream,
1603
+ seg,
1604
+ exactCandidateInfo,
1605
+ cursorFieldBound,
1606
+ exactClauses,
1607
+ columnClauses,
1608
+ ftsClauses,
1609
+ rangeStartSeq,
1610
+ sealedEnd,
1611
+ {
1612
+ indexFamiliesUsed,
1613
+ collectSearchMatchResult,
1614
+ deadline,
1615
+ isTimedOut: () => timedOut,
1616
+ setTimedOut: (next) => {
1617
+ timedOut = next;
1618
+ },
1619
+ stopIfPageComplete,
1620
+ addIndexedSegment: () => {
1621
+ indexedSegments += 1;
1622
+ },
1623
+ addScannedSegment: () => {
1624
+ scannedSegments += 1;
1625
+ },
1626
+ addIndexedSegmentTimeMs: (deltaMs) => {
1627
+ indexedSegmentTimeMs += deltaMs;
1628
+ },
1629
+ addFtsSectionGetMs: (deltaMs) => {
1630
+ ftsSectionGetMs += deltaMs;
1631
+ },
1632
+ addFtsDecodeMs: (deltaMs) => {
1633
+ ftsDecodeMs += deltaMs;
1634
+ },
1635
+ addFtsClauseEstimateMs: (deltaMs) => {
1636
+ ftsClauseEstimateMs += deltaMs;
1637
+ },
1638
+ addScannedSegmentTimeMs: (deltaMs) => {
1639
+ scannedSegmentTimeMs += deltaMs;
1640
+ },
1641
+ addCandidateDocIds: (count) => {
1642
+ candidateDocIds += count;
1643
+ },
1644
+ addDecodedRecords: (count) => {
1645
+ decodedRecords += count;
1646
+ },
1647
+ addSegmentPayloadBytesFetched: (count) => {
1648
+ segmentPayloadBytesFetched += count;
1649
+ },
1650
+ }
1651
+ );
1652
+ if (Result.isError(scanRes)) return scanRes;
1653
+ if (timedOut || stopIfPageComplete()) break;
1654
+ segmentIndex -= 1;
1655
+ }
1656
+ }
1657
+ }
1658
+ }
1659
+ } else {
1660
+ let seq = rangeStartSeq;
1661
+ const sealedEnd = rangeEndSeq < visibleSealedThrough ? rangeEndSeq : visibleSealedThrough;
1662
+ const plannedSealedSegments = await this.planSealedReadSegments(
1663
+ stream,
1664
+ rangeStartSeq,
1665
+ sealedEnd,
1666
+ exactCandidateInfo.segments,
1667
+ exactCandidateInfo.indexedThrough,
1668
+ "asc"
1669
+ );
1670
+ if (plannedSealedSegments) {
1671
+ for (const seg of plannedSealedSegments.segments) {
1672
+ const scanRes = await scanSegmentWithFamiliesResult(seg, rangeStartSeq, rangeEndSeq);
1673
+ if (Result.isError(scanRes)) return scanRes;
1674
+ seq = seg.end_offset + 1n;
1675
+ if (timedOut || stopIfPageComplete()) break;
1676
+ }
1677
+ if (seq <= plannedSealedSegments.sealedEndSeq) seq = plannedSealedSegments.sealedEndSeq + 1n;
1678
+ } else {
1679
+ while (seq <= rangeEndSeq && seq <= visibleSealedThrough) {
1680
+ const seg = await this.findSegmentForOffset(stream, seq);
1681
+ if (!seg) break;
1682
+ const scanRes = await scanSegmentWithFamiliesResult(seg, rangeStartSeq, rangeEndSeq);
1683
+ if (Result.isError(scanRes)) return scanRes;
1684
+ seq = seg.end_offset + 1n;
1685
+ if (timedOut || stopIfPageComplete()) break;
1686
+ }
1687
+ }
1688
+ if (!timedOut && !stopIfPageComplete() && coverageState.canSearchWalTail && seq <= rangeEndSeq) {
1689
+ const tailRes = await scanWalTailResult(seq, rangeEndSeq, "asc", true);
1690
+ if (Result.isError(tailRes)) return tailRes;
1691
+ }
1692
+ }
1693
+ }
1694
+
1695
+ const pageHits = hits.slice(0, request.size);
1696
+ const nextSearchAfter = pageHits.length === request.size ? pageHits[pageHits.length - 1].sortResponse : null;
1697
+ const exactTotalKnown = !timedOut && coverageState.complete && nextSearchAfter == null;
1698
+ return Result.ok({
1699
+ stream,
1700
+ snapshotEndOffset,
1701
+ tookMs: Date.now() - startedAt,
1702
+ timedOut,
1703
+ timeoutMs: request.timeoutMs,
1704
+ coverage: {
1705
+ mode: coverageState.mode,
1706
+ complete: coverageState.complete && !timedOut,
1707
+ streamHeadOffset: coverageState.streamHeadOffset,
1708
+ visibleThroughOffset: coverageState.visibleThroughOffset,
1709
+ visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
1710
+ oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
1711
+ possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
1712
+ possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
1713
+ possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
1714
+ possibleMissingWalRows: coverageState.possibleMissingWalRows,
1715
+ indexedSegments,
1716
+ indexedSegmentTimeMs,
1717
+ ftsSectionGetMs,
1718
+ ftsDecodeMs,
1719
+ ftsClauseEstimateMs,
1720
+ scannedSegments,
1721
+ scannedSegmentTimeMs,
1722
+ scannedTailDocs,
1723
+ scannedTailTimeMs,
1724
+ exactCandidateTimeMs,
1725
+ candidateDocIds,
1726
+ decodedRecords,
1727
+ jsonParseTimeMs,
1728
+ segmentPayloadBytesFetched,
1729
+ sortTimeMs,
1730
+ peakHitsHeld,
1731
+ indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
1732
+ },
1733
+ total: {
1734
+ value: pageHits.length,
1735
+ relation: exactTotalKnown ? "eq" : "gte",
1736
+ },
1737
+ hits: pageHits.map((hit) => ({
1738
+ offset: hit.offset,
1739
+ score: hit.score,
1740
+ sort: hit.sortResponse,
1741
+ fields: hit.fields,
1742
+ source: hit.source,
1743
+ })),
1744
+ nextSearchAfter,
1745
+ });
1746
+ }
1747
+
1748
+ let seq = 0n;
1749
+ const sealedEnd = visibleSnapshotEndSeq < visibleSealedThrough ? visibleSnapshotEndSeq : visibleSealedThrough;
1750
+ const plannedSealedSegments = await this.planSealedReadSegments(
1751
+ stream,
1752
+ 0n,
1753
+ sealedEnd,
1754
+ exactCandidateInfo.segments,
1755
+ exactCandidateInfo.indexedThrough,
1756
+ "asc"
1757
+ );
1758
+ const allSealedSegments =
1759
+ primaryTimestampTopKSort && !plannedSealedSegments ? await this.planAllSealedReadSegments(stream, 0n, sealedEnd, "asc") : null;
1760
+ const sealedSegmentPlan = plannedSealedSegments ?? allSealedSegments;
1761
+ if (sealedSegmentPlan) {
1762
+ const sealedSegments =
1763
+ primaryTimestampTopKSort && primaryTimestampRowsBySegment
1764
+ ? orderSegmentsByPrimaryTimestampBounds(sealedSegmentPlan.segments, primaryTimestampRowsBySegment, primaryTimestampTopKSort.direction)
1765
+ : sealedSegmentPlan.segments;
1766
+ for (const seg of sealedSegments) {
1767
+ if (!primaryTimestampSegmentMayBeatTopK(seg)) break;
1768
+ const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq);
1769
+ if (Result.isError(scanRes)) return scanRes;
1770
+ if (seg.end_offset >= seq) seq = seg.end_offset + 1n;
1771
+ if (timedOut) break;
1772
+ }
1773
+ if (seq <= sealedSegmentPlan.sealedEndSeq) seq = sealedSegmentPlan.sealedEndSeq + 1n;
1774
+ } else {
1775
+ while (seq <= visibleSnapshotEndSeq && seq <= visibleSealedThrough) {
1776
+ const seg = await this.findSegmentForOffset(stream, seq);
1777
+ if (!seg) break;
1778
+ if (!primaryTimestampSegmentMayBeatTopK(seg)) break;
1779
+ const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq);
1780
+ if (Result.isError(scanRes)) return scanRes;
1781
+ seq = seg.end_offset + 1n;
1782
+ if (timedOut) break;
1783
+ }
1784
+ }
1785
+
1786
+ if (!timedOut && coverageState.canSearchWalTail && seq <= snapshotEndSeq) {
1787
+ const tailRes = await scanWalTailResult(seq, snapshotEndSeq, "asc", false);
1788
+ if (Result.isError(tailRes)) return tailRes;
1789
+ }
1790
+
1791
+ const sortStartedAt = Date.now();
1792
+ hits.sort((left, right) => compareSearchHits(left, right, request.sort));
1793
+ sortTimeMs += Date.now() - sortStartedAt;
1794
+ const pageHits = hits.slice(0, request.size);
1795
+ const nextSearchAfter = pageHits.length === request.size ? pageHits[pageHits.length - 1].sortResponse : null;
1796
+ const exactTotalKnown = !timedOut && coverageState.complete && nextSearchAfter == null;
1797
+
1798
+ return Result.ok({
1799
+ stream,
1800
+ snapshotEndOffset,
1801
+ tookMs: Date.now() - startedAt,
1802
+ timedOut,
1803
+ timeoutMs: request.timeoutMs,
1804
+ coverage: {
1805
+ mode: coverageState.mode,
1806
+ complete: coverageState.complete && !timedOut,
1807
+ streamHeadOffset: coverageState.streamHeadOffset,
1808
+ visibleThroughOffset: coverageState.visibleThroughOffset,
1809
+ visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
1810
+ oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
1811
+ possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
1812
+ possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
1813
+ possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
1814
+ possibleMissingWalRows: coverageState.possibleMissingWalRows,
1815
+ indexedSegments,
1816
+ indexedSegmentTimeMs,
1817
+ ftsSectionGetMs,
1818
+ ftsDecodeMs,
1819
+ ftsClauseEstimateMs,
1820
+ scannedSegments,
1821
+ scannedSegmentTimeMs,
1822
+ scannedTailDocs,
1823
+ scannedTailTimeMs,
1824
+ exactCandidateTimeMs,
1825
+ candidateDocIds,
1826
+ decodedRecords,
1827
+ jsonParseTimeMs,
1828
+ segmentPayloadBytesFetched,
1829
+ sortTimeMs,
1830
+ peakHitsHeld,
1831
+ indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
1832
+ },
1833
+ total: {
1834
+ value: pageHits.length,
1835
+ relation: exactTotalKnown ? "eq" : "gte",
1836
+ },
1837
+ hits: pageHits.map((hit) => ({
1838
+ offset: hit.offset,
1839
+ score: hit.score,
1840
+ sort: hit.sortResponse,
1841
+ fields: hit.fields,
1842
+ source: hit.source,
1843
+ })),
1844
+ nextSearchAfter,
1845
+ });
1846
+ } catch (e: unknown) {
1847
+ return Result.err({ kind: "internal", message: errorMessage(e) });
1848
+ } finally {
1849
+ leaveSearchPhase?.();
1850
+ }
1851
+ }
1852
+
1853
+ async search(args: { stream: string; request: SearchRequest }): Promise<SearchResultBatch> {
1854
+ const res = await this.searchResult(args);
1855
+ if (Result.isError(res)) throw dsError(res.error.message);
1856
+ return res.value;
1857
+ }
1858
+
1859
+ async aggregateResult(args: { stream: string; request: AggregateRequest }): Promise<Result<AggregateResultBatch, ReaderError>> {
1860
+ const { stream, request } = args;
1861
+ const leaveAggregatePhase = this.memorySampler?.enter("aggregate", {
1862
+ stream,
1863
+ rollup: request.rollup,
1864
+ over_limit: this.memory?.isOverLimit() === true,
1865
+ });
1866
+ const srow = await this.store.getStreamForRead(stream);
1867
+ try {
1868
+ if (!srow || this.store.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
1869
+ if (srow.expires_at_ms != null && await this.store.nowMsForRead() > srow.expires_at_ms) {
1870
+ return Result.err({ kind: "gone", message: "stream expired" });
1871
+ }
1872
+ const segmentCapabilityError = this.missingSegmentCapabilityError(srow);
1873
+ if (segmentCapabilityError) return Result.err(segmentCapabilityError);
1874
+
1875
+ const regRes = await this.registry.getRegistryResult(stream);
1876
+ if (Result.isError(regRes)) return Result.err({ kind: "internal", message: regRes.error.message });
1877
+ const registry = regRes.value;
1878
+ const rollup = registry.search?.rollups?.[request.rollup];
1879
+ if (!registry.search || !rollup) {
1880
+ return Result.err({ kind: "internal", message: "rollup is not configured for this stream" });
1881
+ }
1882
+
1883
+ const coverageState = await this.computePublishedCoverageState(stream, srow, registry);
1884
+ const objectStore =
1885
+ srow.sealed_through >= 0n
1886
+ ? (() => {
1887
+ const res = this.requireObjectStore();
1888
+ if (Result.isError(res)) return res;
1889
+ return Result.ok(res.value);
1890
+ })()
1891
+ : Result.ok(null);
1892
+ if (Result.isError(objectStore)) return Result.err(objectStore.error);
1893
+ const intervalMs = request.intervalMs;
1894
+ const intervalBig = BigInt(intervalMs);
1895
+ const fromMs = Number(request.fromMs);
1896
+ const toMs = Number(request.toMs);
1897
+ const fullStartMs = Number(((request.fromMs + intervalBig - 1n) / intervalBig) * intervalBig);
1898
+ const fullEndMs = Number((request.toMs / intervalBig) * intervalBig);
1899
+ const hasFullWindows = fullEndMs > fullStartMs;
1900
+ const dimensions = new Set(rollup.dimensions ?? []);
1901
+ const eligibility = extractRollupEligibility(request.q, dimensions);
1902
+ const selectedMeasures = new Set(request.measures ?? Object.keys(rollup.measures));
1903
+ const timestampField = rollup.timestampField ?? registry.search.primaryTimestampField;
1904
+ const primaryTimestampField = registry.search.primaryTimestampField;
1905
+ const usesPrimaryTimestampBounds = timestampField === primaryTimestampField;
1906
+
1907
+ const buckets = new Map<number, Map<string, AggregateGroupInternal>>();
1908
+ const indexedSegmentSet = new Set<number>();
1909
+ const scannedSegmentSet = new Set<number>();
1910
+ let scannedTailDocs = 0;
1911
+ const indexFamiliesUsed = new Set<string>();
1912
+ const metricsProfile = registry.search.profile === "metrics";
1913
+ let usedRollups = false;
1914
+
1915
+ const mergeBucketMeasures = (bucketStartMs: number, dimensionsKey: Record<string, string | null>, measures: Record<string, AggMeasureState>): void => {
1916
+ let groups = buckets.get(bucketStartMs);
1917
+ if (!groups) {
1918
+ groups = new Map();
1919
+ buckets.set(bucketStartMs, groups);
1920
+ }
1921
+ const projectedKey: Record<string, string | null> = {};
1922
+ for (const field of request.groupBy) projectedKey[field] = dimensionsKey[field] ?? null;
1923
+ const groupKey = JSON.stringify(projectedKey);
1924
+ let group = groups.get(groupKey);
1925
+ if (!group) {
1926
+ group = { key: projectedKey, measures: {} };
1927
+ groups.set(groupKey, group);
1928
+ }
1929
+ for (const [measureName, state] of Object.entries(measures)) {
1930
+ if (!selectedMeasures.has(measureName)) continue;
1931
+ const existing = group.measures[measureName];
1932
+ if (!existing) {
1933
+ group.measures[measureName] = cloneAggMeasureState(state);
1934
+ continue;
1935
+ }
1936
+ group.measures[measureName] = mergeAggMeasureState(existing, state);
1937
+ }
1938
+ };
1939
+
1940
+ const matchesExactFilters = (dimensionsKey: Record<string, string | null>): boolean => {
1941
+ for (const [field, value] of Object.entries(eligibility.exactFilters)) {
1942
+ if ((dimensionsKey[field] ?? null) !== value) return false;
1943
+ }
1944
+ return true;
1945
+ };
1946
+
1947
+ const partialRanges: Array<{ startMs: number; endMs: number }> = [];
1948
+ if (!eligibility.eligible || !hasFullWindows) {
1949
+ partialRanges.push({ startMs: fromMs, endMs: toMs });
1950
+ } else {
1951
+ if (fromMs < fullStartMs) partialRanges.push({ startMs: fromMs, endMs: fullStartMs });
1952
+ if (fullEndMs < toMs) partialRanges.push({ startMs: fullEndMs, endMs: toMs });
1953
+ }
1954
+
1955
+ const scanSegmentForAggregateResult = async (
1956
+ seg: SegmentRow,
1957
+ scanRanges: Array<{ startMs: number; endMs: number }>
1958
+ ): Promise<Result<void, ReaderError>> => {
1959
+ const segBytes = await loadSegmentBytes(objectStore.value!, seg, this.diskCache, this.retryOpts());
1960
+ let curOffset = seg.start_offset;
1961
+ for (const blockRes of iterateBlocksResult(segBytes)) {
1962
+ if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
1963
+ for (const record of blockRes.value.decoded.records) {
1964
+ const parsedRes = decodeJsonPayloadWithRegistryResult(this.registry, registry, curOffset, record.payload);
1965
+ if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
1966
+ const contributionRes = extractRollupContributionResult(registry, rollup, curOffset, parsedRes.value);
1967
+ if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
1968
+ const contribution = contributionRes.value;
1969
+ if (!contribution) {
1970
+ curOffset += 1n;
1971
+ continue;
1972
+ }
1973
+ const inRange = scanRanges.some((range) => contribution.timestampMs >= range.startMs && contribution.timestampMs < range.endMs);
1974
+ if (!inRange) {
1975
+ curOffset += 1n;
1976
+ continue;
1977
+ }
1978
+ if (request.q) {
1979
+ const evalRes = evaluateSearchQueryResult(registry, curOffset, request.q, parsedRes.value);
1980
+ if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
1981
+ if (!evalRes.value.matched) {
1982
+ curOffset += 1n;
1983
+ continue;
1984
+ }
1985
+ }
1986
+ const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
1987
+ mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
1988
+ curOffset += 1n;
1989
+ }
1990
+ }
1991
+ scannedSegmentSet.add(seg.segment_index);
1992
+ return Result.ok(undefined);
1993
+ };
1994
+
1995
+ const segmentMayOverlapAggregateRange = async (
1996
+ seg: SegmentRow,
1997
+ startMs: number,
1998
+ endMs: number
1999
+ ): Promise<boolean> => {
2000
+ if (usesPrimaryTimestampBounds) {
2001
+ const companionRow = await this.getSearchSegmentCompanion(stream, seg.segment_index);
2002
+ if (companionRow?.primary_timestamp_min_ms != null && companionRow.primary_timestamp_max_ms != null) {
2003
+ return companionRow.primary_timestamp_max_ms >= BigInt(startMs) && companionRow.primary_timestamp_min_ms < BigInt(endMs);
2004
+ }
2005
+ }
2006
+ return this.segmentMayOverlapTimeRange(stream, seg.segment_index, startMs, endMs, timestampField);
2007
+ };
2008
+
2009
+ const scanMetricsBlockForAggregateResult = async (
2010
+ seg: SegmentRow,
2011
+ companion: MetricsBlockSectionView,
2012
+ scanRanges: Array<{ startMs: number; endMs: number }>
2013
+ ): Promise<Result<void, ReaderError>> => {
2014
+ for (const record of companion.records()) {
2015
+ const offsetSeq = seg.start_offset + BigInt(record.doc_id);
2016
+ const timestampMs = record.windowStartMs;
2017
+ const inRange = scanRanges.some((range) => timestampMs >= range.startMs && timestampMs < range.endMs);
2018
+ if (!inRange) continue;
2019
+ const materialized = materializeMetricsBlockRecord(record);
2020
+ if (request.q) {
2021
+ const evalRes = evaluateSearchQueryResult(registry, offsetSeq, request.q, materialized);
2022
+ if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
2023
+ if (!evalRes.value.matched) continue;
2024
+ }
2025
+ const contributionRes = extractRollupContributionResult(registry, rollup, offsetSeq, materialized);
2026
+ if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
2027
+ const contribution = contributionRes.value;
2028
+ if (!contribution) continue;
2029
+ const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
2030
+ mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
2031
+ }
2032
+ indexedSegmentSet.add(seg.segment_index);
2033
+ indexFamiliesUsed.add("mblk");
2034
+ return Result.ok(undefined);
2035
+ };
2036
+
2037
+ for (const seg of await this.listSegmentsForStream(stream)) {
2038
+ if (seg.segment_index >= coverageState.visiblePublishedSegmentCount) break;
2039
+ let coveredAlignedWindows = false;
2040
+ if (eligibility.eligible && this.index && hasFullWindows) {
2041
+ const overlapsAlignedWindow = await segmentMayOverlapAggregateRange(seg, fullStartMs, fullEndMs);
2042
+ if (overlapsAlignedWindow) {
2043
+ const companion = await this.index.getAggSegmentCompanion(stream, seg.segment_index);
2044
+ const intervalCompanion = companion?.getInterval(request.rollup, intervalMs);
2045
+ if (intervalCompanion) {
2046
+ coveredAlignedWindows = true;
2047
+ indexedSegmentSet.add(seg.segment_index);
2048
+ indexFamiliesUsed.add("agg");
2049
+ usedRollups = true;
2050
+ intervalCompanion.forEachGroupInRange(fullStartMs, fullEndMs, (windowStartMs, group) => {
2051
+ if (!matchesExactFilters(group.dimensions)) return;
2052
+ mergeBucketMeasures(windowStartMs, group.dimensions, group.measures);
2053
+ });
2054
+ }
2055
+ }
2056
+ }
2057
+
2058
+ const scanRanges =
2059
+ !eligibility.eligible || !hasFullWindows
2060
+ ? [{ startMs: fromMs, endMs: toMs }]
2061
+ : coveredAlignedWindows
2062
+ ? partialRanges
2063
+ : [{ startMs: fromMs, endMs: toMs }];
2064
+ if (scanRanges.length === 0) continue;
2065
+ let overlaps = false;
2066
+ for (const range of scanRanges) {
2067
+ if (await segmentMayOverlapAggregateRange(seg, range.startMs, range.endMs)) {
2068
+ overlaps = true;
2069
+ break;
2070
+ }
2071
+ }
2072
+ if (!overlaps) continue;
2073
+ let scanRes: Result<void, ReaderError>;
2074
+ if (metricsProfile && this.index) {
2075
+ const companion = await this.index.getMetricsBlockSegmentCompanion(stream, seg.segment_index);
2076
+ if (companion) {
2077
+ scanRes = await scanMetricsBlockForAggregateResult(seg, companion, scanRanges);
2078
+ } else {
2079
+ scanRes = await scanSegmentForAggregateResult(seg, scanRanges);
2080
+ }
2081
+ } else {
2082
+ scanRes = await scanSegmentForAggregateResult(seg, scanRanges);
2083
+ }
2084
+ if (Result.isError(scanRes)) return scanRes;
2085
+ }
2086
+
2087
+ const tailStart = srow.sealed_through + 1n;
2088
+ const tailEnd = srow.next_offset - 1n;
2089
+ if (coverageState.canSearchWalTail && tailStart <= tailEnd) {
2090
+ for await (const record of this.store.readWalRange(stream, tailStart, tailEnd)) {
2091
+ scannedTailDocs += 1;
2092
+ const parsedRes = decodeJsonPayloadWithRegistryResult(this.registry, registry, record.offset, record.payload);
2093
+ if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
2094
+ const contributionRes = extractRollupContributionResult(registry, rollup, record.offset, parsedRes.value);
2095
+ if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
2096
+ const contribution = contributionRes.value;
2097
+ if (!contribution || contribution.timestampMs < fromMs || contribution.timestampMs >= toMs) continue;
2098
+ if (request.q) {
2099
+ const evalRes = evaluateSearchQueryResult(registry, record.offset, request.q, parsedRes.value);
2100
+ if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
2101
+ if (!evalRes.value.matched) continue;
2102
+ }
2103
+ const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
2104
+ mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
2105
+ }
2106
+ }
2107
+
2108
+ const bucketList = Array.from(buckets.entries())
2109
+ .sort((a, b) => a[0] - b[0])
2110
+ .map(([startMs, groups]) => ({
2111
+ start: new Date(startMs).toISOString(),
2112
+ end: new Date(startMs + intervalMs).toISOString(),
2113
+ groups: Array.from(groups.values())
2114
+ .sort((a, b) => JSON.stringify(a.key).localeCompare(JSON.stringify(b.key)))
2115
+ .map((group) => ({
2116
+ key: group.key,
2117
+ measures: Object.fromEntries(
2118
+ Object.entries(group.measures)
2119
+ .sort((a, b) => a[0].localeCompare(b[0]))
2120
+ .map(([name, state]) => [name, formatAggMeasureState(state)])
2121
+ ),
2122
+ })),
2123
+ }));
2124
+
2125
+ return Result.ok({
2126
+ stream,
2127
+ rollup: request.rollup,
2128
+ from: new Date(fromMs).toISOString(),
2129
+ to: new Date(toMs).toISOString(),
2130
+ interval: request.interval,
2131
+ coverage: {
2132
+ mode: coverageState.mode,
2133
+ complete: coverageState.complete,
2134
+ streamHeadOffset: coverageState.streamHeadOffset,
2135
+ visibleThroughOffset: coverageState.visibleThroughOffset,
2136
+ visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
2137
+ oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
2138
+ possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
2139
+ possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
2140
+ possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
2141
+ possibleMissingWalRows: coverageState.possibleMissingWalRows,
2142
+ usedRollups,
2143
+ indexedSegments: indexedSegmentSet.size,
2144
+ scannedSegments: scannedSegmentSet.size,
2145
+ scannedTailDocs,
2146
+ indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
2147
+ },
2148
+ buckets: bucketList,
2149
+ });
2150
+ } catch (e: unknown) {
2151
+ return Result.err({ kind: "internal", message: errorMessage(e) });
2152
+ } finally {
2153
+ leaveAggregatePhase?.();
2154
+ }
2155
+ }
2156
+
2157
/**
 * Throwing convenience wrapper around `aggregateResult`.
 *
 * @param args Stream name and aggregate request, forwarded unchanged.
 * @returns The aggregate batch when `aggregateResult` succeeds.
 * @throws The error produced by `dsError` from the failure message otherwise.
 */
async aggregate(args: { stream: string; request: AggregateRequest }): Promise<AggregateResultBatch> {
  const outcome = await this.aggregateResult(args);
  if (!Result.isError(outcome)) return outcome.value;
  throw dsError(outcome.error.message);
}
2162
+
2163
/**
 * Builds a block reader that pulls a segment object from the object store
 * with ranged reads instead of downloading it whole.
 *
 * The last 8 bytes of the object are read first: a big-endian u32 footer
 * length followed by the 4-character magic "DSF1". The footer that precedes
 * those 8 bytes is then fetched and parsed for its block list.
 *
 * @param seg Segment row; `stream`, `segment_index` and `size_bytes` are used.
 * @returns `null` when the segment is too small, has no "DSF1" trailer, or
 *   its footer yields no block list; otherwise a reader exposing the blocks,
 *   a per-block ranged read, and a running count of fetched bytes. An error
 *   result is returned when no object store is available.
 */
private async loadSegmentRangeBlockReaderResult(seg: SegmentRow): Promise<Result<SegmentRangeBlockReader | null, ReaderError>> {
  const storeRes = this.requireObjectStore();
  if (Result.isError(storeRes)) return Result.err(storeRes.error);
  const store = storeRes.value;
  const key = segmentObjectKey(streamHash16Hex(seg.stream), seg.segment_index);

  // Total bytes pulled through this reader; reported via `fetchedBytes()`.
  let bytesFetched = 0;
  const fetchRange = async (start: number, end: number): Promise<Result<Uint8Array, ReaderError>> => {
    const chunk = await retry(async () => {
      const got = await store.get(key, { range: { start, end } });
      if (got) return got;
      throw dsError(`object store missing segment: ${key}`);
    }, this.retryOpts());
    bytesFetched += chunk.byteLength;
    return Result.ok(chunk);
  };

  const size = seg.size_bytes;
  if (size < 8) return Result.ok(null);

  // Trailer layout: [u32 footer length][4 magic bytes].
  const trailerRes = await fetchRange(size - 8, size - 1);
  if (Result.isError(trailerRes)) return trailerRes;
  const trailer = trailerRes.value;
  if (trailer.byteLength < 8) return Result.ok(null);
  if (String.fromCharCode(...trailer.subarray(4, 8)) !== "DSF1") return Result.ok(null);

  const footerLen = readU32BE(trailer, 0);
  const footerStart = size - 8 - footerLen;
  if (footerStart < 0) return Result.ok(null);

  const footerRes = await fetchRange(footerStart, footerStart + footerLen - 1);
  if (Result.isError(footerRes)) return footerRes;
  const footer = parseFooterBytes(footerRes.value);
  if (!footer?.blocks) return Result.ok(null);

  return Result.ok({
    blocks: footer.blocks,
    // A block on disk is its fixed-size header followed by the compressed body.
    readBlock: async (block) => {
      const blockBytes = DSB3_HEADER_BYTES + block.compressedLen;
      return fetchRange(block.blockOffset, block.blockOffset + blockBytes - 1);
    },
    fetchedBytes: () => bytesFetched,
  });
}
2206
+
2207
/**
 * Scans one sealed segment from its newest record to its oldest and hands
 * every record inside [rangeStartSeq, rangeEndSeq] that survives index
 * pruning to `state.collectSearchMatchResult`.
 *
 * Steps, in order:
 *  1. Skip the segment when the exact-index candidate set covers it and does
 *     not list it, or when the cursor bound cannot overlap it.
 *  2. Resolve per-document candidates from the exact / col / fts companions.
 *  3. With candidate doc ids, read only the blocks that hold them, preferring
 *     ranged object-store reads and falling back to the fully loaded segment.
 *  4. Without candidates, walk blocks and records backwards, using the
 *     footer block index when present or decoding every block otherwise.
 *
 * The scan stops early, still returning ok, when the deadline passes (after
 * calling `state.setTimedOut(true)`) or `state.stopIfPageComplete()` is true.
 *
 * @param stream Stream name used for companion lookups.
 * @param seg Segment to scan.
 * @param exactCandidateInfo Segment-level pruning from the exact index.
 * @param cursorFieldBound Optional cursor bound used to skip whole segments.
 * @param exactClauses Exact clauses for per-document candidate resolution.
 * @param columnClauses Column clauses for per-document candidate resolution.
 * @param ftsClauses FTS clauses for per-document candidate resolution.
 * @param rangeStartSeq Lowest offset (inclusive) that may be reported.
 * @param rangeEndSeq Highest offset (inclusive) that may be reported.
 * @param state Callbacks and counters owned by the caller.
 * @returns ok(undefined) on completion or early stop; an error result when
 *   candidate resolution, block reads, block decoding or the match callback
 *   fail.
 */
private async scanSegmentReverseForSearchResult(
  stream: string,
  seg: SegmentRow,
  exactCandidateInfo: SegmentCandidateInfo,
  cursorFieldBound: SearchCursorFieldBound | null,
  exactClauses: SearchExactClause[],
  columnClauses: SearchColumnClause[],
  ftsClauses: SearchFtsClause[],
  rangeStartSeq: bigint,
  rangeEndSeq: bigint,
  state: {
    indexFamiliesUsed: Set<string>;
    collectSearchMatchResult: (offsetSeq: bigint, payload: Uint8Array) => Result<void, ReaderError>;
    deadline: number | null;
    isTimedOut: () => boolean;
    setTimedOut: (next: boolean) => void;
    stopIfPageComplete: () => boolean;
    addIndexedSegment: () => void;
    addScannedSegment: () => void;
    addIndexedSegmentTimeMs: (deltaMs: number) => void;
    addFtsSectionGetMs: (deltaMs: number) => void;
    addFtsDecodeMs: (deltaMs: number) => void;
    addFtsClauseEstimateMs: (deltaMs: number) => void;
    addScannedSegmentTimeMs: (deltaMs: number) => void;
    addCandidateDocIds: (count: number) => void;
    addDecodedRecords: (count: number) => void;
    addSegmentPayloadBytesFetched: (count: number) => void;
  }
): Promise<Result<void, ReaderError>> {
  const segmentStartedAt = Date.now();
  // Flags the search as timed out once the deadline has passed.
  const markTimedOutIfNeeded = (): boolean => {
    if (state.deadline == null || Date.now() < state.deadline) return false;
    state.setTimedOut(true);
    return true;
  };
  if (markTimedOutIfNeeded()) return Result.ok(undefined);

  // Segment-level pruning: covered by the exact index but not a candidate.
  if (
    exactCandidateInfo.segments &&
    seg.segment_index < exactCandidateInfo.indexedThrough &&
    !exactCandidateInfo.segments.has(seg.segment_index)
  ) {
    return Result.ok(undefined);
  }

  // Cursor pruning: count the segment as answered by the "col" family.
  if (cursorFieldBound) {
    const overlapsCursor = await this.segmentMayOverlapSearchCursor(stream, seg.segment_index, cursorFieldBound);
    if (!overlapsCursor) {
      state.indexFamiliesUsed.add("col");
      state.addIndexedSegment();
      state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
      return Result.ok(undefined);
    }
  }
  if (markTimedOutIfNeeded()) return Result.ok(undefined);

  const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult(
    stream,
    seg.segment_index,
    exactClauses,
    columnClauses,
    ftsClauses,
    {
      addFtsSectionGetMs: state.addFtsSectionGetMs,
      addFtsDecodeMs: state.addFtsDecodeMs,
      addFtsClauseEstimateMs: state.addFtsClauseEstimateMs,
    }
  );
  if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message });
  if (markTimedOutIfNeeded()) return Result.ok(undefined);
  const familyCandidates = familyCandidatesRes.value;
  if (familyCandidates.docIds) state.addCandidateDocIds(familyCandidates.docIds.size);

  // An empty candidate set means no document here can match.
  if (familyCandidates.docIds && familyCandidates.docIds.size === 0) {
    if (familyCandidates.usedFamilies.size > 0) state.addIndexedSegment();
    for (const family of familyCandidates.usedFamilies) state.indexFamiliesUsed.add(family);
    if (familyCandidates.usedFamilies.size > 0) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
    return Result.ok(undefined);
  }

  const usedIndexedFamilies = familyCandidates.usedFamilies.size > 0;
  if (usedIndexedFamilies) {
    state.addIndexedSegment();
    for (const family of familyCandidates.usedFamilies) state.indexFamiliesUsed.add(family);
  } else {
    state.addScannedSegment();
  }

  // Books elapsed time under the bucket the segment was counted in above.
  const addSegmentTime = (): void => {
    if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
    else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
  };

  // Visits candidate doc ids newest-first, decoding each needed block once.
  const scanCandidateDocIdsWithBlocksResult = async (
    blocks: BlockIndexEntry[],
    readBlock: (block: BlockIndexEntry) => Promise<Result<Uint8Array, ReaderError>>
  ): Promise<Result<void, ReaderError>> => {
    const candidateDocIds = Array.from(familyCandidates.docIds!)
      .filter((docId) => {
        const offsetSeq = seg.start_offset + BigInt(docId);
        return offsetSeq >= rangeStartSeq && offsetSeq <= rangeEndSeq;
      })
      .sort((left, right) => right - left);
    let currentBlockIndex = -1;
    let currentBlockStartOffset = 0n;
    let currentRecords: Array<{ payload: Uint8Array }> = [];
    for (const docId of candidateDocIds) {
      const offsetSeq = seg.start_offset + BigInt(docId);
      const blockIndex = findFirstRelevantBlockIndex(blocks, offsetSeq);
      const block = blocks[blockIndex];
      // No block covers this offset (empty block list or out-of-range index):
      // skip the candidate instead of dereferencing `undefined`.
      if (!block) continue;
      const blockStartOffset = block.firstOffset;
      const blockEndOffset = blockStartOffset + BigInt(block.recordCount) - 1n;
      if (offsetSeq < blockStartOffset || offsetSeq > blockEndOffset) continue;
      if (blockIndex !== currentBlockIndex) {
        const blockBytesRes = await readBlock(block);
        if (Result.isError(blockBytesRes)) return blockBytesRes;
        const decodedRes = decodeBlockResult(blockBytesRes.value);
        if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
        currentBlockIndex = blockIndex;
        currentBlockStartOffset = blockStartOffset;
        currentRecords = decodedRes.value.records;
        state.addDecodedRecords(decodedRes.value.recordCount);
      }
      const recordIndex = Number(offsetSeq - currentBlockStartOffset);
      const record = currentRecords[recordIndex];
      if (!record) continue;
      const matchRes = state.collectSearchMatchResult(offsetSeq, record.payload);
      if (Result.isError(matchRes)) return matchRes;
      if (markTimedOutIfNeeded()) return Result.ok(undefined);
      if (state.stopIfPageComplete()) return Result.ok(undefined);
    }
    return Result.ok(undefined);
  };

  if (markTimedOutIfNeeded()) {
    // The segment is already counted, so book its time like other exits do.
    addSegmentTime();
    return Result.ok(undefined);
  }

  // Preferred path for candidate lookups: ranged reads of just the blocks hit.
  if (familyCandidates.docIds) {
    const rangeReaderRes = await this.loadSegmentRangeBlockReaderResult(seg);
    if (Result.isError(rangeReaderRes)) return rangeReaderRes;
    if (rangeReaderRes.value) {
      const rangeReader = rangeReaderRes.value;
      const scanRes = await scanCandidateDocIdsWithBlocksResult(rangeReader.blocks, rangeReader.readBlock);
      state.addSegmentPayloadBytesFetched(rangeReader.fetchedBytes());
      addSegmentTime();
      return scanRes;
    }
  }

  // Fallback: load the whole segment and read from it.
  const objectStoreRes = this.requireObjectStore();
  if (Result.isError(objectStoreRes)) return Result.err(objectStoreRes.error);
  const source = await loadSegmentSource(objectStoreRes.value, seg, this.diskCache, this.retryOpts());
  state.addSegmentPayloadBytesFetched(seg.size_bytes);
  if (markTimedOutIfNeeded()) {
    // Same accounting as the timeout exits inside the scan loops below.
    addSegmentTime();
    return Result.ok(undefined);
  }

  const footerBlocks = loadSegmentFooterBlocksFromSource(seg, source);
  if (footerBlocks) {
    if (familyCandidates.docIds) {
      const scanRes = await scanCandidateDocIdsWithBlocksResult(footerBlocks, async (block) => {
        const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
        return Result.ok(readRangeFromSource(source, block.blockOffset, block.blockOffset + totalLen - 1));
      });
      addSegmentTime();
      return scanRes;
    }

    // No candidates: walk footer-indexed blocks backwards from the range end.
    for (let blockIndex = findFirstRelevantBlockIndex(footerBlocks, rangeEndSeq); blockIndex >= 0; blockIndex--) {
      const block = footerBlocks[blockIndex]!;
      const blockStartOffset = block.firstOffset;
      const blockEndOffset = blockStartOffset + BigInt(block.recordCount) - 1n;
      if (blockStartOffset > rangeEndSeq) continue;
      if (blockEndOffset < rangeStartSeq) break;

      const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
      const blockBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + totalLen - 1);
      const decodedRes = decodeBlockResult(blockBytes);
      if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
      const decoded = decodedRes.value;
      state.addDecodedRecords(decoded.recordCount);
      for (let recordIndex = decoded.records.length - 1; recordIndex >= 0; recordIndex--) {
        const offsetSeq = blockStartOffset + BigInt(recordIndex);
        if (offsetSeq > rangeEndSeq) continue;
        if (offsetSeq < rangeStartSeq) {
          addSegmentTime();
          return Result.ok(undefined);
        }
        const matchRes = state.collectSearchMatchResult(offsetSeq, decoded.records[recordIndex]!.payload);
        if (Result.isError(matchRes)) return matchRes;
        if (markTimedOutIfNeeded()) {
          addSegmentTime();
          return Result.ok(undefined);
        }
        if (state.stopIfPageComplete()) {
          addSegmentTime();
          return Result.ok(undefined);
        }
      }
    }

    addSegmentTime();
    return Result.ok(undefined);
  }

  // No footer block index: decode every block first, then walk backwards.
  const decodedBlocks: Array<{ records: Array<{ payload: Uint8Array }> }> = [];
  for (const blockRes of iterateBlocksResult(source.bytes)) {
    if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
    decodedBlocks.push({ records: blockRes.value.decoded.records });
    state.addDecodedRecords(blockRes.value.decoded.recordCount);
    if (markTimedOutIfNeeded()) {
      addSegmentTime();
      return Result.ok(undefined);
    }
  }

  // Offsets are derived by counting records back from the segment's end offset.
  let blockEndOffset = seg.end_offset;
  for (let blockIndex = decodedBlocks.length - 1; blockIndex >= 0; blockIndex--) {
    const decoded = decodedBlocks[blockIndex]!;
    const blockStartOffset = blockEndOffset - BigInt(decoded.records.length) + 1n;
    for (let recordIndex = decoded.records.length - 1; recordIndex >= 0; recordIndex--) {
      const offsetSeq = blockStartOffset + BigInt(recordIndex);
      if (offsetSeq > rangeEndSeq) continue;
      if (offsetSeq < rangeStartSeq) {
        addSegmentTime();
        return Result.ok(undefined);
      }
      const localDocId = Number(offsetSeq - seg.start_offset);
      if (!familyCandidates.docIds || familyCandidates.docIds.has(localDocId)) {
        const matchRes = state.collectSearchMatchResult(offsetSeq, decoded.records[recordIndex]!.payload);
        if (Result.isError(matchRes)) return matchRes;
      }
      if (markTimedOutIfNeeded()) {
        addSegmentTime();
        return Result.ok(undefined);
      }
      if (state.stopIfPageComplete()) {
        addSegmentTime();
        return Result.ok(undefined);
      }
    }
    blockEndOffset = blockStartOffset - 1n;
  }

  addSegmentTime();
  return Result.ok(undefined);
}
2444
+
2445
/**
 * Derives a cache key from a registry's current version and its search
 * configuration (serialized as JSON, with `null` standing in for an absent
 * configuration).
 *
 * @param registry Schema registry to fingerprint.
 * @returns A string of the form `<currentVersion>:<search JSON>`.
 */
private searchSchemaKey(registry: SchemaRegistry): string {
  const version = registry.currentVersion;
  const serializedSearch = JSON.stringify(registry.search ?? null);
  return `${version}:${serializedSearch}`;
}
2448
+
2449
/**
 * Returns the exact-value posting cache for a WAL offset window of a stream,
 * rebuilding it when the cached entry does not match.
 *
 * The entry stored in `this.hotWalExact` is reused only if its start offset,
 * end offset and schema key all equal the requested ones. Otherwise every WAL
 * record in [startSeq, endSeq] is decoded, turned into a search document, and
 * its exact values are indexed as field -> value -> list of offsets. The new
 * entry replaces the previous one for the stream.
 *
 * @param stream Stream whose WAL is read.
 * @param startSeq First offset (inclusive) of the window.
 * @param endSeq Last offset (inclusive); an inverted window yields an empty cache.
 * @param registry Schema registry used for decoding and document building.
 * @returns The cache entry, or an "internal" error when decoding a payload or
 *   building a search document fails.
 */
private async buildHotWalExactCacheResult(
  stream: string,
  startSeq: bigint,
  endSeq: bigint,
  registry: SchemaRegistry
): Promise<Result<HotWalExactCache, ReaderError>> {
  const schemaKey = this.searchSchemaKey(registry);
  const existing = this.hotWalExact.get(stream);
  if (existing) {
    const sameWindow = existing.startSeq === startSeq && existing.endSeq === endSeq;
    if (sameWindow && existing.schemaKey === schemaKey) return Result.ok(existing);
  }

  const values = new Map<string, Map<string, bigint[]>>();
  if (startSeq <= endSeq) {
    for await (const walRecord of this.store.readWalRange(stream, startSeq, endSeq)) {
      const seq = walRecord.offset;
      const decodedRes = decodeJsonPayloadWithRegistryResult(this.registry, registry, seq, walRecord.payload);
      if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
      const documentRes = buildSearchDocumentResult(registry, seq, decodedRes.value);
      if (Result.isError(documentRes)) return Result.err({ kind: "internal", message: documentRes.error.message });

      for (const [field, fieldValues] of documentRes.value.exactValues) {
        // The per-field map is created even when the field has no values.
        let postingsByValue = values.get(field);
        if (postingsByValue === undefined) {
          postingsByValue = new Map();
          values.set(field, postingsByValue);
        }
        for (const value of fieldValues) {
          let postingList = postingsByValue.get(value);
          if (postingList === undefined) {
            postingList = [];
            postingsByValue.set(value, postingList);
          }
          postingList.push(seq);
        }
      }
    }
  }

  const rebuilt: HotWalExactCache = { startSeq, endSeq, schemaKey, values };
  this.hotWalExact.set(stream, rebuilt);
  return Result.ok(rebuilt);
}
2491
+
2492
/**
 * Finds the WAL offsets in [startSeq, endSeq] whose documents satisfy every
 * exact clause, using the hot WAL exact-value cache.
 *
 * @param stream Stream whose WAL window is consulted.
 * @param startSeq First offset (inclusive) of the window.
 * @param endSeq Last offset (inclusive) of the window.
 * @param clauses Exact clauses that must all match.
 * @param registry Schema registry forwarded to the cache builder.
 * @returns `null` when there are no clauses or the window is inverted; an
 *   empty list when any clause has no postings; otherwise the offsets present
 *   in every clause's posting list, in the order of the shortest list. Cache
 *   build failures are passed through.
 */
private async hotWalExactOffsetsResult(
  stream: string,
  startSeq: bigint,
  endSeq: bigint,
  clauses: SearchExactClause[],
  registry: SchemaRegistry
): Promise<Result<bigint[] | null, ReaderError>> {
  if (clauses.length === 0 || startSeq > endSeq) return Result.ok(null);
  const cacheRes = await this.buildHotWalExactCacheResult(stream, startSeq, endSeq, registry);
  if (Result.isError(cacheRes)) return cacheRes;

  const postingsByField = cacheRes.value.values;
  const postingLists: bigint[][] = [];
  for (const clause of clauses) {
    const list = postingsByField.get(clause.field)?.get(clause.canonicalValue);
    // One clause without postings empties the whole conjunction.
    if (!list || list.length === 0) return Result.ok([]);
    postingLists.push(list);
  }

  // Probe from the shortest list so the fewest offsets need checking.
  postingLists.sort((left, right) => left.length - right.length);
  const [shortest, ...others] = postingLists;
  const membership = others.map((list) => new Set(list));

  const shared: bigint[] = [];
  for (const offset of shortest!) {
    if (membership.every((set) => set.has(offset))) shared.push(offset);
  }
  return Result.ok(shared);
}
2510
+
2511
/**
 * Reads the single WAL record at `offsetSeq` by requesting the one-element
 * range [offsetSeq, offsetSeq].
 *
 * @param stream Stream whose WAL is read.
 * @param offsetSeq Offset of the wanted record.
 * @returns The first record yielded for that range, reduced to its offset and
 *   payload, or `null` when the range yields nothing.
 */
private async walRecordAt(stream: string, offsetSeq: bigint): Promise<{ offset: bigint; payload: Uint8Array } | null> {
  let found: { offset: bigint; payload: Uint8Array } | null = null;
  for await (const { offset, payload } of this.store.readWalRange(stream, offsetSeq, offsetSeq)) {
    found = { offset, payload };
    break;
  }
  return found;
}
2517
+
2518
/**
 * Decides whether a segment could still contain documents at or beyond a
 * search cursor bound, using the segment's column companion.
 *
 * The answer is conservatively `true` whenever the information needed to rule
 * the segment out is missing: no index, no encoded bound, no companion, a
 * non-bigint cursor value on the primary timestamp path, no field entry, or
 * no min/max for the field.
 *
 * @param stream Stream name for the companion lookup.
 * @param segmentIndex Segment being tested.
 * @param bound Cursor bound carrying the sort spec and the `after` value.
 * @returns For a "desc" sort, whether the segment minimum is <= the bound;
 *   for any other direction, whether the segment maximum is >= the bound.
 */
private async segmentMayOverlapSearchCursor(
  stream: string,
  segmentIndex: number,
  bound: SearchCursorFieldBound
): Promise<boolean> {
  if (!this.index || bound.encoded == null) return true;
  const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
  if (!companion) return true;

  const descending = bound.sort.direction === "desc";

  // Primary timestamp: compare against the companion's timestamp bounds.
  if (companion.primaryTimestampField === bound.sort.field) {
    const segmentMinMs = companion.minTimestampMs();
    const segmentMaxMs = segmentMinMs == null ? null : companion.maxTimestampMs();
    if (segmentMinMs != null && segmentMaxMs != null) {
      const cursorMs = bound.after;
      if (typeof cursorMs !== "bigint") return true;
      if (descending) return segmentMinMs <= cursorMs;
      return segmentMaxMs >= cursorMs;
    }
  }

  // Any other sort field: compare against that column's min/max.
  const column = companion.getField(bound.sort.field);
  if (!column) return true;
  const lowest = column.minValue();
  const highest = column.maxValue();
  if (lowest == null || highest == null) return true;

  if (descending) return compareComparableValues(lowest, bound.after) <= 0;
  return compareComparableValues(highest, bound.after) >= 0;
}
2545
+
2546
/**
 * Decides whether a segment may hold records inside [startMs, endMs).
 *
 * The column companion's timestamp bounds are used when its primary
 * timestamp field equals `timestampField` and both bounds are finite.
 * Otherwise the metrics-block companion's window bounds are used. When
 * neither source gives finite bounds the answer is `true`.
 *
 * @param stream Stream name for companion lookups.
 * @param segmentIndex Segment being tested.
 * @param startMs Inclusive start of the queried range.
 * @param endMs Exclusive end of the queried range.
 * @param timestampField Field the column companion must be keyed on.
 * @returns `true` when the segment's max is >= `startMs` and its min is
 *   < `endMs`, or when no usable bounds are available.
 */
private async segmentMayOverlapTimeRange(
  stream: string,
  segmentIndex: number,
  startMs: number,
  endMs: number,
  timestampField: string
): Promise<boolean> {
  if (!this.index) return true;

  const intersectsRange = (lowMs: number, highMs: number): boolean => highMs >= startMs && lowMs < endMs;

  const columns = await this.index.getColSegmentCompanion(stream, segmentIndex);
  if (columns && columns.primaryTimestampField === timestampField) {
    const rawMin = columns.minTimestampMs();
    const rawMax = columns.maxTimestampMs();
    const lowMs = rawMin == null ? null : Number(rawMin);
    const highMs = rawMax == null ? null : Number(rawMax);
    if (lowMs != null && highMs != null && Number.isFinite(lowMs) && Number.isFinite(highMs)) {
      return intersectsRange(lowMs, highMs);
    }
  }

  // Column bounds unavailable: fall back to the metrics-block window bounds.
  const metrics = await this.index.getMetricsBlockSegmentCompanion(stream, segmentIndex);
  if (!metrics) return true;
  const windowMin = metrics.minWindowStartMs;
  const windowMax = metrics.maxWindowEndMs;
  if (Number.isFinite(windowMin) && Number.isFinite(windowMax)) {
    return intersectsRange(windowMin as number, windowMax as number);
  }
  return true;
}
2569
+
2570
/**
 * Computes which segments can contain records matching a routing key and/or
 * the positive exact clauses of a read filter.
 *
 * Each index lookup reports a candidate segment set and how far it has
 * indexed. The result uses the smallest `indexedThrough` of all lookups and
 * the intersection of their candidate sets restricted to segment indexes
 * below that value.
 *
 * @param stream Stream to look up.
 * @param keyBytes Routing key bytes, or `null` when no key is given.
 * @param filter Compiled read filter, or `null`.
 * @returns `{ segments: null, indexedThrough: 0 }` when no pruning is
 *   possible (no index, no lookup produced a candidate, or nothing is indexed
 *   yet); otherwise the intersected set and the shared `indexedThrough`.
 */
private async resolveCandidateSegments(
  stream: string,
  keyBytes: Uint8Array | null,
  filter: CompiledReadFilter | null
): Promise<SegmentCandidateInfo> {
  const noPruning = (): SegmentCandidateInfo => ({ segments: null, indexedThrough: 0 });
  if (!this.index) return noPruning();

  const lookups: IndexCandidate[] = [];
  if (keyBytes) {
    const byRoutingKey = await this.index.candidateSegmentsForRoutingKey(stream, keyBytes);
    if (byRoutingKey) lookups.push(byRoutingKey);
  }
  if (filter) {
    for (const clause of collectPositiveExactFilterClauses(filter)) {
      const valueBytes = utf8Bytes(clause.canonicalValue);
      const bySecondaryIndex = await this.index.candidateSegmentsForSecondaryIndex(stream, clause.field, valueBytes);
      if (bySecondaryIndex) lookups.push(bySecondaryIndex);
    }
  }
  if (lookups.length === 0) return noPruning();

  // Only segments every lookup has indexed can be pruned safely.
  let indexedThrough = Number.MAX_SAFE_INTEGER;
  for (const lookup of lookups) indexedThrough = Math.min(indexedThrough, lookup.indexedThrough);
  if (!(Number.isFinite(indexedThrough) && indexedThrough > 0)) return noPruning();

  let survivors: Set<number> | null = null;
  for (const lookup of lookups) {
    const covered = new Set<number>();
    for (const segmentIndex of lookup.segments) {
      if (segmentIndex < indexedThrough) covered.add(segmentIndex);
    }
    survivors =
      survivors === null
        ? covered
        : new Set(Array.from(survivors).filter((segmentIndex) => covered.has(segmentIndex)));
  }
  return { segments: survivors ?? new Set<number>(), indexedThrough };
}
2615
+
2616
/**
 * Computes which segments can contain documents matching all positive exact
 * clauses of a compiled search query, using the secondary indexes.
 *
 * The result uses the smallest `indexedThrough` among the lookups that
 * returned a candidate, and the intersection of their segment sets limited
 * to indexes below that value.
 *
 * @param stream Stream to look up.
 * @param query Compiled search query.
 * @returns `{ segments: null, indexedThrough: 0 }` when there is no index, no
 *   positive exact clause, no lookup result, or nothing indexed yet;
 *   otherwise the intersected set with the shared `indexedThrough`.
 */
private async resolveSearchExactCandidateSegments(stream: string, query: CompiledSearchQuery): Promise<SegmentCandidateInfo> {
  const unpruned: SegmentCandidateInfo = { segments: null, indexedThrough: 0 };
  if (!this.index) return unpruned;

  const exactClauses = collectPositiveSearchExactClauses(query);
  if (exactClauses.length === 0) return unpruned;

  const lookups: IndexCandidate[] = [];
  for (const { field, canonicalValue } of exactClauses) {
    const hit = await this.index.candidateSegmentsForSecondaryIndex(stream, field, utf8Bytes(canonicalValue));
    if (hit) lookups.push(hit);
  }
  if (lookups.length === 0) return unpruned;

  let indexedThrough = Number.MAX_SAFE_INTEGER;
  for (const lookup of lookups) {
    indexedThrough = Math.min(indexedThrough, lookup.indexedThrough);
  }
  if (!Number.isFinite(indexedThrough) || indexedThrough <= 0) return unpruned;

  let common: Set<number> | null = null;
  for (const lookup of lookups) {
    // Ignore segments beyond what every lookup has indexed.
    const withinCoverage = new Set<number>();
    for (const segmentIndex of lookup.segments) {
      if (segmentIndex < indexedThrough) withinCoverage.add(segmentIndex);
    }
    if (common === null) {
      common = withinCoverage;
    } else {
      // Deleting the entry currently being visited is safe for a Set.
      for (const segmentIndex of common) {
        if (!withinCoverage.has(segmentIndex)) common.delete(segmentIndex);
      }
    }
  }
  return { segments: common ?? new Set<number>(), indexedThrough };
}
2647
+
2648
/**
 * Narrows a segment to the doc ids that satisfy every read-filter column
 * clause, using the segment's column companion.
 *
 * The first clause's result set is kept and narrowed in place by each later
 * clause; evaluation stops once the narrowed set becomes empty.
 *
 * @param stream Stream name for the companion lookup.
 * @param segmentIndex Segment whose companion is consulted.
 * @param clauses Column clauses to apply.
 * @returns ok(null) when column filtering cannot be used (no index, no
 *   clauses, no companion, or a clause that fails to evaluate); otherwise the
 *   set of matching doc ids.
 */
private async resolveColumnCandidateDocIdsResult(
  stream: string,
  segmentIndex: number,
  clauses: ReadFilterColumnClause[]
): Promise<Result<Set<number> | null, { message: string }>> {
  if (!this.index || clauses.length === 0) return Result.ok(null);
  const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
  if (!companion) return Result.ok(null);

  let matching: Set<number> | null = null;
  for (const { field, op, compareValue } of clauses) {
    const filtered = filterDocIdsByColumnResult({ companion, field, op, value: compareValue });
    // A clause that cannot be evaluated disables column pruning entirely.
    if (Result.isError(filtered)) return Result.ok(null);

    const clauseDocIds = filtered.value;
    if (matching == null) {
      matching = clauseDocIds;
      continue;
    }
    for (const docId of matching) {
      if (!clauseDocIds.has(docId)) matching.delete(docId);
    }
    if (matching.size === 0) break;
  }
  return Result.ok(matching ?? new Set<number>());
}
2677
+
2678
/**
 * Narrows a segment to the doc ids that satisfy every search column clause,
 * using the segment's column companion.
 *
 * The set returned for the first clause is narrowed in place by each
 * following clause, and the loop ends early once nothing is left.
 *
 * @param stream Stream name for the companion lookup.
 * @param segmentIndex Segment whose companion is consulted.
 * @param clauses Search column clauses to apply.
 * @returns ok(null) when there is no index, no clause, no companion, or a
 *   clause fails to evaluate; otherwise the set of matching doc ids.
 */
private async resolveSearchColumnCandidateDocIdsResult(
  stream: string,
  segmentIndex: number,
  clauses: SearchColumnClause[]
): Promise<Result<Set<number> | null, { message: string }>> {
  if (!this.index || clauses.length === 0) return Result.ok(null);
  const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
  if (!companion) return Result.ok(null);

  let remaining: Set<number> | null = null;
  for (const clause of clauses) {
    const evaluated = filterDocIdsByColumnResult({
      companion,
      field: clause.field,
      op: clause.op,
      value: clause.compareValue,
    });
    // Evaluation failure means the column index cannot answer this query.
    if (Result.isError(evaluated)) return Result.ok(null);

    if (remaining == null) {
      remaining = evaluated.value;
    } else {
      const allowed = evaluated.value;
      for (const docId of remaining) {
        if (!allowed.has(docId)) remaining.delete(docId);
      }
      if (remaining.size === 0) break;
    }
  }
  return Result.ok(remaining ?? new Set<number>());
}
2707
+
2708
/**
 * Resolves the doc ids of a segment that satisfy the given FTS clauses,
 * using the segment's FTS companion.
 *
 * When the index offers `getFtsSegmentCompanionWithStats`, its section-get
 * and decode timings are forwarded to `stats`; otherwise the plain lookup is
 * used and zero timings are reported.
 *
 * @param stream Stream name for the companion lookup.
 * @param segmentIndex Segment whose companion is consulted.
 * @param clauses FTS clauses to apply.
 * @param stats Optional timing sinks.
 * @returns ok(null) when there is no index, no clause, or no companion;
 *   otherwise the filter's value, or its error unchanged.
 */
private async resolveSearchFtsCandidateDocIdsResult(
  stream: string,
  segmentIndex: number,
  clauses: SearchFtsClause[],
  stats?: {
    addFtsSectionGetMs?: (deltaMs: number) => void;
    addFtsDecodeMs?: (deltaMs: number) => void;
    addFtsClauseEstimateMs?: (deltaMs: number) => void;
  }
): Promise<Result<Set<number> | null, { message: string }>> {
  if (!this.index || clauses.length === 0) return Result.ok(null);

  const lookup = this.index.getFtsSegmentCompanionWithStats
    ? await this.index.getFtsSegmentCompanionWithStats(stream, segmentIndex)
    : { companion: await this.index.getFtsSegmentCompanion(stream, segmentIndex), stats: { sectionGetMs: 0, decodeMs: 0 } };

  const loadTimings = lookup.stats;
  stats?.addFtsSectionGetMs?.(loadTimings.sectionGetMs);
  stats?.addFtsDecodeMs?.(loadTimings.decodeMs);

  const companion = lookup.companion;
  if (!companion) return Result.ok(null);

  const filtered = filterDocIdsByFtsClausesResult({
    companion,
    clauses,
    onEstimateMs: (elapsedMs) => {
      stats?.addFtsClauseEstimateMs?.(elapsedMs);
    },
  });
  return Result.isError(filtered) ? filtered : Result.ok(filtered.value);
}
2736
+
2737
/**
 * Combines the per-document candidates of the exact, column and FTS index
 * families for one segment.
 *
 * Families are consulted in the order exact, col, fts, each only when it has
 * clauses. A family that yields a doc-id set narrows the running
 * intersection and is recorded in `usedFamilies`; a family with no companion
 * or no answer is skipped.
 *
 * @param stream Stream name for companion lookups.
 * @param segmentIndex Segment being resolved.
 * @param exactClauses Exact clauses.
 * @param columnClauses Column clauses.
 * @param ftsClauses FTS clauses.
 * @param stats Optional FTS timing sinks, forwarded to the FTS resolver.
 * @returns `docIds` (or `null` when no family answered) together with the
 *   families that contributed; the first family error is returned unchanged.
 */
private async resolveSearchFamilyCandidatesResult(
  stream: string,
  segmentIndex: number,
  exactClauses: SearchExactClause[],
  columnClauses: SearchColumnClause[],
  ftsClauses: SearchFtsClause[],
  stats?: {
    addFtsSectionGetMs?: (deltaMs: number) => void;
    addFtsDecodeMs?: (deltaMs: number) => void;
    addFtsClauseEstimateMs?: (deltaMs: number) => void;
  }
): Promise<Result<SearchFamilyCandidateInfo, { message: string }>> {
  const usedFamilies = new Set<string>();

  // Adopts `incoming` as the first set, or narrows `current` in place.
  const narrow = (current: Set<number> | null, incoming: Set<number>): Set<number> => {
    if (current == null) return incoming;
    for (const docId of Array.from(current)) {
      if (!incoming.has(docId)) current.delete(docId);
    }
    return current;
  };

  let docIds: Set<number> | null = null;

  if (exactClauses.length > 0) {
    const exactCompanion = await this.index?.getExactSegmentCompanion(stream, segmentIndex);
    if (exactCompanion) {
      const exactRes = filterDocIdsByExactClausesResult({ companion: exactCompanion, clauses: exactClauses });
      if (Result.isError(exactRes)) return exactRes;
      docIds = exactRes.value;
      usedFamilies.add("exact");
    }
  }

  if (columnClauses.length > 0) {
    const columnRes = await this.resolveSearchColumnCandidateDocIdsResult(stream, segmentIndex, columnClauses);
    if (Result.isError(columnRes)) return columnRes;
    if (columnRes.value) {
      docIds = narrow(docIds, columnRes.value);
      usedFamilies.add("col");
    }
  }

  if (ftsClauses.length > 0) {
    const ftsRes = await this.resolveSearchFtsCandidateDocIdsResult(stream, segmentIndex, ftsClauses, stats);
    if (Result.isError(ftsRes)) return ftsRes;
    if (ftsRes.value) {
      docIds = narrow(docIds, ftsRes.value);
      usedFamilies.add("fts");
    }
  }

  return Result.ok({ docIds, usedFamilies });
}
2792
+ }
2793
+
2794
/**
 * Compares two byte arrays for equal length and identical content.
 *
 * @param a First byte array.
 * @param b Second byte array.
 * @returns `true` when both have the same length and the same byte at every
 *   position.
 */
function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
  if (a.byteLength !== b.byteLength) return false;
  return a.every((byte, position) => byte === b[position]);
}
2799
+
2800
/**
 * Produces the internal sort key of a search hit, one entry per sort spec.
 *
 * - "score" sorts use `evaluation.score`.
 * - "offset" sorts use `offsetSeq`.
 * - Field sorts read `fields[sort.field]`, taking the first element when the
 *   value is an array, and yield `null` when that is null or undefined.
 *   Fields configured as integer, float, date or bool go through
 *   `canonicalizeColumnValue`; every other kind goes through
 *   `canonicalizeExactValue`.
 *
 * @param sorts Sort specifications, in priority order.
 * @param fields Field values of the hit.
 * @param evaluation Query evaluation of the hit (supplies the score).
 * @param offsetSeq Offset of the hit.
 * @returns The sort key values, in the same order as `sorts`.
 */
function buildSearchSortInternalValues(
  sorts: SearchSortSpec[],
  fields: Record<string, unknown>,
  evaluation: SearchEvaluation,
  offsetSeq: bigint
): Array<bigint | number | string | boolean | null> {
  return sorts.map((sort) => {
    if (sort.kind === "score") return evaluation.score;
    if (sort.kind === "offset") return offsetSeq;

    const stored = fields[sort.field];
    // Multi-valued fields sort by their first element.
    const scalar = Array.isArray(stored) ? stored[0] : stored;
    if (scalar == null) return null;

    switch (sort.config.kind) {
      case "integer":
      case "float":
      case "date":
      case "bool":
        return canonicalizeColumnValue(sort.config, scalar);
      default:
        return canonicalizeExactValue(sort.config, scalar);
    }
  });
}
2818
+
2819
/**
 * Converts a hit's internal sort values into the values exposed in the
 * search response.
 *
 * - `offset` sorts return the caller-supplied `offset` string.
 * - `bigint` values are returned as a `number` when that conversion is exact
 *   (i.e. the result is a safe integer). Larger magnitudes are returned as a
 *   decimal string instead, because `Number(bigint)` silently rounds beyond
 *   2^53 - 1 and a rounded value would no longer identify the hit when it is
 *   sent back as a `searchAfter` cursor. `normalizeSearchAfterValue` in this
 *   file accepts decimal strings for integer/date sorts.
 * - every other value is passed through unchanged.
 *
 * @param sorts - Sort specification, in priority order.
 * @param sortInternal - Internal sort values, positionally aligned with `sorts`.
 * @param offset - Offset string reported for `offset` sorts.
 * @returns One response value per element of `sorts`.
 */
function buildSearchSortResponseValues(
  sorts: SearchSortSpec[],
  sortInternal: Array<bigint | number | string | boolean | null>,
  offset: string
): unknown[] {
  return sorts.map((sort, index) => {
    if (sort.kind === "offset") return offset;
    const value = sortInternal[index];
    if (typeof value !== "bigint") return value;
    const asNumber = Number(value);
    // Any bigint outside the safe range converts to a non-safe number, so this
    // check is sufficient to detect a lossy conversion.
    return Number.isSafeInteger(asNumber) ? asNumber : value.toString();
  });
}
2831
+
2832
/**
 * Three-way comparison of two internal sort values.
 *
 * Ordering rules:
 * - `null`/`undefined` sorts after every non-null value; two nulls are equal.
 * - two booleans: `false` sorts before `true`.
 * - two numeric values (`bigint` and/or `number`) are compared numerically.
 *   Mixed bigint/number pairs are handled here as well; previously they fell
 *   through to string comparison, which ordered e.g. `10n` before `9`.
 * - anything else is compared by its `String(...)` representation.
 *
 * @param left - Left-hand value.
 * @param right - Right-hand value.
 * @returns `-1` if `left` sorts first, `1` if `right` sorts first, `0` if equal
 *   (or not orderable, e.g. when a `NaN` is involved).
 */
function compareComparableValues(left: bigint | number | string | boolean | null, right: bigint | number | string | boolean | null): number {
  if (left == null && right == null) return 0;
  if (left == null) return 1;
  if (right == null) return -1;
  if (
    (typeof left === "bigint" || typeof left === "number") &&
    (typeof right === "bigint" || typeof right === "number")
  ) {
    // JavaScript relational operators compare bigint and number by mathematical value.
    return left < right ? -1 : left > right ? 1 : 0;
  }
  if (typeof left === "boolean" && typeof right === "boolean") return left === right ? 0 : left ? 1 : -1;
  const leftText = String(left);
  const rightText = String(right);
  return leftText < rightText ? -1 : leftText > rightText ? 1 : 0;
}
2843
+
2844
/**
 * Orders two hits according to the requested sort specification.
 *
 * Sort keys are evaluated in order; the first key whose internal values differ
 * decides the result, negated for any direction other than `"asc"`. Missing
 * internal values are treated as `null`.
 *
 * @param left - First hit.
 * @param right - Second hit.
 * @param sorts - Sort specification, in priority order.
 * @returns A negative number if `left` sorts first, a positive number if
 *   `right` sorts first, `0` when all keys compare equal.
 */
function compareSearchHits(left: SearchHitInternal, right: SearchHitInternal, sorts: SearchSortSpec[]): number {
  for (const [position, sort] of sorts.entries()) {
    const order = compareComparableValues(left.sortInternal[position] ?? null, right.sortInternal[position] ?? null);
    if (order !== 0) {
      return sort.direction === "asc" ? order : -order;
    }
  }
  return 0;
}
2852
+
2853
/**
 * Compares a hit's internal sort values against a `searchAfter` cursor.
 *
 * Each raw cursor entry is normalized with `normalizeSearchAfterValue` for the
 * matching sort key and then compared with the hit's value at the same
 * position (missing hit values count as `null`). The first differing key
 * decides the result, negated for any direction other than `"asc"`.
 *
 * @param sortInternal - Internal sort values of the hit.
 * @param sorts - Sort specification, in priority order.
 * @param searchAfter - Raw cursor values, positionally aligned with `sorts`.
 * @returns A negative number if the hit sorts before the cursor, a positive
 *   number if it sorts after, `0` when every key compares equal.
 */
function compareSearchAfterValues(
  sortInternal: Array<bigint | number | string | boolean | null>,
  sorts: SearchSortSpec[],
  searchAfter: unknown[]
): number {
  let position = 0;
  for (const sort of sorts) {
    const boundary = normalizeSearchAfterValue(sort, searchAfter[position]);
    const order = compareComparableValues(sortInternal[position] ?? null, boundary);
    if (order !== 0) {
      return sort.direction === "asc" ? order : -order;
    }
    position += 1;
  }
  return 0;
}
2866
+
2867
/**
 * Lexicographic, unsigned byte-wise comparison of two encoded values.
 *
 * The first differing byte decides the order. When one array is a prefix of
 * the other, the shorter array sorts first.
 *
 * @param left - Left-hand encoded value.
 * @param right - Right-hand encoded value.
 * @returns `-1`, `0` or `1`.
 */
function compareEncodedValues(left: Uint8Array, right: Uint8Array): number {
  const sharedLength = Math.min(left.byteLength, right.byteLength);

  // Advance past the common prefix.
  let cursor = 0;
  while (cursor < sharedLength && left[cursor] === right[cursor]) {
    cursor += 1;
  }
  if (cursor < sharedLength) {
    return left[cursor]! < right[cursor]! ? -1 : 1;
  }

  // One value is a prefix of the other (or they are identical).
  const lengthDelta = left.byteLength - right.byteLength;
  if (lengthDelta === 0) return 0;
  return lengthDelta < 0 ? -1 : 1;
}
2876
+
2877
/**
 * Encodes a normalized cursor value into sortable bytes for a field sort.
 *
 * The encoder is chosen by the field's config kind, and the value must have
 * the matching runtime type:
 * - `integer` / `date`: `bigint`, encoded with `encodeSortableInt64`
 * - `float`: `number`, encoded with `encodeSortableFloat64`
 * - `bool`: `boolean`, encoded with `encodeSortableBool`
 *
 * @param sort - Field sort whose config selects the encoding.
 * @param value - Normalized cursor value.
 * @returns The encoded bytes, or `null` when the value is null, has the wrong
 *   runtime type for the config kind, or the kind has no encoding here.
 */
function encodeSearchCursorValue(sort: Extract<SearchSortSpec, { kind: "field" }>, value: bigint | number | string | boolean | null): Uint8Array | null {
  if (value == null) return null;
  switch (sort.config.kind) {
    case "integer":
    case "date":
      return typeof value === "bigint" ? encodeSortableInt64(value) : null;
    case "float":
      return typeof value === "number" ? encodeSortableFloat64(value) : null;
    case "bool":
      return typeof value === "boolean" ? encodeSortableBool(value) : null;
    default:
      return null;
  }
}
2890
+
2891
/**
 * Derives a field bound from the request's `searchAfter` cursor.
 *
 * A bound is produced only when the request carries a non-empty `searchAfter`
 * and its leading sort is a field sort whose config kind is integer, float,
 * date or bool.
 *
 * @param request - Search request providing `sort` and `searchAfter`.
 * @returns The bound (leading sort, normalized cursor value and its encoded
 *   form, which may be `null`), or `null` when the conditions above are not met.
 */
function resolveSearchCursorFieldBound(request: SearchRequest): SearchCursorFieldBound | null {
  const cursor = request.searchAfter;
  if (!cursor || cursor.length === 0) return null;

  const leadingSort = request.sort[0];
  if (!leadingSort || leadingSort.kind !== "field") return null;

  // Only these config kinds are eligible for a cursor bound.
  switch (leadingSort.config.kind) {
    case "integer":
    case "float":
    case "date":
    case "bool":
      break;
    default:
      return null;
  }

  const after = normalizeSearchAfterValue(leadingSort, cursor[0]);
  const encoded = encodeSearchCursorValue(leadingSort, after);
  return { kind: "field", sort: leadingSort, after, encoded };
}
2911
+
2912
/**
 * Normalizes one raw `searchAfter` entry into the internal comparable form
 * used for the given sort key.
 *
 * - `offset`: a string parsed with `parseOffsetResult` and converted with
 *   `offsetToSeqOrNeg1`.
 * - `score`: a finite number.
 * - integer / date fields: a finite number (truncated toward zero) or a
 *   non-blank string accepted by `BigInt(...)`, returned as `bigint`.
 * - float fields: a finite number.
 * - bool fields: a boolean.
 * - all other fields: a string.
 *
 * @param sort - Sort key the value belongs to.
 * @param raw - Raw cursor value as supplied by the caller.
 * @returns The normalized value, or `null` when `raw` is null/undefined or
 *   does not have an acceptable shape for the sort key.
 */
function normalizeSearchAfterValue(sort: SearchSortSpec, raw: unknown): bigint | number | string | boolean | null {
  if (raw == null) return null;

  if (sort.kind === "score") {
    return typeof raw === "number" && Number.isFinite(raw) ? raw : null;
  }

  if (sort.kind === "offset") {
    if (typeof raw !== "string") return null;
    const parsedOffset = parseOffsetResult(raw);
    return Result.isError(parsedOffset) ? null : offsetToSeqOrNeg1(parsedOffset.value);
  }

  switch (sort.config.kind) {
    case "integer":
    case "date": {
      if (typeof raw === "number") {
        return Number.isFinite(raw) ? BigInt(Math.trunc(raw)) : null;
      }
      if (typeof raw !== "string") return null;
      const trimmed = raw.trim();
      if (trimmed === "") return null;
      try {
        return BigInt(trimmed);
      } catch {
        // Not a valid integer literal.
        return null;
      }
    }
    case "float":
      return typeof raw === "number" && Number.isFinite(raw) ? raw : null;
    case "bool":
      return typeof raw === "boolean" ? raw : null;
    default:
      return typeof raw === "string" ? raw : null;
  }
}
2938
+
2939
/**
 * Compares a hit against a `searchAfter` cursor by delegating to
 * `compareSearchAfterValues` with the hit's internal sort values.
 *
 * @param hit - Hit to position relative to the cursor.
 * @param sorts - Sort specification, in priority order.
 * @param searchAfter - Raw cursor values, positionally aligned with `sorts`.
 * @returns The result of `compareSearchAfterValues`.
 */
function compareSearchAfter(hit: SearchHitInternal, sorts: SearchSortSpec[], searchAfter: unknown[]): number {
  const { sortInternal } = hit;
  return compareSearchAfterValues(sortInternal, sorts, searchAfter);
}
2942
+
2943
/**
 * Returns the request's leading sort when it targets the registry's primary
 * timestamp field.
 *
 * The leading sort qualifies only if it is a field sort, its field equals
 * `registry.search?.primaryTimestampField`, and its config kind is `"date"`.
 *
 * @param registry - Schema registry providing the primary timestamp field name.
 * @param request - Search request whose first sort entry is inspected.
 * @returns The leading sort when it qualifies, otherwise `null`.
 */
function resolvePrimaryTimestampTopKSort(registry: SchemaRegistry, request: SearchRequest): PrimaryTimestampTopKSort | null {
  const candidate = request.sort[0];
  if (!candidate || candidate.kind !== "field") return null;

  const targetsPrimaryTimestamp = registry.search?.primaryTimestampField === candidate.field;
  return targetsPrimaryTimestamp && candidate.config.kind === "date" ? candidate : null;
}
2950
+
2951
/**
 * Finds the index of the hit that sorts last under `compareSearchHits`.
 *
 * When several hits tie for last place, the earliest index wins. An empty
 * array yields `0`.
 *
 * @param hits - Candidate hits.
 * @param sorts - Sort specification, in priority order.
 * @returns Index of the worst-ranked hit.
 */
function worstSearchHitIndex(hits: SearchHitInternal[], sorts: SearchSortSpec[]): number {
  let worstPosition = 0;
  let position = 1;
  while (position < hits.length) {
    const candidate = hits[position]!;
    // Strictly greater only, so ties keep the earlier index.
    if (compareSearchHits(candidate, hits[worstPosition]!, sorts) > 0) {
      worstPosition = position;
    }
    position += 1;
  }
  return worstPosition;
}
2958
+
2959
/**
 * Orders segments for scanning based on their primary-timestamp bounds.
 *
 * Segments with no companion row, or with a null/undefined min or max bound,
 * are placed first in their original relative order. The remaining segments
 * follow, sorted:
 * - `"desc"`: by max bound descending, ties broken by `segment_index` descending;
 * - otherwise: by min bound ascending, ties broken by `segment_index` ascending.
 *
 * The input array is not modified.
 *
 * @param segments - Segments to order.
 * @param rowsBySegment - Companion rows keyed by `segment_index`.
 * @param direction - Requested sort direction.
 * @returns A new array holding every input segment in scan order.
 */
function orderSegmentsByPrimaryTimestampBounds(
  segments: SegmentRow[],
  rowsBySegment: Map<number, SearchSegmentCompanionRow>,
  direction: "asc" | "desc"
): SegmentRow[] {
  const withoutBounds: SegmentRow[] = [];
  const withBounds: SegmentRow[] = [];
  segments.forEach((segment) => {
    const companion = rowsBySegment.get(segment.segment_index);
    const bounded = companion?.primary_timestamp_min_ms != null && companion.primary_timestamp_max_ms != null;
    (bounded ? withBounds : withoutBounds).push(segment);
  });

  const descending = direction === "desc";
  withBounds.sort((a, b) => {
    // Every segment in `withBounds` has a companion row with both bounds set.
    const rowA = rowsBySegment.get(a.segment_index)!;
    const rowB = rowsBySegment.get(b.segment_index)!;
    if (descending) {
      if (rowA.primary_timestamp_max_ms === rowB.primary_timestamp_max_ms) {
        return b.segment_index - a.segment_index;
      }
      return rowA.primary_timestamp_max_ms! > rowB.primary_timestamp_max_ms! ? -1 : 1;
    }
    if (rowA.primary_timestamp_min_ms === rowB.primary_timestamp_min_ms) {
      return a.segment_index - b.segment_index;
    }
    return rowA.primary_timestamp_min_ms! < rowB.primary_timestamp_min_ms! ? -1 : 1;
  });

  return withoutBounds.concat(withBounds);
}