@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
package/src/reader.ts
CHANGED
|
@@ -1,18 +1,66 @@
|
|
|
1
|
-
import { existsSync, openSync, readSync, closeSync } from "node:fs";
|
|
2
1
|
import type { Config } from "./config";
|
|
3
2
|
import type { SqliteDurableStore, SegmentRow } from "./db/db";
|
|
4
3
|
import type { ObjectStore } from "./objectstore/interface";
|
|
4
|
+
import {
|
|
5
|
+
type CompiledReadFilter,
|
|
6
|
+
type ReadFilterColumnClause,
|
|
7
|
+
collectPositiveColumnFilterClauses,
|
|
8
|
+
collectPositiveExactFilterClauses,
|
|
9
|
+
evaluateReadFilterResult,
|
|
10
|
+
} from "./read_filter";
|
|
11
|
+
import { decodeJsonPayloadResult } from "./schema/read_json";
|
|
12
|
+
import { SchemaRegistryStore } from "./schema/registry";
|
|
5
13
|
import { parseOffsetResult, offsetToSeqOrNeg1, encodeOffset } from "./offset";
|
|
6
|
-
import {
|
|
14
|
+
import {
|
|
15
|
+
type BlockIndexEntry,
|
|
16
|
+
decodeBlockResult,
|
|
17
|
+
iterateBlocksResult,
|
|
18
|
+
parseBlockHeaderResult,
|
|
19
|
+
parseFooter,
|
|
20
|
+
parseFooterBytes,
|
|
21
|
+
DSB3_HEADER_BYTES,
|
|
22
|
+
} from "./segment/format";
|
|
7
23
|
import { SegmentDiskCache, type SegmentCacheStats } from "./segment/cache";
|
|
24
|
+
import { loadSegmentBytesCached, loadSegmentSource, readRangeFromSource, type SegmentReadSource } from "./segment/cached_segment";
|
|
8
25
|
import { Bloom256 } from "./util/bloom256";
|
|
9
|
-
import { segmentObjectKey, streamHash16Hex } from "./util/stream_paths";
|
|
10
26
|
import { readU32BE } from "./util/endian";
|
|
11
|
-
import {
|
|
12
|
-
import {
|
|
13
|
-
import type { IndexManager } from "./index/indexer";
|
|
27
|
+
import { type RetryOptions } from "./util/retry";
|
|
28
|
+
import type { IndexCandidate, StreamIndexLookup } from "./index/indexer";
|
|
14
29
|
import { dsError } from "./util/ds_error.ts";
|
|
15
30
|
import { Result } from "better-result";
|
|
31
|
+
import { filterDocIdsByColumnResult } from "./search/col_runtime";
|
|
32
|
+
import {
|
|
33
|
+
type AggregateRequest,
|
|
34
|
+
cloneAggMeasureState,
|
|
35
|
+
extractRollupContributionResult,
|
|
36
|
+
extractRollupEligibility,
|
|
37
|
+
formatAggMeasureState,
|
|
38
|
+
mergeAggMeasureState,
|
|
39
|
+
} from "./search/aggregate";
|
|
40
|
+
import {
|
|
41
|
+
type CompiledSearchQuery,
|
|
42
|
+
type SearchColumnClause,
|
|
43
|
+
type SearchEvaluation,
|
|
44
|
+
type SearchExactClause,
|
|
45
|
+
type SearchFtsClause,
|
|
46
|
+
type SearchRequest,
|
|
47
|
+
type SearchSortSpec,
|
|
48
|
+
collectPositiveSearchColumnClauses,
|
|
49
|
+
collectPositiveSearchExactClauses,
|
|
50
|
+
collectPositiveSearchFtsClauses,
|
|
51
|
+
evaluateSearchQueryResult,
|
|
52
|
+
extractSearchHitFieldsResult,
|
|
53
|
+
} from "./search/query";
|
|
54
|
+
import { filterDocIdsByFtsClausesResult } from "./search/fts_runtime";
|
|
55
|
+
import { canonicalizeColumnValue, canonicalizeExactValue } from "./search/schema";
|
|
56
|
+
import { encodeSortableBool, encodeSortableFloat64, encodeSortableInt64 } from "./search/column_encoding";
|
|
57
|
+
import type { SearchRollupConfig } from "./schema/registry";
|
|
58
|
+
import type { AggMeasureState } from "./search/agg_format";
|
|
59
|
+
import type { MetricsBlockSectionView } from "./profiles/metrics/block_format";
|
|
60
|
+
import { materializeMetricsBlockRecord } from "./profiles/metrics/normalize";
|
|
61
|
+
import { buildDesiredSearchCompanionPlan, hashSearchCompanionPlan } from "./search/companion_plan";
|
|
62
|
+
import { RuntimeMemorySampler } from "./runtime_memory_sampler";
|
|
63
|
+
import type { MemoryPressureMonitor } from "./memory";
|
|
16
64
|
|
|
17
65
|
export type ReadFormat = "raw" | "json";
|
|
18
66
|
|
|
@@ -26,6 +74,87 @@ export type ReadBatch = {
|
|
|
26
74
|
endOffsetSeq: bigint;
|
|
27
75
|
nextOffsetSeq: bigint;
|
|
28
76
|
records: Array<{ offset: bigint; payload: Uint8Array }>; // payload bytes in wire order
|
|
77
|
+
filterScannedBytes?: number;
|
|
78
|
+
filterScanLimitBytes?: number;
|
|
79
|
+
filterScanLimitReached?: boolean;
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
export type SearchHit = {
|
|
83
|
+
offset: string;
|
|
84
|
+
score: number;
|
|
85
|
+
sort: unknown[];
|
|
86
|
+
fields: Record<string, unknown>;
|
|
87
|
+
source: unknown;
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
export type SearchResultBatch = {
|
|
91
|
+
stream: string;
|
|
92
|
+
snapshotEndOffset: string;
|
|
93
|
+
tookMs: number;
|
|
94
|
+
timedOut: boolean;
|
|
95
|
+
timeoutMs: number | null;
|
|
96
|
+
coverage: {
|
|
97
|
+
mode: "complete" | "published";
|
|
98
|
+
complete: boolean;
|
|
99
|
+
streamHeadOffset: string;
|
|
100
|
+
visibleThroughOffset: string;
|
|
101
|
+
visibleThroughPrimaryTimestampMax: string | null;
|
|
102
|
+
oldestOmittedAppendAt: string | null;
|
|
103
|
+
possibleMissingEventsUpperBound: number;
|
|
104
|
+
possibleMissingUploadedSegments: number;
|
|
105
|
+
possibleMissingSealedRows: number;
|
|
106
|
+
possibleMissingWalRows: number;
|
|
107
|
+
indexedSegments: number;
|
|
108
|
+
indexedSegmentTimeMs: number;
|
|
109
|
+
ftsSectionGetMs: number;
|
|
110
|
+
ftsDecodeMs: number;
|
|
111
|
+
ftsClauseEstimateMs: number;
|
|
112
|
+
scannedSegments: number;
|
|
113
|
+
scannedSegmentTimeMs: number;
|
|
114
|
+
scannedTailDocs: number;
|
|
115
|
+
scannedTailTimeMs: number;
|
|
116
|
+
exactCandidateTimeMs: number;
|
|
117
|
+
indexFamiliesUsed: string[];
|
|
118
|
+
};
|
|
119
|
+
total: {
|
|
120
|
+
value: number;
|
|
121
|
+
relation: "eq" | "gte";
|
|
122
|
+
};
|
|
123
|
+
hits: SearchHit[];
|
|
124
|
+
nextSearchAfter: unknown[] | null;
|
|
125
|
+
};
|
|
126
|
+
|
|
127
|
+
export type AggregateResultBatch = {
|
|
128
|
+
stream: string;
|
|
129
|
+
rollup: string;
|
|
130
|
+
from: string;
|
|
131
|
+
to: string;
|
|
132
|
+
interval: string;
|
|
133
|
+
coverage: {
|
|
134
|
+
mode: "complete" | "published";
|
|
135
|
+
complete: boolean;
|
|
136
|
+
streamHeadOffset: string;
|
|
137
|
+
visibleThroughOffset: string;
|
|
138
|
+
visibleThroughPrimaryTimestampMax: string | null;
|
|
139
|
+
oldestOmittedAppendAt: string | null;
|
|
140
|
+
possibleMissingEventsUpperBound: number;
|
|
141
|
+
possibleMissingUploadedSegments: number;
|
|
142
|
+
possibleMissingSealedRows: number;
|
|
143
|
+
possibleMissingWalRows: number;
|
|
144
|
+
usedRollups: boolean;
|
|
145
|
+
indexedSegments: number;
|
|
146
|
+
scannedSegments: number;
|
|
147
|
+
scannedTailDocs: number;
|
|
148
|
+
indexFamiliesUsed: string[];
|
|
149
|
+
};
|
|
150
|
+
buckets: Array<{
|
|
151
|
+
start: string;
|
|
152
|
+
end: string;
|
|
153
|
+
groups: Array<{
|
|
154
|
+
key: Record<string, string | null>;
|
|
155
|
+
measures: Record<string, unknown>;
|
|
156
|
+
}>;
|
|
157
|
+
}>;
|
|
29
158
|
};
|
|
30
159
|
|
|
31
160
|
export type ReaderError =
|
|
@@ -34,7 +163,50 @@ export type ReaderError =
|
|
|
34
163
|
| { kind: "invalid_offset"; message: string }
|
|
35
164
|
| { kind: "internal"; message: string };
|
|
36
165
|
|
|
37
|
-
|
|
166
|
+
const READ_FILTER_SCAN_LIMIT_BYTES = 100 * 1024 * 1024;
|
|
167
|
+
type SegmentCandidateInfo = { segments: Set<number> | null; indexedThrough: number };
|
|
168
|
+
type SearchFamilyCandidateInfo = { docIds: Set<number> | null; usedFamilies: Set<string> };
|
|
169
|
+
type SearchHitInternal = {
|
|
170
|
+
offsetSeq: bigint;
|
|
171
|
+
offset: string;
|
|
172
|
+
score: number;
|
|
173
|
+
sortInternal: Array<bigint | number | string | boolean | null>;
|
|
174
|
+
sortResponse: unknown[];
|
|
175
|
+
fields: Record<string, unknown>;
|
|
176
|
+
source: unknown;
|
|
177
|
+
};
|
|
178
|
+
type AggregateGroupInternal = {
|
|
179
|
+
key: Record<string, string | null>;
|
|
180
|
+
measures: Record<string, AggMeasureState>;
|
|
181
|
+
};
|
|
182
|
+
type SearchCursorFieldBound = {
|
|
183
|
+
kind: "field";
|
|
184
|
+
sort: Extract<SearchSortSpec, { kind: "field" }>;
|
|
185
|
+
after: bigint | number | string | boolean | null;
|
|
186
|
+
encoded: Uint8Array | null;
|
|
187
|
+
};
|
|
188
|
+
type PublishedCoverageState = {
|
|
189
|
+
mode: "complete" | "published";
|
|
190
|
+
complete: boolean;
|
|
191
|
+
canSearchWalTail: boolean;
|
|
192
|
+
publishedSegmentCount: number;
|
|
193
|
+
visiblePublishedSegmentCount: number;
|
|
194
|
+
streamHeadOffset: string;
|
|
195
|
+
visibleThroughSeq: bigint;
|
|
196
|
+
visibleThroughOffset: string;
|
|
197
|
+
visibleThroughPrimaryTimestampMax: string | null;
|
|
198
|
+
oldestOmittedAppendAt: string | null;
|
|
199
|
+
possibleMissingEventsUpperBound: number;
|
|
200
|
+
possibleMissingUploadedSegments: number;
|
|
201
|
+
possibleMissingSealedRows: number;
|
|
202
|
+
possibleMissingWalRows: number;
|
|
203
|
+
};
|
|
204
|
+
|
|
205
|
+
type PlannedReadSegments = {
|
|
206
|
+
segments: SegmentRow[];
|
|
207
|
+
sealedEndSeq: bigint;
|
|
208
|
+
};
|
|
209
|
+
type PlannedReadOrder = "asc" | "desc";
|
|
38
210
|
|
|
39
211
|
function errorMessage(e: unknown): string {
|
|
40
212
|
return String((e as any)?.message ?? e);
|
|
@@ -44,109 +216,158 @@ function utf8Bytes(s: string): Uint8Array {
|
|
|
44
216
|
return new TextEncoder().encode(s);
|
|
45
217
|
}
|
|
46
218
|
|
|
47
|
-
function
|
|
48
|
-
const streamHash = streamHash16Hex(seg.stream);
|
|
49
|
-
return segmentObjectKey(streamHash, seg.segment_index);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
function readRangeFromFile(path: string, start: number, end: number): Uint8Array {
|
|
53
|
-
const len = end - start + 1;
|
|
54
|
-
const fd = openSync(path, "r");
|
|
219
|
+
function parseCompanionSections(value: string): Set<string> {
|
|
55
220
|
try {
|
|
56
|
-
const
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
return new
|
|
60
|
-
} finally {
|
|
61
|
-
closeSync(fd);
|
|
221
|
+
const parsed = JSON.parse(value);
|
|
222
|
+
return new Set(Array.isArray(parsed) ? parsed.filter((entry) => typeof entry === "string") : []);
|
|
223
|
+
} catch {
|
|
224
|
+
return new Set();
|
|
62
225
|
}
|
|
63
226
|
}
|
|
64
227
|
|
|
65
|
-
async function
|
|
228
|
+
async function loadSegmentBytes(
|
|
66
229
|
os: ObjectStore,
|
|
67
230
|
seg: SegmentRow,
|
|
68
|
-
start: number,
|
|
69
|
-
end: number,
|
|
70
231
|
diskCache?: SegmentDiskCache,
|
|
71
232
|
retryOpts?: RetryOptions
|
|
72
233
|
): Promise<Uint8Array> {
|
|
73
|
-
|
|
74
|
-
if (existsSync(local)) return readRangeFromFile(local, start, end);
|
|
75
|
-
|
|
76
|
-
const objectKey = objectKeyForSegment(seg);
|
|
77
|
-
if (diskCache && diskCache.has(objectKey)) {
|
|
78
|
-
diskCache.recordHit();
|
|
79
|
-
diskCache.touch(objectKey);
|
|
80
|
-
return readRangeFromFile(diskCache.getPath(objectKey), start, end);
|
|
81
|
-
}
|
|
82
|
-
if (diskCache) diskCache.recordMiss();
|
|
83
|
-
|
|
84
|
-
const bytes = await retry(
|
|
85
|
-
async () => {
|
|
86
|
-
const res = await os.get(objectKey, { range: { start, end } });
|
|
87
|
-
if (!res) throw dsError(`object store missing segment: ${objectKey}`);
|
|
88
|
-
return res;
|
|
89
|
-
},
|
|
90
|
-
retryOpts ?? { retries: 0, baseDelayMs: 0, maxDelayMs: 0, timeoutMs: 0 }
|
|
91
|
-
);
|
|
92
|
-
if (diskCache && start === 0 && end === seg.size_bytes - 1) {
|
|
93
|
-
diskCache.put(objectKey, bytes);
|
|
94
|
-
}
|
|
95
|
-
return bytes;
|
|
234
|
+
return loadSegmentBytesCached(os, seg, diskCache, retryOpts);
|
|
96
235
|
}
|
|
97
236
|
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
seg
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
237
|
+
function loadSegmentDataLimitFromSource(seg: SegmentRow, source: SegmentReadSource): number {
|
|
238
|
+
if (seg.size_bytes < 8) return seg.size_bytes;
|
|
239
|
+
const tail = readRangeFromSource(source, seg.size_bytes - 8, seg.size_bytes - 1);
|
|
240
|
+
const magic = String.fromCharCode(tail[4], tail[5], tail[6], tail[7]);
|
|
241
|
+
if (magic !== "DSF1") return seg.size_bytes;
|
|
242
|
+
const footerLen = readU32BE(tail, 0);
|
|
243
|
+
const footerStart = seg.size_bytes - 8 - footerLen;
|
|
244
|
+
return footerStart >= 0 ? footerStart : seg.size_bytes;
|
|
105
245
|
}
|
|
106
246
|
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
247
|
+
function findFirstRelevantBlockIndex(blocks: BlockIndexEntry[], seq: bigint): number {
|
|
248
|
+
if (blocks.length <= 1) return 0;
|
|
249
|
+
let lo = 0;
|
|
250
|
+
let hi = blocks.length - 1;
|
|
251
|
+
let best = 0;
|
|
252
|
+
while (lo <= hi) {
|
|
253
|
+
const mid = (lo + hi) >>> 1;
|
|
254
|
+
if (blocks[mid]!.firstOffset <= seq) {
|
|
255
|
+
best = mid;
|
|
256
|
+
lo = mid + 1;
|
|
257
|
+
} else {
|
|
258
|
+
hi = mid - 1;
|
|
259
|
+
}
|
|
118
260
|
}
|
|
261
|
+
return best;
|
|
262
|
+
}
|
|
263
|
+
|
|
264
|
+
function loadSegmentFooterBlocksFromSource(seg: SegmentRow, source: SegmentReadSource): BlockIndexEntry[] | null {
|
|
119
265
|
if (seg.size_bytes < 8) return null;
|
|
120
|
-
const tail =
|
|
266
|
+
const tail = readRangeFromSource(source, seg.size_bytes - 8, seg.size_bytes - 1);
|
|
121
267
|
const magic = String.fromCharCode(tail[4], tail[5], tail[6], tail[7]);
|
|
122
268
|
if (magic !== "DSF1") return null;
|
|
123
269
|
const footerLen = readU32BE(tail, 0);
|
|
124
270
|
const footerStart = seg.size_bytes - 8 - footerLen;
|
|
125
271
|
if (footerStart < 0) return null;
|
|
126
|
-
const footerBytes =
|
|
272
|
+
const footerBytes = readRangeFromSource(source, footerStart, footerStart + footerLen - 1);
|
|
127
273
|
const footer = parseFooterBytes(footerBytes);
|
|
128
|
-
|
|
129
|
-
if (footerCache) footerCache.set(cacheKey, result);
|
|
130
|
-
return result;
|
|
274
|
+
return footer?.blocks ?? null;
|
|
131
275
|
}
|
|
132
276
|
|
|
133
277
|
export class StreamReader {
|
|
134
278
|
private readonly config: Config;
|
|
135
279
|
private readonly db: SqliteDurableStore;
|
|
136
280
|
private readonly os: ObjectStore;
|
|
281
|
+
private readonly registry: SchemaRegistryStore;
|
|
137
282
|
private readonly diskCache?: SegmentDiskCache;
|
|
138
|
-
private readonly
|
|
139
|
-
private readonly
|
|
283
|
+
private readonly index?: StreamIndexLookup;
|
|
284
|
+
private readonly memorySampler?: RuntimeMemorySampler;
|
|
285
|
+
private readonly memory?: MemoryPressureMonitor;
|
|
140
286
|
|
|
141
|
-
constructor(
|
|
287
|
+
constructor(
|
|
288
|
+
config: Config,
|
|
289
|
+
db: SqliteDurableStore,
|
|
290
|
+
os: ObjectStore,
|
|
291
|
+
registry: SchemaRegistryStore,
|
|
292
|
+
diskCache?: SegmentDiskCache,
|
|
293
|
+
index?: StreamIndexLookup,
|
|
294
|
+
memorySampler?: RuntimeMemorySampler,
|
|
295
|
+
memory?: MemoryPressureMonitor
|
|
296
|
+
) {
|
|
142
297
|
this.config = config;
|
|
143
298
|
this.db = db;
|
|
144
299
|
this.os = os;
|
|
300
|
+
this.registry = registry;
|
|
145
301
|
this.diskCache = diskCache;
|
|
146
302
|
this.index = index;
|
|
147
|
-
|
|
148
|
-
|
|
303
|
+
this.memorySampler = memorySampler;
|
|
304
|
+
this.memory = memory;
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
private planSealedReadSegments(
|
|
308
|
+
stream: string,
|
|
309
|
+
startSeq: bigint,
|
|
310
|
+
sealedEndSeq: bigint,
|
|
311
|
+
candidateSegments: Set<number> | null,
|
|
312
|
+
indexedThrough: number,
|
|
313
|
+
order: PlannedReadOrder = "asc"
|
|
314
|
+
): PlannedReadSegments | null {
|
|
315
|
+
if (startSeq > sealedEndSeq) return { segments: [], sealedEndSeq };
|
|
316
|
+
if (candidateSegments == null) return null;
|
|
317
|
+
|
|
318
|
+
const startSeg = this.db.findSegmentForOffset(stream, startSeq);
|
|
319
|
+
const endSeg = this.db.findSegmentForOffset(stream, sealedEndSeq);
|
|
320
|
+
if (!startSeg || !endSeg) return null;
|
|
321
|
+
|
|
322
|
+
const startIndex = startSeg.segment_index;
|
|
323
|
+
const endIndex = endSeg.segment_index;
|
|
324
|
+
const plannedIndexes: number[] = [];
|
|
325
|
+
const seenIndexes = new Set<number>();
|
|
326
|
+
const indexedPrefixEnd = Math.min(endIndex, indexedThrough - 1);
|
|
327
|
+
|
|
328
|
+
if (order === "asc") {
|
|
329
|
+
if (startIndex <= indexedPrefixEnd) {
|
|
330
|
+
const sortedCandidateIndexes = Array.from(candidateSegments)
|
|
331
|
+
.filter((segmentIndex) => segmentIndex >= startIndex && segmentIndex <= indexedPrefixEnd)
|
|
332
|
+
.sort((a, b) => a - b);
|
|
333
|
+
for (const segmentIndex of sortedCandidateIndexes) {
|
|
334
|
+
if (seenIndexes.has(segmentIndex)) continue;
|
|
335
|
+
plannedIndexes.push(segmentIndex);
|
|
336
|
+
seenIndexes.add(segmentIndex);
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
const tailStartIndex = Math.max(startIndex, indexedThrough);
|
|
341
|
+
for (let segmentIndex = tailStartIndex; segmentIndex <= endIndex; segmentIndex++) {
|
|
342
|
+
if (seenIndexes.has(segmentIndex)) continue;
|
|
343
|
+
plannedIndexes.push(segmentIndex);
|
|
344
|
+
seenIndexes.add(segmentIndex);
|
|
345
|
+
}
|
|
346
|
+
} else {
|
|
347
|
+
for (let segmentIndex = endIndex; segmentIndex >= Math.max(startIndex, indexedThrough); segmentIndex--) {
|
|
348
|
+
if (seenIndexes.has(segmentIndex)) continue;
|
|
349
|
+
plannedIndexes.push(segmentIndex);
|
|
350
|
+
seenIndexes.add(segmentIndex);
|
|
351
|
+
}
|
|
352
|
+
if (startIndex <= indexedPrefixEnd) {
|
|
353
|
+
const sortedCandidateIndexes = Array.from(candidateSegments)
|
|
354
|
+
.filter((segmentIndex) => segmentIndex >= startIndex && segmentIndex <= indexedPrefixEnd)
|
|
355
|
+
.sort((a, b) => b - a);
|
|
356
|
+
for (const segmentIndex of sortedCandidateIndexes) {
|
|
357
|
+
if (seenIndexes.has(segmentIndex)) continue;
|
|
358
|
+
plannedIndexes.push(segmentIndex);
|
|
359
|
+
seenIndexes.add(segmentIndex);
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
|
|
364
|
+
const plannedSegments: SegmentRow[] = [];
|
|
365
|
+
for (const segmentIndex of plannedIndexes) {
|
|
366
|
+
const seg = this.db.getSegmentByIndex(stream, segmentIndex);
|
|
367
|
+
if (!seg) return null;
|
|
368
|
+
plannedSegments.push(seg);
|
|
149
369
|
}
|
|
370
|
+
return { segments: plannedSegments, sealedEndSeq };
|
|
150
371
|
}
|
|
151
372
|
|
|
152
373
|
cacheStats(): SegmentCacheStats | null {
|
|
@@ -162,6 +383,149 @@ export class StreamReader {
|
|
|
162
383
|
};
|
|
163
384
|
}
|
|
164
385
|
|
|
386
|
+
private isoTimestampFromMs(value: bigint | null): string | null {
|
|
387
|
+
if (value == null) return null;
|
|
388
|
+
const ms = Number(value);
|
|
389
|
+
if (!Number.isFinite(ms) || ms < 0) return null;
|
|
390
|
+
return new Date(ms).toISOString();
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
private shouldSearchWalTail(
|
|
394
|
+
srow: { pending_rows: bigint; pending_bytes: bigint; last_append_ms: bigint; segment_in_progress: number },
|
|
395
|
+
hasOutstandingPublishedSegments: boolean,
|
|
396
|
+
hasOutstandingCompanions: boolean
|
|
397
|
+
): boolean {
|
|
398
|
+
if (srow.pending_rows <= 0n) return false;
|
|
399
|
+
if (hasOutstandingPublishedSegments || hasOutstandingCompanions) return false;
|
|
400
|
+
if (srow.segment_in_progress !== 0) return false;
|
|
401
|
+
const quietPeriodMs = Math.max(0, this.config.searchWalOverlayQuietPeriodMs);
|
|
402
|
+
const quietForMs = Number(this.db.nowMs() - srow.last_append_ms);
|
|
403
|
+
if (!Number.isFinite(quietForMs) || quietForMs < quietPeriodMs) return false;
|
|
404
|
+
if (srow.pending_bytes > BigInt(this.config.searchWalOverlayMaxBytes)) return false;
|
|
405
|
+
if (srow.pending_rows > BigInt(this.config.segmentTargetRows)) return false;
|
|
406
|
+
return true;
|
|
407
|
+
}
|
|
408
|
+
|
|
409
|
+
private computeOldestOmittedAppendAt(
|
|
410
|
+
stream: string,
|
|
411
|
+
srow: { uploaded_through: bigint; sealed_through: bigint; pending_rows: bigint },
|
|
412
|
+
visiblePublishedSegmentCount: number,
|
|
413
|
+
publishedSegmentCount: number,
|
|
414
|
+
shouldSearchWalTail: boolean
|
|
415
|
+
): string | null {
|
|
416
|
+
if (visiblePublishedSegmentCount < publishedSegmentCount) {
|
|
417
|
+
const firstOmittedSegment = this.db.getSegmentByIndex(stream, visiblePublishedSegmentCount);
|
|
418
|
+
return this.isoTimestampFromMs(firstOmittedSegment?.last_append_ms ?? null);
|
|
419
|
+
}
|
|
420
|
+
if (srow.sealed_through > srow.uploaded_through) {
|
|
421
|
+
const firstSealedOmitted = this.db.findSegmentForOffset(stream, srow.uploaded_through + 1n);
|
|
422
|
+
return this.isoTimestampFromMs(firstSealedOmitted?.last_append_ms ?? null);
|
|
423
|
+
}
|
|
424
|
+
if (srow.pending_rows > 0n && !shouldSearchWalTail) {
|
|
425
|
+
return this.isoTimestampFromMs(this.db.getWalOldestTimestampMs(stream));
|
|
426
|
+
}
|
|
427
|
+
return null;
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
private computePublishedCoverageState(
|
|
431
|
+
stream: string,
|
|
432
|
+
srow: {
|
|
433
|
+
epoch: number;
|
|
434
|
+
next_offset: bigint;
|
|
435
|
+
sealed_through: bigint;
|
|
436
|
+
uploaded_through: bigint;
|
|
437
|
+
pending_rows: bigint;
|
|
438
|
+
pending_bytes: bigint;
|
|
439
|
+
last_append_ms: bigint;
|
|
440
|
+
segment_in_progress: number;
|
|
441
|
+
},
|
|
442
|
+
registry: { search?: { fields: Record<string, unknown> } }
|
|
443
|
+
): PublishedCoverageState {
|
|
444
|
+
const totalSegmentCount = this.db.countSegmentsForStream(stream);
|
|
445
|
+
const publishedSegmentCount =
|
|
446
|
+
srow.uploaded_through >= 0n
|
|
447
|
+
? ((this.db.findSegmentForOffset(stream, srow.uploaded_through)?.segment_index ?? -1) + 1)
|
|
448
|
+
: 0;
|
|
449
|
+
|
|
450
|
+
const desiredPlan = buildDesiredSearchCompanionPlan(registry as any);
|
|
451
|
+
const planHasFamilies = Object.values(desiredPlan.families).some(Boolean);
|
|
452
|
+
let visiblePublishedSegmentCount = publishedSegmentCount;
|
|
453
|
+
let visibleThroughPrimaryTimestampMax: string | null = null;
|
|
454
|
+
if (planHasFamilies) {
|
|
455
|
+
const desiredHash = hashSearchCompanionPlan(desiredPlan);
|
|
456
|
+
const companionPlanRow = this.db.getSearchCompanionPlan(stream);
|
|
457
|
+
const desiredGeneration =
|
|
458
|
+
companionPlanRow == null
|
|
459
|
+
? 1
|
|
460
|
+
: companionPlanRow.plan_hash === desiredHash
|
|
461
|
+
? companionPlanRow.generation
|
|
462
|
+
: companionPlanRow.generation + 1;
|
|
463
|
+
const currentCompanions = this.db
|
|
464
|
+
.listSearchSegmentCompanions(stream)
|
|
465
|
+
.filter((row) => row.plan_generation === desiredGeneration);
|
|
466
|
+
const currentSegments = new Set<number>();
|
|
467
|
+
for (const row of currentCompanions) {
|
|
468
|
+
const sections = parseCompanionSections(row.sections_json);
|
|
469
|
+
const hasEnabledFamily = Object.entries(desiredPlan.families).some(([family, enabled]) => enabled && sections.has(family));
|
|
470
|
+
if (hasEnabledFamily) currentSegments.add(row.segment_index);
|
|
471
|
+
}
|
|
472
|
+
visiblePublishedSegmentCount = 0;
|
|
473
|
+
while (visiblePublishedSegmentCount < publishedSegmentCount && currentSegments.has(visiblePublishedSegmentCount)) {
|
|
474
|
+
visiblePublishedSegmentCount += 1;
|
|
475
|
+
}
|
|
476
|
+
if (visiblePublishedSegmentCount > 0) {
|
|
477
|
+
const visibleCompanionRow = currentCompanions.find((row) => row.segment_index === visiblePublishedSegmentCount - 1) ?? null;
|
|
478
|
+
visibleThroughPrimaryTimestampMax = this.isoTimestampFromMs(visibleCompanionRow?.primary_timestamp_max_ms ?? null);
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
const hasOutstandingPublishedSegments = publishedSegmentCount < totalSegmentCount;
|
|
483
|
+
const hasOutstandingCompanions = planHasFamilies && visiblePublishedSegmentCount < publishedSegmentCount;
|
|
484
|
+
const canSearchWalTail = this.shouldSearchWalTail(srow, hasOutstandingPublishedSegments, hasOutstandingCompanions);
|
|
485
|
+
const omitWalTail = srow.pending_rows > 0n && !canSearchWalTail;
|
|
486
|
+
|
|
487
|
+
let visibleThroughSeq = srow.next_offset - 1n;
|
|
488
|
+
if (hasOutstandingPublishedSegments || hasOutstandingCompanions || omitWalTail) {
|
|
489
|
+
if (visiblePublishedSegmentCount > 0) {
|
|
490
|
+
visibleThroughSeq = this.db.getSegmentByIndex(stream, visiblePublishedSegmentCount - 1)?.end_offset ?? -1n;
|
|
491
|
+
} else {
|
|
492
|
+
visibleThroughSeq = -1n;
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
const possibleMissingUploadedSegments = Math.max(0, publishedSegmentCount - visiblePublishedSegmentCount);
|
|
497
|
+
const hasOmittedPublishedSuffix = hasOutstandingPublishedSegments || hasOutstandingCompanions;
|
|
498
|
+
const possibleMissingUploadedRows = hasOmittedPublishedSuffix && srow.uploaded_through > visibleThroughSeq ? Number(srow.uploaded_through - visibleThroughSeq) : 0;
|
|
499
|
+
const possibleMissingSealedRows = hasOmittedPublishedSuffix && srow.sealed_through > srow.uploaded_through ? Number(srow.sealed_through - srow.uploaded_through) : 0;
|
|
500
|
+
const possibleMissingWalRows = omitWalTail ? Number(srow.pending_rows) : 0;
|
|
501
|
+
const possibleMissingEventsUpperBound = possibleMissingUploadedRows + possibleMissingSealedRows + possibleMissingWalRows;
|
|
502
|
+
const streamHeadOffset = encodeOffset(srow.epoch, srow.next_offset - 1n);
|
|
503
|
+
const oldestOmittedAppendAt = this.computeOldestOmittedAppendAt(
|
|
504
|
+
stream,
|
|
505
|
+
srow,
|
|
506
|
+
visiblePublishedSegmentCount,
|
|
507
|
+
publishedSegmentCount,
|
|
508
|
+
canSearchWalTail
|
|
509
|
+
);
|
|
510
|
+
|
|
511
|
+
return {
|
|
512
|
+
mode: possibleMissingEventsUpperBound === 0 ? "complete" : "published",
|
|
513
|
+
complete: possibleMissingEventsUpperBound === 0,
|
|
514
|
+
canSearchWalTail,
|
|
515
|
+
publishedSegmentCount,
|
|
516
|
+
visiblePublishedSegmentCount,
|
|
517
|
+
streamHeadOffset,
|
|
518
|
+
visibleThroughSeq,
|
|
519
|
+
visibleThroughOffset: encodeOffset(srow.epoch, visibleThroughSeq),
|
|
520
|
+
visibleThroughPrimaryTimestampMax,
|
|
521
|
+
oldestOmittedAppendAt,
|
|
522
|
+
possibleMissingEventsUpperBound,
|
|
523
|
+
possibleMissingUploadedSegments,
|
|
524
|
+
possibleMissingSealedRows,
|
|
525
|
+
possibleMissingWalRows,
|
|
526
|
+
};
|
|
527
|
+
}
|
|
528
|
+
|
|
165
529
|
async seekOffsetByTimestampResult(stream: string, sinceMs: bigint, key: string | null): Promise<Result<string, ReaderError>> {
|
|
166
530
|
const srow = this.db.getStream(stream);
|
|
167
531
|
if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
|
|
@@ -171,10 +535,17 @@ export class StreamReader {
|
|
|
171
535
|
try {
|
|
172
536
|
const sinceNs = sinceMs * 1_000_000n;
|
|
173
537
|
const keyBytes = key ? utf8Bytes(key) : null;
|
|
538
|
+
const candidateInfo = await this.resolveCandidateSegments(stream, keyBytes, null);
|
|
539
|
+
const plannedSealedSegments = this.planSealedReadSegments(
|
|
540
|
+
stream,
|
|
541
|
+
0n,
|
|
542
|
+
srow.sealed_through,
|
|
543
|
+
candidateInfo.segments,
|
|
544
|
+
candidateInfo.indexedThrough,
|
|
545
|
+
"asc"
|
|
546
|
+
);
|
|
174
547
|
|
|
175
|
-
|
|
176
|
-
const segments = this.db.listSegmentsForStream(stream);
|
|
177
|
-
for (const seg of segments) {
|
|
548
|
+
for (const seg of plannedSealedSegments?.segments ?? this.db.listSegmentsForStream(stream)) {
|
|
178
549
|
const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
|
|
179
550
|
let curOffset = seg.start_offset;
|
|
180
551
|
for (const blockRes of iterateBlocksResult(segBytes)) {
|
|
@@ -224,8 +595,14 @@ export class StreamReader {
|
|
|
224
595
|
return res.value;
|
|
225
596
|
}
|
|
226
597
|
|
|
227
|
-
async readResult(args: {
|
|
228
|
-
|
|
598
|
+
async readResult(args: {
|
|
599
|
+
stream: string;
|
|
600
|
+
offset: string;
|
|
601
|
+
key: string | null;
|
|
602
|
+
format: ReadFormat;
|
|
603
|
+
filter?: CompiledReadFilter | null;
|
|
604
|
+
}): Promise<Result<ReadBatch, ReaderError>> {
|
|
605
|
+
const { stream, offset, key, format, filter = null } = args;
|
|
229
606
|
const srow = this.db.getStream(stream);
|
|
230
607
|
if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
|
|
231
608
|
if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
|
|
@@ -246,6 +623,8 @@ export class StreamReader {
|
|
|
246
623
|
|
|
247
624
|
const results: Array<{ offset: bigint; payload: Uint8Array }> = [];
|
|
248
625
|
let bytesOut = 0;
|
|
626
|
+
let filterScannedBytes = 0;
|
|
627
|
+
let filterScanLimitReached = false;
|
|
249
628
|
|
|
250
629
|
// Nothing to read.
|
|
251
630
|
if (desiredOffset > endOffsetNum) {
|
|
@@ -259,16 +638,113 @@ export class StreamReader {
|
|
|
259
638
|
endOffsetSeq: endOffsetNum,
|
|
260
639
|
nextOffsetSeq: startOffsetExclusive,
|
|
261
640
|
records: [],
|
|
641
|
+
...(filter
|
|
642
|
+
? {
|
|
643
|
+
filterScannedBytes,
|
|
644
|
+
filterScanLimitBytes: READ_FILTER_SCAN_LIMIT_BYTES,
|
|
645
|
+
filterScanLimitReached,
|
|
646
|
+
}
|
|
647
|
+
: {}),
|
|
262
648
|
});
|
|
263
649
|
}
|
|
264
650
|
|
|
265
651
|
let seq = desiredOffset;
|
|
266
652
|
const keyBytes = key ? utf8Bytes(key) : null;
|
|
267
|
-
const
|
|
268
|
-
const candidateSegments =
|
|
269
|
-
const indexedThrough =
|
|
653
|
+
const candidateInfo = await this.resolveCandidateSegments(stream, keyBytes, filter);
|
|
654
|
+
const candidateSegments = candidateInfo.segments;
|
|
655
|
+
const indexedThrough = candidateInfo.indexedThrough;
|
|
656
|
+
const columnClauses = filter ? collectPositiveColumnFilterClauses(filter) : [];
|
|
657
|
+
const filterRegistryRes = filter ? this.registry.getRegistryResult(stream) : Result.ok(null);
|
|
658
|
+
if (Result.isError(filterRegistryRes)) return Result.err({ kind: "internal", message: filterRegistryRes.error.message });
|
|
659
|
+
const filterRegistry = filterRegistryRes.value;
|
|
660
|
+
|
|
661
|
+
const evaluateRecordResult = (
|
|
662
|
+
offset: bigint,
|
|
663
|
+
routingKey: Uint8Array | null | undefined,
|
|
664
|
+
payload: Uint8Array
|
|
665
|
+
): Result<{ matched: boolean; stop: boolean }, ReaderError> => {
|
|
666
|
+
if (filter) {
|
|
667
|
+
filterScannedBytes += payload.byteLength;
|
|
668
|
+
}
|
|
669
|
+
if (keyBytes && (!routingKey || !bytesEqual(routingKey, keyBytes))) {
|
|
670
|
+
return Result.ok({
|
|
671
|
+
matched: false,
|
|
672
|
+
stop: !!filter && filterScannedBytes >= READ_FILTER_SCAN_LIMIT_BYTES,
|
|
673
|
+
});
|
|
674
|
+
}
|
|
675
|
+
if (!filter) return Result.ok({ matched: true, stop: false });
|
|
676
|
+
const valueRes = decodeJsonPayloadResult(this.registry, stream, offset, payload);
|
|
677
|
+
if (Result.isError(valueRes)) {
|
|
678
|
+
return Result.err({ kind: "internal", message: valueRes.error.message });
|
|
679
|
+
}
|
|
680
|
+
const matchesRes = evaluateReadFilterResult(filterRegistry!, offset, filter, valueRes.value);
|
|
681
|
+
if (Result.isError(matchesRes)) return Result.err({ kind: "internal", message: matchesRes.error.message });
|
|
682
|
+
return Result.ok({
|
|
683
|
+
matched: matchesRes.value,
|
|
684
|
+
stop: filterScannedBytes >= READ_FILTER_SCAN_LIMIT_BYTES,
|
|
685
|
+
});
|
|
686
|
+
};
|
|
687
|
+
|
|
688
|
+
const scanSegmentBytes = async (
|
|
689
|
+
segBytes: Uint8Array,
|
|
690
|
+
seg: SegmentRow,
|
|
691
|
+
allowedDocIds: Set<number> | null
|
|
692
|
+
): Promise<Result<void, ReaderError>> => {
|
|
693
|
+
const footer = parseFooter(segBytes)?.footer;
|
|
694
|
+
if (footer) {
|
|
695
|
+
for (let blockIndex = findFirstRelevantBlockIndex(footer.blocks, seq); blockIndex < footer.blocks.length; blockIndex++) {
|
|
696
|
+
const block = footer.blocks[blockIndex]!;
|
|
697
|
+
const blockStart = block.firstOffset;
|
|
698
|
+
const blockEnd = blockStart + BigInt(block.recordCount) - 1n;
|
|
699
|
+
if (blockEnd < seq) continue;
|
|
700
|
+
if (blockStart > endOffsetNum) break;
|
|
701
|
+
|
|
702
|
+
if (keyBytes) {
|
|
703
|
+
const headerBytes = segBytes.subarray(block.blockOffset, block.blockOffset + DSB3_HEADER_BYTES);
|
|
704
|
+
const headerRes = parseBlockHeaderResult(headerBytes);
|
|
705
|
+
if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
|
|
706
|
+
const bloom = new Bloom256(headerRes.value.bloom);
|
|
707
|
+
if (!bloom.maybeHas(keyBytes)) continue;
|
|
708
|
+
}
|
|
709
|
+
|
|
710
|
+
const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
|
|
711
|
+
const blockBytes = segBytes.subarray(block.blockOffset, block.blockOffset + totalLen);
|
|
712
|
+
const decodedRes = decodeBlockResult(blockBytes);
|
|
713
|
+
if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
|
|
714
|
+
const decoded = decodedRes.value;
|
|
715
|
+
let curOffset = blockStart;
|
|
716
|
+
for (const r of decoded.records) {
|
|
717
|
+
if (curOffset < seq) {
|
|
718
|
+
curOffset += 1n;
|
|
719
|
+
continue;
|
|
720
|
+
}
|
|
721
|
+
if (curOffset > endOffsetNum) break;
|
|
722
|
+
const localDocId = Number(curOffset - seg.start_offset);
|
|
723
|
+
if (allowedDocIds && !allowedDocIds.has(localDocId)) {
|
|
724
|
+
curOffset += 1n;
|
|
725
|
+
continue;
|
|
726
|
+
}
|
|
727
|
+
const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
|
|
728
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
729
|
+
if (matchRes.value.matched) {
|
|
730
|
+
results.push({ offset: curOffset, payload: r.payload });
|
|
731
|
+
bytesOut += r.payload.byteLength;
|
|
732
|
+
}
|
|
733
|
+
curOffset += 1n;
|
|
734
|
+
if (matchRes.value.stop) {
|
|
735
|
+
filterScanLimitReached = true;
|
|
736
|
+
seq = curOffset;
|
|
737
|
+
return Result.ok(undefined);
|
|
738
|
+
}
|
|
739
|
+
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
|
|
740
|
+
seq = curOffset;
|
|
741
|
+
return Result.ok(undefined);
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
return Result.ok(undefined);
|
|
746
|
+
}
|
|
270
747
|
|
|
271
|
-
const scanSegmentBytes = async (segBytes: Uint8Array, seg: SegmentRow): Promise<Result<void, ReaderError>> => {
|
|
272
748
|
let curOffset = seg.start_offset;
|
|
273
749
|
for (const blockRes of iterateBlocksResult(segBytes)) {
|
|
274
750
|
if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
|
|
@@ -286,13 +762,23 @@ export class StreamReader {
|
|
|
286
762
|
continue;
|
|
287
763
|
}
|
|
288
764
|
if (curOffset > endOffsetNum) break;
|
|
289
|
-
|
|
765
|
+
const localDocId = Number(curOffset - seg.start_offset);
|
|
766
|
+
if (allowedDocIds && !allowedDocIds.has(localDocId)) {
|
|
290
767
|
curOffset += 1n;
|
|
291
768
|
continue;
|
|
292
769
|
}
|
|
293
|
-
|
|
294
|
-
|
|
770
|
+
const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
|
|
771
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
772
|
+
if (matchRes.value.matched) {
|
|
773
|
+
results.push({ offset: curOffset, payload: r.payload });
|
|
774
|
+
bytesOut += r.payload.byteLength;
|
|
775
|
+
}
|
|
295
776
|
curOffset += 1n;
|
|
777
|
+
if (matchRes.value.stop) {
|
|
778
|
+
filterScanLimitReached = true;
|
|
779
|
+
seq = curOffset;
|
|
780
|
+
return Result.ok(undefined);
|
|
781
|
+
}
|
|
296
782
|
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
|
|
297
783
|
seq = curOffset;
|
|
298
784
|
return Result.ok(undefined);
|
|
@@ -302,91 +788,219 @@ export class StreamReader {
|
|
|
302
788
|
return Result.ok(undefined);
|
|
303
789
|
};
|
|
304
790
|
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
791
|
+
const scanSegmentSource = async (
|
|
792
|
+
source: SegmentReadSource,
|
|
793
|
+
seg: SegmentRow,
|
|
794
|
+
allowedDocIds: Set<number> | null
|
|
795
|
+
): Promise<Result<void, ReaderError>> => {
|
|
796
|
+
const footerBlocks = loadSegmentFooterBlocksFromSource(seg, source);
|
|
797
|
+
if (footerBlocks) {
|
|
798
|
+
for (let blockIndex = findFirstRelevantBlockIndex(footerBlocks, seq); blockIndex < footerBlocks.length; blockIndex++) {
|
|
799
|
+
const block = footerBlocks[blockIndex]!;
|
|
800
|
+
const blockStart = block.firstOffset;
|
|
801
|
+
const blockEnd = blockStart + BigInt(block.recordCount) - 1n;
|
|
802
|
+
if (blockEnd < seq) continue;
|
|
803
|
+
if (blockStart > endOffsetNum) break;
|
|
804
|
+
|
|
805
|
+
const headerBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + DSB3_HEADER_BYTES - 1);
|
|
806
|
+
const headerRes = parseBlockHeaderResult(headerBytes);
|
|
807
|
+
if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
|
|
808
|
+
if (keyBytes) {
|
|
809
|
+
const bloom = new Bloom256(headerRes.value.bloom);
|
|
810
|
+
if (!bloom.maybeHas(keyBytes)) continue;
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
const totalLen = DSB3_HEADER_BYTES + block.compressedLen;
|
|
814
|
+
const blockBytes = readRangeFromSource(source, block.blockOffset, block.blockOffset + totalLen - 1);
|
|
815
|
+
const decodedRes = decodeBlockResult(blockBytes);
|
|
816
|
+
if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
|
|
817
|
+
const decoded = decodedRes.value;
|
|
818
|
+
let curOffset = blockStart;
|
|
819
|
+
for (const r of decoded.records) {
|
|
820
|
+
if (curOffset < seq) {
|
|
821
|
+
curOffset += 1n;
|
|
822
|
+
continue;
|
|
823
|
+
}
|
|
824
|
+
if (curOffset > endOffsetNum) break;
|
|
825
|
+
const localDocId = Number(curOffset - seg.start_offset);
|
|
826
|
+
if (allowedDocIds && !allowedDocIds.has(localDocId)) {
|
|
827
|
+
curOffset += 1n;
|
|
828
|
+
continue;
|
|
829
|
+
}
|
|
830
|
+
const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
|
|
831
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
832
|
+
if (matchRes.value.matched) {
|
|
833
|
+
results.push({ offset: curOffset, payload: r.payload });
|
|
834
|
+
bytesOut += r.payload.byteLength;
|
|
835
|
+
}
|
|
836
|
+
curOffset += 1n;
|
|
837
|
+
if (matchRes.value.stop) {
|
|
838
|
+
filterScanLimitReached = true;
|
|
839
|
+
seq = curOffset;
|
|
840
|
+
return Result.ok(undefined);
|
|
841
|
+
}
|
|
842
|
+
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
|
|
843
|
+
seq = curOffset;
|
|
844
|
+
return Result.ok(undefined);
|
|
845
|
+
}
|
|
846
|
+
}
|
|
847
|
+
}
|
|
848
|
+
return Result.ok(undefined);
|
|
311
849
|
}
|
|
312
|
-
|
|
850
|
+
|
|
851
|
+
const limit = loadSegmentDataLimitFromSource(seg, source);
|
|
852
|
+
let blockOffset = 0;
|
|
853
|
+
let blockFirstOffset = seg.start_offset;
|
|
854
|
+
while (blockOffset < limit) {
|
|
855
|
+
const headerBytes = readRangeFromSource(source, blockOffset, blockOffset + DSB3_HEADER_BYTES - 1);
|
|
856
|
+
const headerRes = parseBlockHeaderResult(headerBytes);
|
|
857
|
+
if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
|
|
858
|
+
const header = headerRes.value;
|
|
859
|
+
const totalLen = DSB3_HEADER_BYTES + header.compressedLen;
|
|
860
|
+
const blockStart = blockFirstOffset;
|
|
861
|
+
const blockEnd = blockStart + BigInt(header.recordCount) - 1n;
|
|
862
|
+
if (blockEnd < seq) {
|
|
863
|
+
blockOffset += totalLen;
|
|
864
|
+
blockFirstOffset = blockEnd + 1n;
|
|
865
|
+
continue;
|
|
866
|
+
}
|
|
867
|
+
if (blockStart > endOffsetNum) break;
|
|
868
|
+
|
|
869
|
+
if (keyBytes) {
|
|
870
|
+
const bloom = new Bloom256(header.bloom);
|
|
871
|
+
if (!bloom.maybeHas(keyBytes)) {
|
|
872
|
+
blockOffset += totalLen;
|
|
873
|
+
blockFirstOffset = blockEnd + 1n;
|
|
874
|
+
continue;
|
|
875
|
+
}
|
|
876
|
+
}
|
|
877
|
+
|
|
878
|
+
const blockBytes = readRangeFromSource(source, blockOffset, blockOffset + totalLen - 1);
|
|
879
|
+
const decodedRes = decodeBlockResult(blockBytes);
|
|
880
|
+
if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
|
|
881
|
+
const decoded = decodedRes.value;
|
|
882
|
+
let curOffset = blockStart;
|
|
883
|
+
for (const r of decoded.records) {
|
|
884
|
+
if (curOffset < seq) {
|
|
885
|
+
curOffset += 1n;
|
|
886
|
+
continue;
|
|
887
|
+
}
|
|
888
|
+
if (curOffset > endOffsetNum) break;
|
|
889
|
+
const localDocId = Number(curOffset - seg.start_offset);
|
|
890
|
+
if (allowedDocIds && !allowedDocIds.has(localDocId)) {
|
|
891
|
+
curOffset += 1n;
|
|
892
|
+
continue;
|
|
893
|
+
}
|
|
894
|
+
const matchRes = evaluateRecordResult(curOffset, r.routingKey, r.payload);
|
|
895
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
896
|
+
if (matchRes.value.matched) {
|
|
897
|
+
results.push({ offset: curOffset, payload: r.payload });
|
|
898
|
+
bytesOut += r.payload.byteLength;
|
|
899
|
+
}
|
|
900
|
+
curOffset += 1n;
|
|
901
|
+
if (matchRes.value.stop) {
|
|
902
|
+
filterScanLimitReached = true;
|
|
903
|
+
seq = curOffset;
|
|
904
|
+
return Result.ok(undefined);
|
|
905
|
+
}
|
|
906
|
+
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
|
|
907
|
+
seq = curOffset;
|
|
908
|
+
return Result.ok(undefined);
|
|
909
|
+
}
|
|
910
|
+
}
|
|
911
|
+
blockOffset += totalLen;
|
|
912
|
+
blockFirstOffset = blockEnd + 1n;
|
|
913
|
+
}
|
|
914
|
+
return Result.ok(undefined);
|
|
915
|
+
};
|
|
916
|
+
|
|
917
|
+
const sealedEndSeq = endOffsetNum < srow.sealed_through ? endOffsetNum : srow.sealed_through;
|
|
918
|
+
const plannedSealedSegments = this.planSealedReadSegments(
|
|
919
|
+
stream,
|
|
920
|
+
seq,
|
|
921
|
+
sealedEndSeq,
|
|
922
|
+
candidateSegments,
|
|
923
|
+
indexedThrough,
|
|
924
|
+
"asc"
|
|
925
|
+
);
|
|
926
|
+
|
|
927
|
+
// 1) Read from sealed segments.
|
|
928
|
+
if (plannedSealedSegments) {
|
|
929
|
+
for (const seg of plannedSealedSegments.segments) {
|
|
930
|
+
if (seg.end_offset < seq) continue;
|
|
931
|
+
if (seg.start_offset > sealedEndSeq) break;
|
|
932
|
+
let allowedDocIds: Set<number> | null = null;
|
|
933
|
+
if (columnClauses.length > 0) {
|
|
934
|
+
const docIdsRes = await this.resolveColumnCandidateDocIdsResult(stream, seg.segment_index, columnClauses);
|
|
935
|
+
if (Result.isError(docIdsRes)) return Result.err({ kind: "internal", message: docIdsRes.error.message });
|
|
936
|
+
if (docIdsRes.value) {
|
|
937
|
+
allowedDocIds = docIdsRes.value;
|
|
938
|
+
if (allowedDocIds.size === 0) {
|
|
939
|
+
seq = seg.end_offset + 1n;
|
|
940
|
+
continue;
|
|
941
|
+
}
|
|
942
|
+
}
|
|
943
|
+
}
|
|
944
|
+
const preferFull = !keyBytes && this.config.readMaxBytes >= seg.size_bytes;
|
|
945
|
+
if (preferFull) {
|
|
946
|
+
const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
|
|
947
|
+
const scanRes = await scanSegmentBytes(segBytes, seg, allowedDocIds);
|
|
948
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
949
|
+
if (filterScanLimitReached) return Result.ok(finalize());
|
|
950
|
+
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
|
|
951
|
+
} else {
|
|
952
|
+
const source = await loadSegmentSource(this.os, seg, this.diskCache, this.retryOpts());
|
|
953
|
+
const scanRes = await scanSegmentSource(source, seg, allowedDocIds);
|
|
954
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
955
|
+
if (filterScanLimitReached) return Result.ok(finalize());
|
|
956
|
+
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
|
|
957
|
+
}
|
|
313
958
|
seq = seg.end_offset + 1n;
|
|
314
|
-
|
|
959
|
+
}
|
|
960
|
+
if (seq <= plannedSealedSegments.sealedEndSeq) {
|
|
961
|
+
seq = plannedSealedSegments.sealedEndSeq + 1n;
|
|
962
|
+
}
|
|
963
|
+
} else {
|
|
964
|
+
while (seq <= endOffsetNum && seq <= srow.sealed_through) {
|
|
965
|
+
const seg = this.db.findSegmentForOffset(stream, seq);
|
|
966
|
+
if (!seg) {
|
|
967
|
+
// Corruption in local metadata: sealed_through points past segments table.
|
|
968
|
+
break;
|
|
969
|
+
}
|
|
970
|
+
if (candidateSegments && seg.segment_index < indexedThrough && !candidateSegments.has(seg.segment_index)) {
|
|
971
|
+
seq = seg.end_offset + 1n;
|
|
972
|
+
continue;
|
|
973
|
+
}
|
|
974
|
+
let allowedDocIds: Set<number> | null = null;
|
|
975
|
+
if (columnClauses.length > 0) {
|
|
976
|
+
const docIdsRes = await this.resolveColumnCandidateDocIdsResult(stream, seg.segment_index, columnClauses);
|
|
977
|
+
if (Result.isError(docIdsRes)) return Result.err({ kind: "internal", message: docIdsRes.error.message });
|
|
978
|
+
if (docIdsRes.value) {
|
|
979
|
+
allowedDocIds = docIdsRes.value;
|
|
980
|
+
if (allowedDocIds.size === 0) {
|
|
981
|
+
seq = seg.end_offset + 1n;
|
|
982
|
+
continue;
|
|
983
|
+
}
|
|
984
|
+
}
|
|
315
985
|
}
|
|
316
986
|
const preferFull = !keyBytes && this.config.readMaxBytes >= seg.size_bytes;
|
|
317
987
|
if (preferFull) {
|
|
318
988
|
const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
|
|
319
|
-
const scanRes = await scanSegmentBytes(segBytes, seg);
|
|
989
|
+
const scanRes = await scanSegmentBytes(segBytes, seg, allowedDocIds);
|
|
320
990
|
if (Result.isError(scanRes)) return scanRes;
|
|
991
|
+
if (filterScanLimitReached) return Result.ok(finalize());
|
|
321
992
|
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
|
|
322
993
|
} else {
|
|
323
|
-
const
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
|
|
329
|
-
} else {
|
|
330
|
-
const footer = footerInfo.footer;
|
|
331
|
-
for (const entry of footer.blocks) {
|
|
332
|
-
const blockStart = entry.firstOffset;
|
|
333
|
-
const blockEnd = entry.firstOffset + BigInt(entry.recordCount) - 1n;
|
|
334
|
-
if (blockEnd < seq) continue;
|
|
335
|
-
if (blockStart > endOffsetNum) break;
|
|
336
|
-
|
|
337
|
-
if (keyBytes) {
|
|
338
|
-
const headerBytes = await readSegmentRange(
|
|
339
|
-
this.os,
|
|
340
|
-
seg,
|
|
341
|
-
entry.blockOffset,
|
|
342
|
-
entry.blockOffset + DSB3_HEADER_BYTES - 1,
|
|
343
|
-
this.diskCache,
|
|
344
|
-
this.retryOpts()
|
|
345
|
-
);
|
|
346
|
-
const headerRes = parseBlockHeaderResult(headerBytes);
|
|
347
|
-
if (Result.isError(headerRes)) return Result.err({ kind: "internal", message: headerRes.error.message });
|
|
348
|
-
const header = headerRes.value;
|
|
349
|
-
const bloom = new Bloom256(header.bloom);
|
|
350
|
-
if (!bloom.maybeHas(keyBytes)) continue;
|
|
351
|
-
}
|
|
352
|
-
|
|
353
|
-
const totalLen = DSB3_HEADER_BYTES + entry.compressedLen;
|
|
354
|
-
const blockBytes = await readSegmentRange(
|
|
355
|
-
this.os,
|
|
356
|
-
seg,
|
|
357
|
-
entry.blockOffset,
|
|
358
|
-
entry.blockOffset + totalLen - 1,
|
|
359
|
-
this.diskCache,
|
|
360
|
-
this.retryOpts()
|
|
361
|
-
);
|
|
362
|
-
const decodedRes = decodeBlockResult(blockBytes);
|
|
363
|
-
if (Result.isError(decodedRes)) return Result.err({ kind: "internal", message: decodedRes.error.message });
|
|
364
|
-
const decoded = decodedRes.value;
|
|
365
|
-
let curOffset = entry.firstOffset;
|
|
366
|
-
for (const r of decoded.records) {
|
|
367
|
-
if (curOffset < seq) {
|
|
368
|
-
curOffset += 1n;
|
|
369
|
-
continue;
|
|
370
|
-
}
|
|
371
|
-
if (curOffset > endOffsetNum) break;
|
|
372
|
-
if (keyBytes && !bytesEqual(r.routingKey, keyBytes)) {
|
|
373
|
-
curOffset += 1n;
|
|
374
|
-
continue;
|
|
375
|
-
}
|
|
376
|
-
results.push({ offset: curOffset, payload: r.payload });
|
|
377
|
-
bytesOut += r.payload.byteLength;
|
|
378
|
-
curOffset += 1n;
|
|
379
|
-
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
|
|
380
|
-
seq = curOffset;
|
|
381
|
-
return Result.ok(finalize());
|
|
382
|
-
}
|
|
383
|
-
}
|
|
384
|
-
}
|
|
385
|
-
}
|
|
994
|
+
const source = await loadSegmentSource(this.os, seg, this.diskCache, this.retryOpts());
|
|
995
|
+
const scanRes = await scanSegmentSource(source, seg, allowedDocIds);
|
|
996
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
997
|
+
if (filterScanLimitReached) return Result.ok(finalize());
|
|
998
|
+
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) return Result.ok(finalize());
|
|
386
999
|
}
|
|
387
1000
|
|
|
388
|
-
|
|
389
|
-
|
|
1001
|
+
// Move to next segment.
|
|
1002
|
+
seq = seg.end_offset + 1n;
|
|
1003
|
+
}
|
|
390
1004
|
}
|
|
391
1005
|
|
|
392
1006
|
// 2) Read remaining from WAL tail.
|
|
@@ -395,8 +1009,24 @@ export class StreamReader {
|
|
|
395
1009
|
for (const rec of this.db.iterWalRange(stream, seq, endOffsetNum, keyBytes ?? undefined)) {
|
|
396
1010
|
const s = BigInt(rec.offset);
|
|
397
1011
|
const payload: Uint8Array = rec.payload;
|
|
398
|
-
|
|
399
|
-
|
|
1012
|
+
const routingKey =
|
|
1013
|
+
rec.routing_key == null
|
|
1014
|
+
? null
|
|
1015
|
+
: rec.routing_key instanceof Uint8Array
|
|
1016
|
+
? rec.routing_key
|
|
1017
|
+
: new Uint8Array(rec.routing_key);
|
|
1018
|
+
const matchRes = evaluateRecordResult(s, routingKey, payload);
|
|
1019
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
1020
|
+
if (matchRes.value.matched) {
|
|
1021
|
+
results.push({ offset: s, payload });
|
|
1022
|
+
bytesOut += payload.byteLength;
|
|
1023
|
+
}
|
|
1024
|
+
if (matchRes.value.stop) {
|
|
1025
|
+
filterScanLimitReached = true;
|
|
1026
|
+
hitLimit = true;
|
|
1027
|
+
seq = s + 1n;
|
|
1028
|
+
break;
|
|
1029
|
+
}
|
|
400
1030
|
if (results.length >= this.config.readMaxRecords || bytesOut >= this.config.readMaxBytes) {
|
|
401
1031
|
hitLimit = true;
|
|
402
1032
|
// We only emitted payloads up through this offset (key-filtered reads
|
|
@@ -433,6 +1063,13 @@ export class StreamReader {
|
|
|
433
1063
|
endOffsetSeq: endOffsetNum,
|
|
434
1064
|
nextOffsetSeq: scannedThrough,
|
|
435
1065
|
records: results,
|
|
1066
|
+
...(filter
|
|
1067
|
+
? {
|
|
1068
|
+
filterScannedBytes,
|
|
1069
|
+
filterScanLimitBytes: READ_FILTER_SCAN_LIMIT_BYTES,
|
|
1070
|
+
filterScanLimitReached,
|
|
1071
|
+
}
|
|
1072
|
+
: {}),
|
|
436
1073
|
};
|
|
437
1074
|
}
|
|
438
1075
|
} catch (e: unknown) {
|
|
@@ -440,11 +1077,1218 @@ export class StreamReader {
|
|
|
440
1077
|
}
|
|
441
1078
|
}
|
|
442
1079
|
|
|
443
|
-
async read(args: {
|
|
1080
|
+
async read(args: {
|
|
1081
|
+
stream: string;
|
|
1082
|
+
offset: string;
|
|
1083
|
+
key: string | null;
|
|
1084
|
+
format: ReadFormat;
|
|
1085
|
+
filter?: CompiledReadFilter | null;
|
|
1086
|
+
}): Promise<ReadBatch> {
|
|
444
1087
|
const res = await this.readResult(args);
|
|
445
1088
|
if (Result.isError(res)) throw dsError(res.error.message);
|
|
446
1089
|
return res.value;
|
|
447
1090
|
}
|
|
1091
|
+
|
|
1092
|
+
async searchResult(args: { stream: string; request: SearchRequest }): Promise<Result<SearchResultBatch, ReaderError>> {
|
|
1093
|
+
const startedAt = Date.now();
|
|
1094
|
+
const { stream, request } = args;
|
|
1095
|
+
const leaveSearchPhase = this.memorySampler?.enter("search", {
|
|
1096
|
+
stream,
|
|
1097
|
+
has_query: request.q != null,
|
|
1098
|
+
over_limit: this.memory?.isOverLimit() === true,
|
|
1099
|
+
});
|
|
1100
|
+
const srow = this.db.getStream(stream);
|
|
1101
|
+
try {
|
|
1102
|
+
if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
|
|
1103
|
+
if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
|
|
1104
|
+
return Result.err({ kind: "gone", message: "stream expired" });
|
|
1105
|
+
}
|
|
1106
|
+
|
|
1107
|
+
const regRes = this.registry.getRegistryResult(stream);
|
|
1108
|
+
if (Result.isError(regRes)) return Result.err({ kind: "internal", message: regRes.error.message });
|
|
1109
|
+
const registry = regRes.value;
|
|
1110
|
+
if (!registry.search) return Result.err({ kind: "internal", message: "search is not configured for this stream" });
|
|
1111
|
+
|
|
1112
|
+
const snapshotEndSeq = srow.next_offset - 1n;
|
|
1113
|
+
const snapshotEndOffset = encodeOffset(srow.epoch, snapshotEndSeq);
|
|
1114
|
+
const coverageState = this.computePublishedCoverageState(stream, srow, registry);
|
|
1115
|
+
const visibleSnapshotEndSeq = coverageState.canSearchWalTail
|
|
1116
|
+
? snapshotEndSeq
|
|
1117
|
+
: (coverageState.visibleThroughSeq < snapshotEndSeq ? coverageState.visibleThroughSeq : snapshotEndSeq);
|
|
1118
|
+
const visibleSealedThrough = coverageState.canSearchWalTail
|
|
1119
|
+
? srow.sealed_through
|
|
1120
|
+
: (coverageState.visibleThroughSeq < srow.sealed_through ? coverageState.visibleThroughSeq : srow.sealed_through);
|
|
1121
|
+
const deadline = request.timeoutMs == null ? null : Date.now() + request.timeoutMs;
|
|
1122
|
+
const leadingSort = request.sort[0] ?? null;
|
|
1123
|
+
const offsetSearchAfter =
|
|
1124
|
+
request.searchAfter && leadingSort?.kind === "offset" ? normalizeSearchAfterValue(leadingSort, request.searchAfter[0]) : null;
|
|
1125
|
+
const cursorFieldBound = resolveSearchCursorFieldBound(request);
|
|
1126
|
+
|
|
1127
|
+
const hits: SearchHitInternal[] = [];
|
|
1128
|
+
let timedOut = false;
|
|
1129
|
+
const markTimedOutIfNeeded = (): boolean => {
|
|
1130
|
+
if (deadline == null || Date.now() < deadline) return false;
|
|
1131
|
+
timedOut = true;
|
|
1132
|
+
return true;
|
|
1133
|
+
};
|
|
1134
|
+
let indexedSegments = 0;
|
|
1135
|
+
let indexedSegmentTimeMs = 0;
|
|
1136
|
+
let ftsSectionGetMs = 0;
|
|
1137
|
+
let ftsDecodeMs = 0;
|
|
1138
|
+
let ftsClauseEstimateMs = 0;
|
|
1139
|
+
let scannedSegments = 0;
|
|
1140
|
+
let scannedSegmentTimeMs = 0;
|
|
1141
|
+
let scannedTailDocs = 0;
|
|
1142
|
+
let scannedTailTimeMs = 0;
|
|
1143
|
+
const indexFamiliesUsed = new Set<string>();
|
|
1144
|
+
const columnClauses = collectPositiveSearchColumnClauses(request.q);
|
|
1145
|
+
const ftsClauses = collectPositiveSearchFtsClauses(request.q);
|
|
1146
|
+
let exactCandidateInfo: SegmentCandidateInfo = { segments: null, indexedThrough: 0 };
|
|
1147
|
+
let exactCandidateTimeMs = 0;
|
|
1148
|
+
if (!markTimedOutIfNeeded()) {
|
|
1149
|
+
const exactCandidateStartedAt = Date.now();
|
|
1150
|
+
exactCandidateInfo = await this.resolveSearchExactCandidateSegments(stream, request.q);
|
|
1151
|
+
exactCandidateTimeMs = Date.now() - exactCandidateStartedAt;
|
|
1152
|
+
markTimedOutIfNeeded();
|
|
1153
|
+
}
|
|
1154
|
+
|
|
1155
|
+
const collectSearchMatchResult = (
|
|
1156
|
+
offsetSeq: bigint,
|
|
1157
|
+
payload: Uint8Array
|
|
1158
|
+
): Result<void, ReaderError> => {
|
|
1159
|
+
const parsedRes = decodeJsonPayloadResult(this.registry, stream, offsetSeq, payload);
|
|
1160
|
+
if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
|
|
1161
|
+
const evalRes = evaluateSearchQueryResult(registry, offsetSeq, request.q, parsedRes.value);
|
|
1162
|
+
if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
|
|
1163
|
+
if (!evalRes.value.matched) return Result.ok(undefined);
|
|
1164
|
+
const fieldsRes = extractSearchHitFieldsResult(registry, offsetSeq, parsedRes.value);
|
|
1165
|
+
if (Result.isError(fieldsRes)) return Result.err({ kind: "internal", message: fieldsRes.error.message });
|
|
1166
|
+
const sortInternal = buildSearchSortInternalValues(request.sort, fieldsRes.value, evalRes.value, offsetSeq);
|
|
1167
|
+
if (request.searchAfter && compareSearchAfterValues(sortInternal, request.sort, request.searchAfter) <= 0) {
|
|
1168
|
+
return Result.ok(undefined);
|
|
1169
|
+
}
|
|
1170
|
+
hits.push({
|
|
1171
|
+
offsetSeq,
|
|
1172
|
+
offset: encodeOffset(srow.epoch, offsetSeq),
|
|
1173
|
+
score: evalRes.value.score,
|
|
1174
|
+
sortInternal,
|
|
1175
|
+
sortResponse: buildSearchSortResponseValues(request.sort, sortInternal, encodeOffset(srow.epoch, offsetSeq)),
|
|
1176
|
+
fields: fieldsRes.value,
|
|
1177
|
+
source: parsedRes.value,
|
|
1178
|
+
});
|
|
1179
|
+
return Result.ok(undefined);
|
|
1180
|
+
};
|
|
1181
|
+
|
|
1182
|
+
const scanSegmentForSearchResult = async (
|
|
1183
|
+
seg: SegmentRow,
|
|
1184
|
+
allowedDocIds: Set<number> | null,
|
|
1185
|
+
rangeStartSeq: bigint,
|
|
1186
|
+
rangeEndSeq: bigint
|
|
1187
|
+
): Promise<Result<void, ReaderError>> => {
|
|
1188
|
+
if (markTimedOutIfNeeded()) return Result.ok(undefined);
|
|
1189
|
+
const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
|
|
1190
|
+
if (markTimedOutIfNeeded()) return Result.ok(undefined);
|
|
1191
|
+
let curOffset = seg.start_offset;
|
|
1192
|
+
for (const blockRes of iterateBlocksResult(segBytes)) {
|
|
1193
|
+
if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
|
|
1194
|
+
for (const record of blockRes.value.decoded.records) {
|
|
1195
|
+
if (curOffset > rangeEndSeq) return Result.ok(undefined);
|
|
1196
|
+
if (curOffset < rangeStartSeq) {
|
|
1197
|
+
curOffset += 1n;
|
|
1198
|
+
continue;
|
|
1199
|
+
}
|
|
1200
|
+
const localDocId = Number(curOffset - seg.start_offset);
|
|
1201
|
+
if (!allowedDocIds || allowedDocIds.has(localDocId)) {
|
|
1202
|
+
const matchRes = collectSearchMatchResult(curOffset, record.payload);
|
|
1203
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
1204
|
+
}
|
|
1205
|
+
curOffset += 1n;
|
|
1206
|
+
if (markTimedOutIfNeeded()) return Result.ok(undefined);
|
|
1207
|
+
}
|
|
1208
|
+
}
|
|
1209
|
+
return Result.ok(undefined);
|
|
1210
|
+
};
|
|
1211
|
+
|
|
1212
|
+
const scanSegmentWithFamiliesResult = async (
|
|
1213
|
+
seg: SegmentRow,
|
|
1214
|
+
rangeStartSeq: bigint,
|
|
1215
|
+
rangeEndSeq: bigint
|
|
1216
|
+
): Promise<Result<void, ReaderError>> => {
|
|
1217
|
+
const segmentStartedAt = Date.now();
|
|
1218
|
+
if (markTimedOutIfNeeded()) return Result.ok(undefined);
|
|
1219
|
+
if (
|
|
1220
|
+
exactCandidateInfo.segments &&
|
|
1221
|
+
seg.segment_index < exactCandidateInfo.indexedThrough &&
|
|
1222
|
+
!exactCandidateInfo.segments.has(seg.segment_index)
|
|
1223
|
+
) {
|
|
1224
|
+
return Result.ok(undefined);
|
|
1225
|
+
}
|
|
1226
|
+
if (cursorFieldBound) {
|
|
1227
|
+
const overlapsCursor = await this.segmentMayOverlapSearchCursor(stream, seg.segment_index, cursorFieldBound);
|
|
1228
|
+
if (!overlapsCursor) {
|
|
1229
|
+
indexFamiliesUsed.add("col");
|
|
1230
|
+
indexedSegments += 1;
|
|
1231
|
+
indexedSegmentTimeMs += Date.now() - segmentStartedAt;
|
|
1232
|
+
return Result.ok(undefined);
|
|
1233
|
+
}
|
|
1234
|
+
}
|
|
1235
|
+
if (markTimedOutIfNeeded()) return Result.ok(undefined);
|
|
1236
|
+
|
|
1237
|
+
const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult(
|
|
1238
|
+
stream,
|
|
1239
|
+
seg.segment_index,
|
|
1240
|
+
columnClauses,
|
|
1241
|
+
ftsClauses,
|
|
1242
|
+
{
|
|
1243
|
+
addFtsSectionGetMs: (deltaMs) => {
|
|
1244
|
+
ftsSectionGetMs += deltaMs;
|
|
1245
|
+
},
|
|
1246
|
+
addFtsDecodeMs: (deltaMs) => {
|
|
1247
|
+
ftsDecodeMs += deltaMs;
|
|
1248
|
+
},
|
|
1249
|
+
addFtsClauseEstimateMs: (deltaMs) => {
|
|
1250
|
+
ftsClauseEstimateMs += deltaMs;
|
|
1251
|
+
},
|
|
1252
|
+
}
|
|
1253
|
+
);
|
|
1254
|
+
if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message });
|
|
1255
|
+
if (markTimedOutIfNeeded()) return Result.ok(undefined);
|
|
1256
|
+
const familyCandidates = familyCandidatesRes.value;
|
|
1257
|
+
if (familyCandidates.docIds && familyCandidates.docIds.size === 0) {
|
|
1258
|
+
indexedSegments += familyCandidates.usedFamilies.size > 0 ? 1 : 0;
|
|
1259
|
+
for (const family of familyCandidates.usedFamilies) indexFamiliesUsed.add(family);
|
|
1260
|
+
if (familyCandidates.usedFamilies.size > 0) indexedSegmentTimeMs += Date.now() - segmentStartedAt;
|
|
1261
|
+
return Result.ok(undefined);
|
|
1262
|
+
}
|
|
1263
|
+
const usedIndexedFamilies = familyCandidates.usedFamilies.size > 0;
|
|
1264
|
+
if (familyCandidates.usedFamilies.size > 0) {
|
|
1265
|
+
indexedSegments += 1;
|
|
1266
|
+
for (const family of familyCandidates.usedFamilies) indexFamiliesUsed.add(family);
|
|
1267
|
+
} else {
|
|
1268
|
+
scannedSegments += 1;
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
const scanRes = await scanSegmentForSearchResult(seg, familyCandidates.docIds, rangeStartSeq, rangeEndSeq);
|
|
1272
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
1273
|
+
if (usedIndexedFamilies) indexedSegmentTimeMs += Date.now() - segmentStartedAt;
|
|
1274
|
+
else scannedSegmentTimeMs += Date.now() - segmentStartedAt;
|
|
1275
|
+
return Result.ok(undefined);
|
|
1276
|
+
};
|
|
1277
|
+
|
|
1278
|
+
const stopIfPageComplete = (): boolean => hits.length >= request.size;
|
|
1279
|
+
|
|
1280
|
+
if (leadingSort?.kind === "offset") {
|
|
1281
|
+
const descending = leadingSort.direction === "desc";
|
|
1282
|
+
const rangeStartSeq = descending ? 0n : typeof offsetSearchAfter === "bigint" ? offsetSearchAfter + 1n : 0n;
|
|
1283
|
+
const requestedRangeEndSeq = descending ? (typeof offsetSearchAfter === "bigint" ? offsetSearchAfter - 1n : snapshotEndSeq) : snapshotEndSeq;
|
|
1284
|
+
const rangeEndSeq = requestedRangeEndSeq < visibleSnapshotEndSeq ? requestedRangeEndSeq : visibleSnapshotEndSeq;
|
|
1285
|
+
|
|
1286
|
+
if (rangeStartSeq <= rangeEndSeq) {
|
|
1287
|
+
if (descending) {
|
|
1288
|
+
const tailStart = srow.sealed_through + 1n;
|
|
1289
|
+
if (coverageState.canSearchWalTail && tailStart <= rangeEndSeq) {
|
|
1290
|
+
const walStart = rangeStartSeq > tailStart ? rangeStartSeq : tailStart;
|
|
1291
|
+
const walEnd = rangeEndSeq;
|
|
1292
|
+
if (walStart <= walEnd) {
|
|
1293
|
+
const tailStartedAt = Date.now();
|
|
1294
|
+
for (const record of this.db.iterWalRangeDesc(stream, walStart, walEnd)) {
|
|
1295
|
+
scannedTailDocs += 1;
|
|
1296
|
+
const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload);
|
|
1297
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
1298
|
+
if (markTimedOutIfNeeded()) break;
|
|
1299
|
+
if (stopIfPageComplete()) break;
|
|
1300
|
+
}
|
|
1301
|
+
scannedTailTimeMs += Date.now() - tailStartedAt;
|
|
1302
|
+
}
|
|
1303
|
+
}
|
|
1304
|
+
if (!timedOut && !stopIfPageComplete()) {
|
|
1305
|
+
const sealedEnd = rangeEndSeq < visibleSealedThrough ? rangeEndSeq : visibleSealedThrough;
|
|
1306
|
+
if (sealedEnd >= rangeStartSeq) {
|
|
1307
|
+
const plannedSealedSegments = this.planSealedReadSegments(
|
|
1308
|
+
stream,
|
|
1309
|
+
rangeStartSeq,
|
|
1310
|
+
sealedEnd,
|
|
1311
|
+
exactCandidateInfo.segments,
|
|
1312
|
+
exactCandidateInfo.indexedThrough,
|
|
1313
|
+
"desc"
|
|
1314
|
+
);
|
|
1315
|
+
if (plannedSealedSegments) {
|
|
1316
|
+
for (const seg of plannedSealedSegments.segments) {
|
|
1317
|
+
const scanRes = await this.scanSegmentReverseForSearchResult(
|
|
1318
|
+
stream,
|
|
1319
|
+
seg,
|
|
1320
|
+
exactCandidateInfo,
|
|
1321
|
+
cursorFieldBound,
|
|
1322
|
+
columnClauses,
|
|
1323
|
+
ftsClauses,
|
|
1324
|
+
rangeStartSeq,
|
|
1325
|
+
sealedEnd,
|
|
1326
|
+
{
|
|
1327
|
+
indexFamiliesUsed,
|
|
1328
|
+
collectSearchMatchResult,
|
|
1329
|
+
deadline,
|
|
1330
|
+
isTimedOut: () => timedOut,
|
|
1331
|
+
setTimedOut: (next) => {
|
|
1332
|
+
timedOut = next;
|
|
1333
|
+
},
|
|
1334
|
+
stopIfPageComplete,
|
|
1335
|
+
addIndexedSegment: () => {
|
|
1336
|
+
indexedSegments += 1;
|
|
1337
|
+
},
|
|
1338
|
+
addScannedSegment: () => {
|
|
1339
|
+
scannedSegments += 1;
|
|
1340
|
+
},
|
|
1341
|
+
addIndexedSegmentTimeMs: (deltaMs) => {
|
|
1342
|
+
indexedSegmentTimeMs += deltaMs;
|
|
1343
|
+
},
|
|
1344
|
+
addFtsSectionGetMs: (deltaMs) => {
|
|
1345
|
+
ftsSectionGetMs += deltaMs;
|
|
1346
|
+
},
|
|
1347
|
+
addFtsDecodeMs: (deltaMs) => {
|
|
1348
|
+
ftsDecodeMs += deltaMs;
|
|
1349
|
+
},
|
|
1350
|
+
addFtsClauseEstimateMs: (deltaMs) => {
|
|
1351
|
+
ftsClauseEstimateMs += deltaMs;
|
|
1352
|
+
},
|
|
1353
|
+
addScannedSegmentTimeMs: (deltaMs) => {
|
|
1354
|
+
scannedSegmentTimeMs += deltaMs;
|
|
1355
|
+
},
|
|
1356
|
+
}
|
|
1357
|
+
);
|
|
1358
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
1359
|
+
if (timedOut || stopIfPageComplete()) break;
|
|
1360
|
+
}
|
|
1361
|
+
} else {
|
|
1362
|
+
const startSeg = this.db.findSegmentForOffset(stream, sealedEnd);
|
|
1363
|
+
let segmentIndex = startSeg?.segment_index ?? this.db.countSegmentsForStream(stream) - 1;
|
|
1364
|
+
while (segmentIndex >= 0) {
|
|
1365
|
+
const seg = this.db.getSegmentByIndex(stream, segmentIndex);
|
|
1366
|
+
if (!seg) {
|
|
1367
|
+
segmentIndex -= 1;
|
|
1368
|
+
continue;
|
|
1369
|
+
}
|
|
1370
|
+
if (seg.end_offset < rangeStartSeq) break;
|
|
1371
|
+
if (seg.start_offset > sealedEnd) {
|
|
1372
|
+
segmentIndex -= 1;
|
|
1373
|
+
continue;
|
|
1374
|
+
}
|
|
1375
|
+
const scanRes = await this.scanSegmentReverseForSearchResult(
|
|
1376
|
+
stream,
|
|
1377
|
+
seg,
|
|
1378
|
+
exactCandidateInfo,
|
|
1379
|
+
cursorFieldBound,
|
|
1380
|
+
columnClauses,
|
|
1381
|
+
ftsClauses,
|
|
1382
|
+
rangeStartSeq,
|
|
1383
|
+
sealedEnd,
|
|
1384
|
+
{
|
|
1385
|
+
indexFamiliesUsed,
|
|
1386
|
+
collectSearchMatchResult,
|
|
1387
|
+
deadline,
|
|
1388
|
+
isTimedOut: () => timedOut,
|
|
1389
|
+
setTimedOut: (next) => {
|
|
1390
|
+
timedOut = next;
|
|
1391
|
+
},
|
|
1392
|
+
stopIfPageComplete,
|
|
1393
|
+
addIndexedSegment: () => {
|
|
1394
|
+
indexedSegments += 1;
|
|
1395
|
+
},
|
|
1396
|
+
addScannedSegment: () => {
|
|
1397
|
+
scannedSegments += 1;
|
|
1398
|
+
},
|
|
1399
|
+
addIndexedSegmentTimeMs: (deltaMs) => {
|
|
1400
|
+
indexedSegmentTimeMs += deltaMs;
|
|
1401
|
+
},
|
|
1402
|
+
addFtsSectionGetMs: (deltaMs) => {
|
|
1403
|
+
ftsSectionGetMs += deltaMs;
|
|
1404
|
+
},
|
|
1405
|
+
addFtsDecodeMs: (deltaMs) => {
|
|
1406
|
+
ftsDecodeMs += deltaMs;
|
|
1407
|
+
},
|
|
1408
|
+
addFtsClauseEstimateMs: (deltaMs) => {
|
|
1409
|
+
ftsClauseEstimateMs += deltaMs;
|
|
1410
|
+
},
|
|
1411
|
+
addScannedSegmentTimeMs: (deltaMs) => {
|
|
1412
|
+
scannedSegmentTimeMs += deltaMs;
|
|
1413
|
+
},
|
|
1414
|
+
}
|
|
1415
|
+
);
|
|
1416
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
1417
|
+
if (timedOut || stopIfPageComplete()) break;
|
|
1418
|
+
segmentIndex -= 1;
|
|
1419
|
+
}
|
|
1420
|
+
}
|
|
1421
|
+
}
|
|
1422
|
+
}
|
|
1423
|
+
} else {
|
|
1424
|
+
let seq = rangeStartSeq;
|
|
1425
|
+
const sealedEnd = rangeEndSeq < visibleSealedThrough ? rangeEndSeq : visibleSealedThrough;
|
|
1426
|
+
const plannedSealedSegments = this.planSealedReadSegments(
|
|
1427
|
+
stream,
|
|
1428
|
+
rangeStartSeq,
|
|
1429
|
+
sealedEnd,
|
|
1430
|
+
exactCandidateInfo.segments,
|
|
1431
|
+
exactCandidateInfo.indexedThrough,
|
|
1432
|
+
"asc"
|
|
1433
|
+
);
|
|
1434
|
+
if (plannedSealedSegments) {
|
|
1435
|
+
for (const seg of plannedSealedSegments.segments) {
|
|
1436
|
+
const scanRes = await scanSegmentWithFamiliesResult(seg, rangeStartSeq, rangeEndSeq);
|
|
1437
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
1438
|
+
seq = seg.end_offset + 1n;
|
|
1439
|
+
if (timedOut || stopIfPageComplete()) break;
|
|
1440
|
+
}
|
|
1441
|
+
if (seq <= plannedSealedSegments.sealedEndSeq) seq = plannedSealedSegments.sealedEndSeq + 1n;
|
|
1442
|
+
} else {
|
|
1443
|
+
while (seq <= rangeEndSeq && seq <= visibleSealedThrough) {
|
|
1444
|
+
const seg = this.db.findSegmentForOffset(stream, seq);
|
|
1445
|
+
if (!seg) break;
|
|
1446
|
+
const scanRes = await scanSegmentWithFamiliesResult(seg, rangeStartSeq, rangeEndSeq);
|
|
1447
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
1448
|
+
seq = seg.end_offset + 1n;
|
|
1449
|
+
if (timedOut || stopIfPageComplete()) break;
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
if (!timedOut && !stopIfPageComplete() && coverageState.canSearchWalTail && seq <= rangeEndSeq) {
|
|
1453
|
+
const tailStartedAt = Date.now();
|
|
1454
|
+
for (const record of this.db.iterWalRange(stream, seq, rangeEndSeq)) {
|
|
1455
|
+
scannedTailDocs += 1;
|
|
1456
|
+
const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload);
|
|
1457
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
1458
|
+
if (markTimedOutIfNeeded()) break;
|
|
1459
|
+
if (stopIfPageComplete()) break;
|
|
1460
|
+
}
|
|
1461
|
+
scannedTailTimeMs += Date.now() - tailStartedAt;
|
|
1462
|
+
}
|
|
1463
|
+
}
|
|
1464
|
+
}
|
|
1465
|
+
|
|
1466
|
+
const pageHits = hits.slice(0, request.size);
|
|
1467
|
+
const nextSearchAfter = pageHits.length === request.size ? pageHits[pageHits.length - 1].sortResponse : null;
|
|
1468
|
+
const exactTotalKnown = !timedOut && coverageState.complete && nextSearchAfter == null;
|
|
1469
|
+
return Result.ok({
|
|
1470
|
+
stream,
|
|
1471
|
+
snapshotEndOffset,
|
|
1472
|
+
tookMs: Date.now() - startedAt,
|
|
1473
|
+
timedOut,
|
|
1474
|
+
timeoutMs: request.timeoutMs,
|
|
1475
|
+
coverage: {
|
|
1476
|
+
mode: coverageState.mode,
|
|
1477
|
+
complete: coverageState.complete && !timedOut,
|
|
1478
|
+
streamHeadOffset: coverageState.streamHeadOffset,
|
|
1479
|
+
visibleThroughOffset: coverageState.visibleThroughOffset,
|
|
1480
|
+
visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
|
|
1481
|
+
oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
|
|
1482
|
+
possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
|
|
1483
|
+
possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
|
|
1484
|
+
possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
|
|
1485
|
+
possibleMissingWalRows: coverageState.possibleMissingWalRows,
|
|
1486
|
+
indexedSegments,
|
|
1487
|
+
indexedSegmentTimeMs,
|
|
1488
|
+
ftsSectionGetMs,
|
|
1489
|
+
ftsDecodeMs,
|
|
1490
|
+
ftsClauseEstimateMs,
|
|
1491
|
+
scannedSegments,
|
|
1492
|
+
scannedSegmentTimeMs,
|
|
1493
|
+
scannedTailDocs,
|
|
1494
|
+
scannedTailTimeMs,
|
|
1495
|
+
exactCandidateTimeMs,
|
|
1496
|
+
indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
|
|
1497
|
+
},
|
|
1498
|
+
total: {
|
|
1499
|
+
value: pageHits.length,
|
|
1500
|
+
relation: exactTotalKnown ? "eq" : "gte",
|
|
1501
|
+
},
|
|
1502
|
+
hits: pageHits.map((hit) => ({
|
|
1503
|
+
offset: hit.offset,
|
|
1504
|
+
score: hit.score,
|
|
1505
|
+
sort: hit.sortResponse,
|
|
1506
|
+
fields: hit.fields,
|
|
1507
|
+
source: hit.source,
|
|
1508
|
+
})),
|
|
1509
|
+
nextSearchAfter,
|
|
1510
|
+
});
|
|
1511
|
+
}
|
|
1512
|
+
|
|
1513
|
+
let seq = 0n;
|
|
1514
|
+
const sealedEnd = visibleSnapshotEndSeq < visibleSealedThrough ? visibleSnapshotEndSeq : visibleSealedThrough;
|
|
1515
|
+
const plannedSealedSegments = this.planSealedReadSegments(
|
|
1516
|
+
stream,
|
|
1517
|
+
0n,
|
|
1518
|
+
sealedEnd,
|
|
1519
|
+
exactCandidateInfo.segments,
|
|
1520
|
+
exactCandidateInfo.indexedThrough,
|
|
1521
|
+
"asc"
|
|
1522
|
+
);
|
|
1523
|
+
if (plannedSealedSegments) {
|
|
1524
|
+
for (const seg of plannedSealedSegments.segments) {
|
|
1525
|
+
const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq);
|
|
1526
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
1527
|
+
seq = seg.end_offset + 1n;
|
|
1528
|
+
if (timedOut) break;
|
|
1529
|
+
}
|
|
1530
|
+
if (seq <= plannedSealedSegments.sealedEndSeq) seq = plannedSealedSegments.sealedEndSeq + 1n;
|
|
1531
|
+
} else {
|
|
1532
|
+
while (seq <= visibleSnapshotEndSeq && seq <= visibleSealedThrough) {
|
|
1533
|
+
const seg = this.db.findSegmentForOffset(stream, seq);
|
|
1534
|
+
if (!seg) break;
|
|
1535
|
+
const scanRes = await scanSegmentWithFamiliesResult(seg, 0n, snapshotEndSeq);
|
|
1536
|
+
if (Result.isError(scanRes)) return scanRes;
|
|
1537
|
+
seq = seg.end_offset + 1n;
|
|
1538
|
+
if (timedOut) break;
|
|
1539
|
+
}
|
|
1540
|
+
}
|
|
1541
|
+
|
|
1542
|
+
if (!timedOut && coverageState.canSearchWalTail && seq <= snapshotEndSeq) {
|
|
1543
|
+
const tailStartedAt = Date.now();
|
|
1544
|
+
for (const record of this.db.iterWalRange(stream, seq, snapshotEndSeq)) {
|
|
1545
|
+
scannedTailDocs += 1;
|
|
1546
|
+
const matchRes = collectSearchMatchResult(BigInt(record.offset), record.payload);
|
|
1547
|
+
if (Result.isError(matchRes)) return matchRes;
|
|
1548
|
+
if (markTimedOutIfNeeded()) break;
|
|
1549
|
+
}
|
|
1550
|
+
scannedTailTimeMs += Date.now() - tailStartedAt;
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
hits.sort((left, right) => compareSearchHits(left, right, request.sort));
|
|
1554
|
+
const pageHits = hits.slice(0, request.size);
|
|
1555
|
+
const nextSearchAfter = pageHits.length === request.size ? pageHits[pageHits.length - 1].sortResponse : null;
|
|
1556
|
+
const exactTotalKnown = !timedOut && coverageState.complete && nextSearchAfter == null;
|
|
1557
|
+
|
|
1558
|
+
return Result.ok({
|
|
1559
|
+
stream,
|
|
1560
|
+
snapshotEndOffset,
|
|
1561
|
+
tookMs: Date.now() - startedAt,
|
|
1562
|
+
timedOut,
|
|
1563
|
+
timeoutMs: request.timeoutMs,
|
|
1564
|
+
coverage: {
|
|
1565
|
+
mode: coverageState.mode,
|
|
1566
|
+
complete: coverageState.complete && !timedOut,
|
|
1567
|
+
streamHeadOffset: coverageState.streamHeadOffset,
|
|
1568
|
+
visibleThroughOffset: coverageState.visibleThroughOffset,
|
|
1569
|
+
visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
|
|
1570
|
+
oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
|
|
1571
|
+
possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
|
|
1572
|
+
possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
|
|
1573
|
+
possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
|
|
1574
|
+
possibleMissingWalRows: coverageState.possibleMissingWalRows,
|
|
1575
|
+
indexedSegments,
|
|
1576
|
+
indexedSegmentTimeMs,
|
|
1577
|
+
ftsSectionGetMs,
|
|
1578
|
+
ftsDecodeMs,
|
|
1579
|
+
ftsClauseEstimateMs,
|
|
1580
|
+
scannedSegments,
|
|
1581
|
+
scannedSegmentTimeMs,
|
|
1582
|
+
scannedTailDocs,
|
|
1583
|
+
scannedTailTimeMs,
|
|
1584
|
+
exactCandidateTimeMs,
|
|
1585
|
+
indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
|
|
1586
|
+
},
|
|
1587
|
+
total: {
|
|
1588
|
+
value: pageHits.length,
|
|
1589
|
+
relation: exactTotalKnown ? "eq" : "gte",
|
|
1590
|
+
},
|
|
1591
|
+
hits: pageHits.map((hit) => ({
|
|
1592
|
+
offset: hit.offset,
|
|
1593
|
+
score: hit.score,
|
|
1594
|
+
sort: hit.sortResponse,
|
|
1595
|
+
fields: hit.fields,
|
|
1596
|
+
source: hit.source,
|
|
1597
|
+
})),
|
|
1598
|
+
nextSearchAfter,
|
|
1599
|
+
});
|
|
1600
|
+
} catch (e: unknown) {
|
|
1601
|
+
return Result.err({ kind: "internal", message: errorMessage(e) });
|
|
1602
|
+
} finally {
|
|
1603
|
+
leaveSearchPhase?.();
|
|
1604
|
+
}
|
|
1605
|
+
}
|
|
1606
|
+
|
|
1607
|
+
async search(args: { stream: string; request: SearchRequest }): Promise<SearchResultBatch> {
|
|
1608
|
+
const res = await this.searchResult(args);
|
|
1609
|
+
if (Result.isError(res)) throw dsError(res.error.message);
|
|
1610
|
+
return res.value;
|
|
1611
|
+
}
|
|
1612
|
+
|
|
1613
|
+
/**
 * Computes time-bucketed aggregations for one stream's configured rollup.
 *
 * Data is merged from three sources, in order:
 *  1. precomputed per-segment rollup companions ("agg" index family), used only
 *     for fully interval-aligned windows when the query is rollup-eligible;
 *  2. segment scans (either metrics-block companions, "mblk", or raw segment
 *     decode) for ranges the rollups did not cover;
 *  3. the WAL tail beyond `sealed_through`, when coverage allows it.
 *
 * Returns `Result.err` with kind "not_found" / "gone" / "internal"; never
 * throws (a final catch converts unexpected errors to an internal Result).
 */
async aggregateResult(args: { stream: string; request: AggregateRequest }): Promise<Result<AggregateResultBatch, ReaderError>> {
  const { stream, request } = args;
  // Memory-phase bookkeeping; `leaveAggregatePhase` is invoked in `finally`.
  const leaveAggregatePhase = this.memorySampler?.enter("aggregate", {
    stream,
    rollup: request.rollup,
    over_limit: this.memory?.isOverLimit() === true,
  });
  const srow = this.db.getStream(stream);
  try {
    if (!srow || this.db.isDeleted(srow)) return Result.err({ kind: "not_found", message: "not_found" });
    if (srow.expires_at_ms != null && this.db.nowMs() > srow.expires_at_ms) {
      return Result.err({ kind: "gone", message: "stream expired" });
    }

    const regRes = this.registry.getRegistryResult(stream);
    if (Result.isError(regRes)) return Result.err({ kind: "internal", message: regRes.error.message });
    const registry = regRes.value;
    const rollup = registry.search?.rollups?.[request.rollup];
    if (!registry.search || !rollup) {
      return Result.err({ kind: "internal", message: "rollup is not configured for this stream" });
    }

    const coverageState = this.computePublishedCoverageState(stream, srow, registry);
    const intervalMs = request.intervalMs;
    const intervalBig = BigInt(intervalMs);
    const fromMs = Number(request.fromMs);
    const toMs = Number(request.toMs);
    // Align the requested [fromMs, toMs) range to interval boundaries:
    // fullStartMs rounds UP (ceiling) via bigint division, fullEndMs rounds DOWN.
    // [fullStartMs, fullEndMs) is the span precomputed rollups may answer.
    const fullStartMs = Number(((request.fromMs + intervalBig - 1n) / intervalBig) * intervalBig);
    const fullEndMs = Number((request.toMs / intervalBig) * intervalBig);
    const hasFullWindows = fullEndMs > fullStartMs;
    const dimensions = new Set(rollup.dimensions ?? []);
    // Eligibility decides whether the query's filters can be answered from
    // rollup dimensions alone (exact filters on configured dimensions).
    const eligibility = extractRollupEligibility(request.q, dimensions);
    const selectedMeasures = new Set(request.measures ?? Object.keys(rollup.measures));
    const timestampField = rollup.timestampField ?? registry.search.primaryTimestampField;
    const primaryTimestampField = registry.search.primaryTimestampField;
    // When the rollup uses the primary timestamp, segment companions carry
    // cheap min/max bounds we can use for overlap pruning.
    const usesPrimaryTimestampBounds = timestampField === primaryTimestampField;

    // bucketStartMs -> (JSON group key -> accumulated group state).
    const buckets = new Map<number, Map<string, AggregateGroupInternal>>();
    const indexedSegmentSet = new Set<number>();
    const scannedSegmentSet = new Set<number>();
    let scannedTailDocs = 0;
    const indexFamiliesUsed = new Set<string>();
    const metricsProfile = registry.search.profile === "metrics";
    let usedRollups = false;

    // Folds one contribution into the bucket/group accumulator. The group key
    // is the requested groupBy fields projected out of the full dimension key
    // (missing fields become null), serialized with JSON.stringify.
    const mergeBucketMeasures = (bucketStartMs: number, dimensionsKey: Record<string, string | null>, measures: Record<string, AggMeasureState>): void => {
      let groups = buckets.get(bucketStartMs);
      if (!groups) {
        groups = new Map();
        buckets.set(bucketStartMs, groups);
      }
      const projectedKey: Record<string, string | null> = {};
      for (const field of request.groupBy) projectedKey[field] = dimensionsKey[field] ?? null;
      const groupKey = JSON.stringify(projectedKey);
      let group = groups.get(groupKey);
      if (!group) {
        group = { key: projectedKey, measures: {} };
        groups.set(groupKey, group);
      }
      for (const [measureName, state] of Object.entries(measures)) {
        // Only measures the caller asked for (default: all rollup measures).
        if (!selectedMeasures.has(measureName)) continue;
        const existing = group.measures[measureName];
        if (!existing) {
          // Clone so later merges cannot mutate the source state.
          group.measures[measureName] = cloneAggMeasureState(state);
          continue;
        }
        group.measures[measureName] = mergeAggMeasureState(existing, state);
      }
    };

    // Re-applies the query's exact dimension filters to rollup groups, since
    // rollup companions are not pre-filtered by the query.
    const matchesExactFilters = (dimensionsKey: Record<string, string | null>): boolean => {
      for (const [field, value] of Object.entries(eligibility.exactFilters)) {
        if ((dimensionsKey[field] ?? null) !== value) return false;
      }
      return true;
    };

    // Ranges that must be satisfied by scanning (not rollups): either the
    // whole request (rollups unusable) or just the unaligned head/tail slivers.
    const partialRanges: Array<{ startMs: number; endMs: number }> = [];
    if (!eligibility.eligible || !hasFullWindows) {
      partialRanges.push({ startMs: fromMs, endMs: toMs });
    } else {
      if (fromMs < fullStartMs) partialRanges.push({ startMs: fromMs, endMs: fullStartMs });
      if (fullEndMs < toMs) partialRanges.push({ startMs: fullEndMs, endMs: toMs });
    }

    // Full decode-and-evaluate scan of one sealed segment, restricted to
    // scanRanges. Offsets are derived by counting records from start_offset.
    const scanSegmentForAggregateResult = async (
      seg: SegmentRow,
      scanRanges: Array<{ startMs: number; endMs: number }>
    ): Promise<Result<void, ReaderError>> => {
      const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
      let curOffset = seg.start_offset;
      for (const blockRes of iterateBlocksResult(segBytes)) {
        if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
        for (const record of blockRes.value.decoded.records) {
          const parsedRes = decodeJsonPayloadResult(this.registry, stream, curOffset, record.payload);
          if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
          const contributionRes = extractRollupContributionResult(registry, rollup, curOffset, parsedRes.value);
          if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
          const contribution = contributionRes.value;
          if (!contribution) {
            curOffset += 1n;
            continue;
          }
          // Half-open range test: startMs inclusive, endMs exclusive.
          const inRange = scanRanges.some((range) => contribution.timestampMs >= range.startMs && contribution.timestampMs < range.endMs);
          if (!inRange) {
            curOffset += 1n;
            continue;
          }
          if (request.q) {
            const evalRes = evaluateSearchQueryResult(registry, curOffset, request.q, parsedRes.value);
            if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
            if (!evalRes.value.matched) {
              curOffset += 1n;
              continue;
            }
          }
          const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
          mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
          curOffset += 1n;
        }
      }
      scannedSegmentSet.add(seg.segment_index);
      return Result.ok(undefined);
    };

    // Cheap overlap test for pruning: prefer stored primary-timestamp min/max
    // bounds when applicable, else fall back to the generic time-range probe.
    const segmentMayOverlapAggregateRange = async (
      seg: SegmentRow,
      startMs: number,
      endMs: number
    ): Promise<boolean> => {
      if (usesPrimaryTimestampBounds) {
        const companionRow = this.db.getSearchSegmentCompanion(stream, seg.segment_index);
        if (companionRow?.primary_timestamp_min_ms != null && companionRow.primary_timestamp_max_ms != null) {
          return companionRow.primary_timestamp_max_ms >= BigInt(startMs) && companionRow.primary_timestamp_min_ms < BigInt(endMs);
        }
      }
      return this.segmentMayOverlapTimeRange(stream, seg.segment_index, startMs, endMs, timestampField);
    };

    // Scan using the metrics-block companion ("mblk") instead of decoding raw
    // payloads. NOTE(review): here the query filter runs BEFORE contribution
    // extraction, the reverse of the raw-segment scan's order — presumably
    // equivalent because both are pure checks; confirm if behavior diverges.
    const scanMetricsBlockForAggregateResult = async (
      seg: SegmentRow,
      companion: MetricsBlockSectionView,
      scanRanges: Array<{ startMs: number; endMs: number }>
    ): Promise<Result<void, ReaderError>> => {
      for (const record of companion.records()) {
        const offsetSeq = seg.start_offset + BigInt(record.doc_id);
        const timestampMs = record.windowStartMs;
        const inRange = scanRanges.some((range) => timestampMs >= range.startMs && timestampMs < range.endMs);
        if (!inRange) continue;
        const materialized = materializeMetricsBlockRecord(record);
        if (request.q) {
          const evalRes = evaluateSearchQueryResult(registry, offsetSeq, request.q, materialized);
          if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
          if (!evalRes.value.matched) continue;
        }
        const contributionRes = extractRollupContributionResult(registry, rollup, offsetSeq, materialized);
        if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
        const contribution = contributionRes.value;
        if (!contribution) continue;
        const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
        mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
      }
      indexedSegmentSet.add(seg.segment_index);
      indexFamiliesUsed.add("mblk");
      return Result.ok(undefined);
    };

    // Main pass over visible published segments in order.
    for (const seg of this.db.listSegmentsForStream(stream)) {
      if (seg.segment_index >= coverageState.visiblePublishedSegmentCount) break;
      let coveredAlignedWindows = false;
      // Fast path: answer the aligned window span from the segment's
      // precomputed rollup companion when available.
      if (eligibility.eligible && this.index && hasFullWindows) {
        const overlapsAlignedWindow = await segmentMayOverlapAggregateRange(seg, fullStartMs, fullEndMs);
        if (overlapsAlignedWindow) {
          const companion = await this.index.getAggSegmentCompanion(stream, seg.segment_index);
          const intervalCompanion = companion?.getInterval(request.rollup, intervalMs);
          if (intervalCompanion) {
            coveredAlignedWindows = true;
            indexedSegmentSet.add(seg.segment_index);
            indexFamiliesUsed.add("agg");
            usedRollups = true;
            intervalCompanion.forEachGroupInRange(fullStartMs, fullEndMs, (windowStartMs, group) => {
              if (!matchesExactFilters(group.dimensions)) return;
              mergeBucketMeasures(windowStartMs, group.dimensions, group.measures);
            });
          }
        }
      }

      // Whatever the rollup companion did not cover must be scanned:
      // either just the unaligned slivers, or the whole requested range.
      const scanRanges =
        !eligibility.eligible || !hasFullWindows
          ? [{ startMs: fromMs, endMs: toMs }]
          : coveredAlignedWindows
            ? partialRanges
            : [{ startMs: fromMs, endMs: toMs }];
      if (scanRanges.length === 0) continue;
      let overlaps = false;
      for (const range of scanRanges) {
        if (await segmentMayOverlapAggregateRange(seg, range.startMs, range.endMs)) {
          overlaps = true;
          break;
        }
      }
      if (!overlaps) continue;
      let scanRes: Result<void, ReaderError>;
      if (metricsProfile && this.index) {
        // Metrics profile prefers the block companion; falls back to raw scan.
        const companion = await this.index.getMetricsBlockSegmentCompanion(stream, seg.segment_index);
        if (companion) {
          scanRes = await scanMetricsBlockForAggregateResult(seg, companion, scanRanges);
        } else {
          scanRes = await scanSegmentForAggregateResult(seg, scanRanges);
        }
      } else {
        scanRes = await scanSegmentForAggregateResult(seg, scanRanges);
      }
      if (Result.isError(scanRes)) return scanRes;
    }

    // WAL tail: events appended after the last sealed offset.
    const tailStart = srow.sealed_through + 1n;
    const tailEnd = srow.next_offset - 1n;
    if (coverageState.canSearchWalTail && tailStart <= tailEnd) {
      for (const record of this.db.iterWalRange(stream, tailStart, tailEnd)) {
        scannedTailDocs += 1;
        const parsedRes = decodeJsonPayloadResult(this.registry, stream, BigInt(record.offset), record.payload);
        if (Result.isError(parsedRes)) return Result.err({ kind: "internal", message: parsedRes.error.message });
        const contributionRes = extractRollupContributionResult(registry, rollup, BigInt(record.offset), parsedRes.value);
        if (Result.isError(contributionRes)) return Result.err({ kind: "internal", message: contributionRes.error.message });
        const contribution = contributionRes.value;
        // Tail always uses the full requested [fromMs, toMs) range.
        if (!contribution || contribution.timestampMs < fromMs || contribution.timestampMs >= toMs) continue;
        if (request.q) {
          const evalRes = evaluateSearchQueryResult(registry, BigInt(record.offset), request.q, parsedRes.value);
          if (Result.isError(evalRes)) return Result.err({ kind: "internal", message: evalRes.error.message });
          if (!evalRes.value.matched) continue;
        }
        const bucketStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;
        mergeBucketMeasures(bucketStartMs, contribution.dimensions, contribution.measures);
      }
    }

    // Deterministic output: buckets ordered by start time, groups by their
    // serialized key, measures by name.
    const bucketList = Array.from(buckets.entries())
      .sort((a, b) => a[0] - b[0])
      .map(([startMs, groups]) => ({
        start: new Date(startMs).toISOString(),
        end: new Date(startMs + intervalMs).toISOString(),
        groups: Array.from(groups.values())
          .sort((a, b) => JSON.stringify(a.key).localeCompare(JSON.stringify(b.key)))
          .map((group) => ({
            key: group.key,
            measures: Object.fromEntries(
              Object.entries(group.measures)
                .sort((a, b) => a[0].localeCompare(b[0]))
                .map(([name, state]) => [name, formatAggMeasureState(state)])
            ),
          })),
      }));

    return Result.ok({
      stream,
      rollup: request.rollup,
      from: new Date(fromMs).toISOString(),
      to: new Date(toMs).toISOString(),
      interval: request.interval,
      coverage: {
        mode: coverageState.mode,
        complete: coverageState.complete,
        streamHeadOffset: coverageState.streamHeadOffset,
        visibleThroughOffset: coverageState.visibleThroughOffset,
        visibleThroughPrimaryTimestampMax: coverageState.visibleThroughPrimaryTimestampMax,
        oldestOmittedAppendAt: coverageState.oldestOmittedAppendAt,
        possibleMissingEventsUpperBound: coverageState.possibleMissingEventsUpperBound,
        possibleMissingUploadedSegments: coverageState.possibleMissingUploadedSegments,
        possibleMissingSealedRows: coverageState.possibleMissingSealedRows,
        possibleMissingWalRows: coverageState.possibleMissingWalRows,
        usedRollups,
        indexedSegments: indexedSegmentSet.size,
        scannedSegments: scannedSegmentSet.size,
        scannedTailDocs,
        indexFamiliesUsed: Array.from(indexFamiliesUsed).sort(),
      },
      buckets: bucketList,
    });
  } catch (e: unknown) {
    return Result.err({ kind: "internal", message: errorMessage(e) });
  } finally {
    leaveAggregatePhase?.();
  }
}
|
|
1899
|
+
|
|
1900
|
+
async aggregate(args: { stream: string; request: AggregateRequest }): Promise<AggregateResultBatch> {
|
|
1901
|
+
const res = await this.aggregateResult(args);
|
|
1902
|
+
if (Result.isError(res)) throw dsError(res.error.message);
|
|
1903
|
+
return res.value;
|
|
1904
|
+
}
|
|
1905
|
+
|
|
1906
|
+
  /**
   * Scans one segment newest-to-oldest, feeding matching records into
   * `state.collectSearchMatchResult` until the page completes, the offset
   * range is exhausted, or the deadline passes.
   *
   * Pruning happens in three stages before any segment bytes are fetched:
   *  1. exact-candidate info — skip fully-indexed segments the secondary
   *     index says cannot match;
   *  2. cursor bound — skip segments whose column min/max cannot overlap the
   *     search_after cursor;
   *  3. col/fts family doc-id candidates — restrict which local doc ids are
   *     evaluated, or skip outright when the candidate set is empty.
   *
   * Elapsed time is attributed to the "indexed" bucket when any index family
   * produced candidates, otherwise to the "scanned" bucket. Timeouts are
   * soft: they set `state.setTimedOut(true)` and return Ok so partial
   * results are preserved.
   */
  private async scanSegmentReverseForSearchResult(
    stream: string,
    seg: SegmentRow,
    exactCandidateInfo: SegmentCandidateInfo,
    cursorFieldBound: SearchCursorFieldBound | null,
    columnClauses: SearchColumnClause[],
    ftsClauses: SearchFtsClause[],
    rangeStartSeq: bigint,
    rangeEndSeq: bigint,
    state: {
      indexFamiliesUsed: Set<string>;
      collectSearchMatchResult: (offsetSeq: bigint, payload: Uint8Array) => Result<void, ReaderError>;
      deadline: number | null;
      isTimedOut: () => boolean;
      setTimedOut: (next: boolean) => void;
      stopIfPageComplete: () => boolean;
      addIndexedSegment: () => void;
      addScannedSegment: () => void;
      addIndexedSegmentTimeMs: (deltaMs: number) => void;
      addFtsSectionGetMs: (deltaMs: number) => void;
      addFtsDecodeMs: (deltaMs: number) => void;
      addFtsClauseEstimateMs: (deltaMs: number) => void;
      addScannedSegmentTimeMs: (deltaMs: number) => void;
    }
  ): Promise<Result<void, ReaderError>> {
    const segmentStartedAt = Date.now();
    // Soft deadline check: flips the shared timed-out flag and tells the
    // caller to stop; never surfaces as an error.
    const markTimedOutIfNeeded = (): boolean => {
      if (state.deadline == null || Date.now() < state.deadline) return false;
      state.setTimedOut(true);
      return true;
    };
    if (markTimedOutIfNeeded()) return Result.ok(undefined);
    // Stage 1: this segment is fully covered by the exact-match candidate
    // index and is not a candidate — skip without fetching.
    if (
      exactCandidateInfo.segments &&
      seg.segment_index < exactCandidateInfo.indexedThrough &&
      !exactCandidateInfo.segments.has(seg.segment_index)
    ) {
      return Result.ok(undefined);
    }
    // Stage 2: column min/max stats prove the segment lies entirely on the
    // wrong side of the search_after cursor.
    if (cursorFieldBound) {
      const overlapsCursor = await this.segmentMayOverlapSearchCursor(stream, seg.segment_index, cursorFieldBound);
      if (!overlapsCursor) {
        state.indexFamiliesUsed.add("col");
        state.addIndexedSegment();
        state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
        return Result.ok(undefined);
      }
    }
    if (markTimedOutIfNeeded()) return Result.ok(undefined);

    // Stage 3: per-segment doc-id candidates from the col/fts index families.
    const familyCandidatesRes = await this.resolveSearchFamilyCandidatesResult(
      stream,
      seg.segment_index,
      columnClauses,
      ftsClauses,
      {
        addFtsSectionGetMs: state.addFtsSectionGetMs,
        addFtsDecodeMs: state.addFtsDecodeMs,
        addFtsClauseEstimateMs: state.addFtsClauseEstimateMs,
      }
    );
    if (Result.isError(familyCandidatesRes)) return Result.err({ kind: "internal", message: familyCandidatesRes.error.message });
    if (markTimedOutIfNeeded()) return Result.ok(undefined);
    const familyCandidates = familyCandidatesRes.value;
    // A present-but-empty candidate set is proof no doc here matches.
    if (familyCandidates.docIds && familyCandidates.docIds.size === 0) {
      if (familyCandidates.usedFamilies.size > 0) state.addIndexedSegment();
      for (const family of familyCandidates.usedFamilies) state.indexFamiliesUsed.add(family);
      if (familyCandidates.usedFamilies.size > 0) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
      return Result.ok(undefined);
    }
    // Decides which stats bucket (indexed vs scanned) absorbs this segment.
    const usedIndexedFamilies = familyCandidates.usedFamilies.size > 0;
    if (familyCandidates.usedFamilies.size > 0) {
      state.addIndexedSegment();
      for (const family of familyCandidates.usedFamilies) state.indexFamiliesUsed.add(family);
    } else {
      state.addScannedSegment();
    }

    if (markTimedOutIfNeeded()) return Result.ok(undefined);
    const segBytes = await loadSegmentBytes(this.os, seg, this.diskCache, this.retryOpts());
    if (markTimedOutIfNeeded()) return Result.ok(undefined);
    // Decode every block up front; the reverse walk below needs each block's
    // record count to derive block start offsets from the segment end offset.
    const decodedBlocks: Array<{ records: Array<{ payload: Uint8Array }> }> = [];
    for (const blockRes of iterateBlocksResult(segBytes)) {
      if (Result.isError(blockRes)) return Result.err({ kind: "internal", message: blockRes.error.message });
      decodedBlocks.push({ records: blockRes.value.decoded.records });
      if (markTimedOutIfNeeded()) {
        if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
        else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
        return Result.ok(undefined);
      }
    }

    // Walk blocks and records newest-to-oldest so hits arrive in descending
    // offset order.
    let blockEndOffset = seg.end_offset;
    for (let blockIndex = decodedBlocks.length - 1; blockIndex >= 0; blockIndex--) {
      const decoded = decodedBlocks[blockIndex]!;
      const blockStartOffset = blockEndOffset - BigInt(decoded.records.length) + 1n;
      for (let recordIndex = decoded.records.length - 1; recordIndex >= 0; recordIndex--) {
        const offsetSeq = blockStartOffset + BigInt(recordIndex);
        if (offsetSeq > rangeEndSeq) continue;
        // Walking backwards, dropping below rangeStartSeq means everything
        // that remains is out of range — this segment is done.
        if (offsetSeq < rangeStartSeq) {
          if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
          else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
          return Result.ok(undefined);
        }
        const localDocId = Number(offsetSeq - seg.start_offset);
        // No candidate set → evaluate every record; otherwise candidates only.
        if (!familyCandidates.docIds || familyCandidates.docIds.has(localDocId)) {
          const matchRes = state.collectSearchMatchResult(offsetSeq, decoded.records[recordIndex]!.payload);
          if (Result.isError(matchRes)) return matchRes;
        }
        if (markTimedOutIfNeeded()) {
          if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
          else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
          return Result.ok(undefined);
        }
        if (state.stopIfPageComplete()) {
          if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
          else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
          return Result.ok(undefined);
        }
      }
      blockEndOffset = blockStartOffset - 1n;
    }

    if (usedIndexedFamilies) state.addIndexedSegmentTimeMs(Date.now() - segmentStartedAt);
    else state.addScannedSegmentTimeMs(Date.now() - segmentStartedAt);
    return Result.ok(undefined);
  }
|
|
2033
|
+
|
|
2034
|
+
private async segmentMayOverlapSearchCursor(
|
|
2035
|
+
stream: string,
|
|
2036
|
+
segmentIndex: number,
|
|
2037
|
+
bound: SearchCursorFieldBound
|
|
2038
|
+
): Promise<boolean> {
|
|
2039
|
+
if (!this.index || bound.encoded == null) return true;
|
|
2040
|
+
const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
|
|
2041
|
+
if (!companion) return true;
|
|
2042
|
+
|
|
2043
|
+
if (companion.primaryTimestampField === bound.sort.field && companion.minTimestampMs() != null && companion.maxTimestampMs() != null) {
|
|
2044
|
+
const target = bound.after;
|
|
2045
|
+
if (typeof target !== "bigint") return true;
|
|
2046
|
+
const minMs = companion.minTimestampMs()!;
|
|
2047
|
+
const maxMs = companion.maxTimestampMs()!;
|
|
2048
|
+
return bound.sort.direction === "desc" ? minMs <= target : maxMs >= target;
|
|
2049
|
+
}
|
|
2050
|
+
|
|
2051
|
+
const field = companion.getField(bound.sort.field);
|
|
2052
|
+
if (!field) return true;
|
|
2053
|
+
const minValue = field.minValue();
|
|
2054
|
+
const maxValue = field.maxValue();
|
|
2055
|
+
if (minValue == null || maxValue == null) return true;
|
|
2056
|
+
const boundValue = bound.after;
|
|
2057
|
+
const cmpMin = compareComparableValues(minValue, boundValue);
|
|
2058
|
+
const cmpMax = compareComparableValues(maxValue, boundValue);
|
|
2059
|
+
return bound.sort.direction === "desc" ? cmpMin <= 0 : cmpMax >= 0;
|
|
2060
|
+
}
|
|
2061
|
+
|
|
2062
|
+
private async segmentMayOverlapTimeRange(
|
|
2063
|
+
stream: string,
|
|
2064
|
+
segmentIndex: number,
|
|
2065
|
+
startMs: number,
|
|
2066
|
+
endMs: number,
|
|
2067
|
+
timestampField: string
|
|
2068
|
+
): Promise<boolean> {
|
|
2069
|
+
if (!this.index) return true;
|
|
2070
|
+
const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
|
|
2071
|
+
if (companion && companion.primaryTimestampField === timestampField) {
|
|
2072
|
+
const minMs = companion.minTimestampMs() == null ? null : Number(companion.minTimestampMs());
|
|
2073
|
+
const maxMs = companion.maxTimestampMs() == null ? null : Number(companion.maxTimestampMs());
|
|
2074
|
+
if (Number.isFinite(minMs) && Number.isFinite(maxMs)) {
|
|
2075
|
+
return (maxMs as number) >= startMs && (minMs as number) < endMs;
|
|
2076
|
+
}
|
|
2077
|
+
}
|
|
2078
|
+
const metricsBlock = await this.index.getMetricsBlockSegmentCompanion(stream, segmentIndex);
|
|
2079
|
+
if (!metricsBlock) return true;
|
|
2080
|
+
const minMs = metricsBlock.minWindowStartMs;
|
|
2081
|
+
const maxMs = metricsBlock.maxWindowEndMs;
|
|
2082
|
+
if (!Number.isFinite(minMs) || !Number.isFinite(maxMs)) return true;
|
|
2083
|
+
return (maxMs as number) >= startMs && (minMs as number) < endMs;
|
|
2084
|
+
}
|
|
2085
|
+
|
|
2086
|
+
private async resolveCandidateSegments(
|
|
2087
|
+
stream: string,
|
|
2088
|
+
keyBytes: Uint8Array | null,
|
|
2089
|
+
filter: CompiledReadFilter | null
|
|
2090
|
+
): Promise<SegmentCandidateInfo> {
|
|
2091
|
+
if (!this.index) return { segments: null, indexedThrough: 0 };
|
|
2092
|
+
|
|
2093
|
+
const candidates: IndexCandidate[] = [];
|
|
2094
|
+
if (keyBytes) {
|
|
2095
|
+
const keyCandidate = await this.index.candidateSegmentsForRoutingKey(stream, keyBytes);
|
|
2096
|
+
if (keyCandidate) candidates.push(keyCandidate);
|
|
2097
|
+
}
|
|
2098
|
+
if (filter) {
|
|
2099
|
+
for (const clause of collectPositiveExactFilterClauses(filter)) {
|
|
2100
|
+
const filterCandidate = await this.index.candidateSegmentsForSecondaryIndex(
|
|
2101
|
+
stream,
|
|
2102
|
+
clause.field,
|
|
2103
|
+
utf8Bytes(clause.canonicalValue)
|
|
2104
|
+
);
|
|
2105
|
+
if (filterCandidate) candidates.push(filterCandidate);
|
|
2106
|
+
}
|
|
2107
|
+
}
|
|
2108
|
+
if (candidates.length === 0) return { segments: null, indexedThrough: 0 };
|
|
2109
|
+
|
|
2110
|
+
const indexedThrough = candidates.reduce((min, candidate) => Math.min(min, candidate.indexedThrough), Number.MAX_SAFE_INTEGER);
|
|
2111
|
+
if (!Number.isFinite(indexedThrough) || indexedThrough <= 0) {
|
|
2112
|
+
return { segments: null, indexedThrough: 0 };
|
|
2113
|
+
}
|
|
2114
|
+
|
|
2115
|
+
let intersection: Set<number> | null = null;
|
|
2116
|
+
for (const candidate of candidates) {
|
|
2117
|
+
const covered = new Set<number>();
|
|
2118
|
+
for (const segmentIndex of candidate.segments) {
|
|
2119
|
+
if (segmentIndex < indexedThrough) covered.add(segmentIndex);
|
|
2120
|
+
}
|
|
2121
|
+
if (intersection == null) {
|
|
2122
|
+
intersection = covered;
|
|
2123
|
+
continue;
|
|
2124
|
+
}
|
|
2125
|
+
for (const segmentIndex of Array.from(intersection)) {
|
|
2126
|
+
if (!covered.has(segmentIndex)) intersection.delete(segmentIndex);
|
|
2127
|
+
}
|
|
2128
|
+
}
|
|
2129
|
+
return { segments: intersection ?? new Set<number>(), indexedThrough };
|
|
2130
|
+
}
|
|
2131
|
+
|
|
2132
|
+
private async resolveSearchExactCandidateSegments(stream: string, query: CompiledSearchQuery): Promise<SegmentCandidateInfo> {
|
|
2133
|
+
if (!this.index) return { segments: null, indexedThrough: 0 };
|
|
2134
|
+
const clauses = collectPositiveSearchExactClauses(query);
|
|
2135
|
+
if (clauses.length === 0) return { segments: null, indexedThrough: 0 };
|
|
2136
|
+
|
|
2137
|
+
const candidates: IndexCandidate[] = [];
|
|
2138
|
+
for (const clause of clauses) {
|
|
2139
|
+
const candidate = await this.index.candidateSegmentsForSecondaryIndex(stream, clause.field, utf8Bytes(clause.canonicalValue));
|
|
2140
|
+
if (candidate) candidates.push(candidate);
|
|
2141
|
+
}
|
|
2142
|
+
if (candidates.length === 0) return { segments: null, indexedThrough: 0 };
|
|
2143
|
+
|
|
2144
|
+
const indexedThrough = candidates.reduce((min, candidate) => Math.min(min, candidate.indexedThrough), Number.MAX_SAFE_INTEGER);
|
|
2145
|
+
if (!Number.isFinite(indexedThrough) || indexedThrough <= 0) return { segments: null, indexedThrough: 0 };
|
|
2146
|
+
|
|
2147
|
+
let intersection: Set<number> | null = null;
|
|
2148
|
+
for (const candidate of candidates) {
|
|
2149
|
+
const covered = new Set<number>();
|
|
2150
|
+
for (const segmentIndex of candidate.segments) {
|
|
2151
|
+
if (segmentIndex < indexedThrough) covered.add(segmentIndex);
|
|
2152
|
+
}
|
|
2153
|
+
if (intersection == null) {
|
|
2154
|
+
intersection = covered;
|
|
2155
|
+
continue;
|
|
2156
|
+
}
|
|
2157
|
+
for (const segmentIndex of Array.from(intersection)) {
|
|
2158
|
+
if (!covered.has(segmentIndex)) intersection.delete(segmentIndex);
|
|
2159
|
+
}
|
|
2160
|
+
}
|
|
2161
|
+
return { segments: intersection ?? new Set<number>(), indexedThrough };
|
|
2162
|
+
}
|
|
2163
|
+
|
|
2164
|
+
private async resolveColumnCandidateDocIdsResult(
|
|
2165
|
+
stream: string,
|
|
2166
|
+
segmentIndex: number,
|
|
2167
|
+
clauses: ReadFilterColumnClause[]
|
|
2168
|
+
): Promise<Result<Set<number> | null, { message: string }>> {
|
|
2169
|
+
if (!this.index || clauses.length === 0) return Result.ok(null);
|
|
2170
|
+
const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
|
|
2171
|
+
if (!companion) return Result.ok(null);
|
|
2172
|
+
|
|
2173
|
+
let intersection: Set<number> | null = null;
|
|
2174
|
+
for (const clause of clauses) {
|
|
2175
|
+
const clauseRes = filterDocIdsByColumnResult({
|
|
2176
|
+
companion,
|
|
2177
|
+
field: clause.field,
|
|
2178
|
+
op: clause.op,
|
|
2179
|
+
value: clause.compareValue,
|
|
2180
|
+
});
|
|
2181
|
+
if (Result.isError(clauseRes)) return Result.ok(null);
|
|
2182
|
+
if (intersection == null) {
|
|
2183
|
+
intersection = clauseRes.value;
|
|
2184
|
+
continue;
|
|
2185
|
+
}
|
|
2186
|
+
for (const docId of Array.from(intersection)) {
|
|
2187
|
+
if (!clauseRes.value.has(docId)) intersection.delete(docId);
|
|
2188
|
+
}
|
|
2189
|
+
if (intersection.size === 0) break;
|
|
2190
|
+
}
|
|
2191
|
+
return Result.ok(intersection ?? new Set<number>());
|
|
2192
|
+
}
|
|
2193
|
+
|
|
2194
|
+
private async resolveSearchColumnCandidateDocIdsResult(
|
|
2195
|
+
stream: string,
|
|
2196
|
+
segmentIndex: number,
|
|
2197
|
+
clauses: SearchColumnClause[]
|
|
2198
|
+
): Promise<Result<Set<number> | null, { message: string }>> {
|
|
2199
|
+
if (!this.index || clauses.length === 0) return Result.ok(null);
|
|
2200
|
+
const companion = await this.index.getColSegmentCompanion(stream, segmentIndex);
|
|
2201
|
+
if (!companion) return Result.ok(null);
|
|
2202
|
+
|
|
2203
|
+
let intersection: Set<number> | null = null;
|
|
2204
|
+
for (const clause of clauses) {
|
|
2205
|
+
const clauseRes = filterDocIdsByColumnResult({
|
|
2206
|
+
companion,
|
|
2207
|
+
field: clause.field,
|
|
2208
|
+
op: clause.op,
|
|
2209
|
+
value: clause.compareValue,
|
|
2210
|
+
});
|
|
2211
|
+
if (Result.isError(clauseRes)) return Result.ok(null);
|
|
2212
|
+
if (intersection == null) {
|
|
2213
|
+
intersection = clauseRes.value;
|
|
2214
|
+
continue;
|
|
2215
|
+
}
|
|
2216
|
+
for (const docId of Array.from(intersection)) {
|
|
2217
|
+
if (!clauseRes.value.has(docId)) intersection.delete(docId);
|
|
2218
|
+
}
|
|
2219
|
+
if (intersection.size === 0) break;
|
|
2220
|
+
}
|
|
2221
|
+
return Result.ok(intersection ?? new Set<number>());
|
|
2222
|
+
}
|
|
2223
|
+
|
|
2224
|
+
private async resolveSearchFtsCandidateDocIdsResult(
|
|
2225
|
+
stream: string,
|
|
2226
|
+
segmentIndex: number,
|
|
2227
|
+
clauses: SearchFtsClause[],
|
|
2228
|
+
stats?: {
|
|
2229
|
+
addFtsSectionGetMs?: (deltaMs: number) => void;
|
|
2230
|
+
addFtsDecodeMs?: (deltaMs: number) => void;
|
|
2231
|
+
addFtsClauseEstimateMs?: (deltaMs: number) => void;
|
|
2232
|
+
}
|
|
2233
|
+
): Promise<Result<Set<number> | null, { message: string }>> {
|
|
2234
|
+
if (!this.index || clauses.length === 0) return Result.ok(null);
|
|
2235
|
+
const companionRes = this.index.getFtsSegmentCompanionWithStats
|
|
2236
|
+
? await this.index.getFtsSegmentCompanionWithStats(stream, segmentIndex)
|
|
2237
|
+
: { companion: await this.index.getFtsSegmentCompanion(stream, segmentIndex), stats: { sectionGetMs: 0, decodeMs: 0 } };
|
|
2238
|
+
stats?.addFtsSectionGetMs?.(companionRes.stats.sectionGetMs);
|
|
2239
|
+
stats?.addFtsDecodeMs?.(companionRes.stats.decodeMs);
|
|
2240
|
+
const companion = companionRes.companion;
|
|
2241
|
+
if (!companion) return Result.ok(null);
|
|
2242
|
+
const clausesRes = filterDocIdsByFtsClausesResult({
|
|
2243
|
+
companion,
|
|
2244
|
+
clauses,
|
|
2245
|
+
onEstimateMs: (deltaMs) => {
|
|
2246
|
+
stats?.addFtsClauseEstimateMs?.(deltaMs);
|
|
2247
|
+
},
|
|
2248
|
+
});
|
|
2249
|
+
if (Result.isError(clausesRes)) return clausesRes;
|
|
2250
|
+
return Result.ok(clausesRes.value);
|
|
2251
|
+
}
|
|
2252
|
+
|
|
2253
|
+
private async resolveSearchFamilyCandidatesResult(
|
|
2254
|
+
stream: string,
|
|
2255
|
+
segmentIndex: number,
|
|
2256
|
+
columnClauses: SearchColumnClause[],
|
|
2257
|
+
ftsClauses: SearchFtsClause[],
|
|
2258
|
+
stats?: {
|
|
2259
|
+
addFtsSectionGetMs?: (deltaMs: number) => void;
|
|
2260
|
+
addFtsDecodeMs?: (deltaMs: number) => void;
|
|
2261
|
+
addFtsClauseEstimateMs?: (deltaMs: number) => void;
|
|
2262
|
+
}
|
|
2263
|
+
): Promise<Result<SearchFamilyCandidateInfo, { message: string }>> {
|
|
2264
|
+
let intersection: Set<number> | null = null;
|
|
2265
|
+
const usedFamilies = new Set<string>();
|
|
2266
|
+
|
|
2267
|
+
if (columnClauses.length > 0) {
|
|
2268
|
+
const columnRes = await this.resolveSearchColumnCandidateDocIdsResult(stream, segmentIndex, columnClauses);
|
|
2269
|
+
if (Result.isError(columnRes)) return columnRes;
|
|
2270
|
+
if (columnRes.value) {
|
|
2271
|
+
intersection = columnRes.value;
|
|
2272
|
+
usedFamilies.add("col");
|
|
2273
|
+
}
|
|
2274
|
+
}
|
|
2275
|
+
|
|
2276
|
+
if (ftsClauses.length > 0) {
|
|
2277
|
+
const ftsRes = await this.resolveSearchFtsCandidateDocIdsResult(stream, segmentIndex, ftsClauses, stats);
|
|
2278
|
+
if (Result.isError(ftsRes)) return ftsRes;
|
|
2279
|
+
if (ftsRes.value) {
|
|
2280
|
+
if (intersection == null) intersection = ftsRes.value;
|
|
2281
|
+
else {
|
|
2282
|
+
for (const docId of Array.from(intersection)) {
|
|
2283
|
+
if (!ftsRes.value.has(docId)) intersection.delete(docId);
|
|
2284
|
+
}
|
|
2285
|
+
}
|
|
2286
|
+
usedFamilies.add("fts");
|
|
2287
|
+
}
|
|
2288
|
+
}
|
|
2289
|
+
|
|
2290
|
+
return Result.ok({ docIds: intersection, usedFamilies });
|
|
2291
|
+
}
|
|
448
2292
|
}
|
|
449
2293
|
|
|
450
2294
|
function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
|
|
@@ -452,3 +2296,146 @@ function bytesEqual(a: Uint8Array, b: Uint8Array): boolean {
|
|
|
452
2296
|
for (let i = 0; i < a.byteLength; i++) if (a[i] !== b[i]) return false;
|
|
453
2297
|
return true;
|
|
454
2298
|
}
|
|
2299
|
+
|
|
2300
|
+
function buildSearchSortInternalValues(
|
|
2301
|
+
sorts: SearchSortSpec[],
|
|
2302
|
+
fields: Record<string, unknown>,
|
|
2303
|
+
evaluation: SearchEvaluation,
|
|
2304
|
+
offsetSeq: bigint
|
|
2305
|
+
): Array<bigint | number | string | boolean | null> {
|
|
2306
|
+
return sorts.map((sort) => {
|
|
2307
|
+
if (sort.kind === "score") return evaluation.score;
|
|
2308
|
+
if (sort.kind === "offset") return offsetSeq;
|
|
2309
|
+
const rawValue = fields[sort.field];
|
|
2310
|
+
const scalar = Array.isArray(rawValue) ? rawValue[0] : rawValue;
|
|
2311
|
+
if (scalar == null) return null;
|
|
2312
|
+
if (sort.config.kind === "integer" || sort.config.kind === "float" || sort.config.kind === "date" || sort.config.kind === "bool") {
|
|
2313
|
+
return canonicalizeColumnValue(sort.config, scalar);
|
|
2314
|
+
}
|
|
2315
|
+
return canonicalizeExactValue(sort.config, scalar);
|
|
2316
|
+
});
|
|
2317
|
+
}
|
|
2318
|
+
|
|
2319
|
+
function buildSearchSortResponseValues(
|
|
2320
|
+
sorts: SearchSortSpec[],
|
|
2321
|
+
sortInternal: Array<bigint | number | string | boolean | null>,
|
|
2322
|
+
offset: string
|
|
2323
|
+
): unknown[] {
|
|
2324
|
+
return sorts.map((sort, index) => {
|
|
2325
|
+
const value = sortInternal[index];
|
|
2326
|
+
if (sort.kind === "offset") return offset;
|
|
2327
|
+
if (typeof value === "bigint") return Number(value);
|
|
2328
|
+
return value;
|
|
2329
|
+
});
|
|
2330
|
+
}
|
|
2331
|
+
|
|
2332
|
+
function compareComparableValues(left: bigint | number | string | boolean | null, right: bigint | number | string | boolean | null): number {
|
|
2333
|
+
if (left == null && right == null) return 0;
|
|
2334
|
+
if (left == null) return 1;
|
|
2335
|
+
if (right == null) return -1;
|
|
2336
|
+
if (typeof left === "bigint" && typeof right === "bigint") return left < right ? -1 : left > right ? 1 : 0;
|
|
2337
|
+
if (typeof left === "number" && typeof right === "number") return left < right ? -1 : left > right ? 1 : 0;
|
|
2338
|
+
if (typeof left === "boolean" && typeof right === "boolean") return left === right ? 0 : left ? 1 : -1;
|
|
2339
|
+
const ls = String(left);
|
|
2340
|
+
const rs = String(right);
|
|
2341
|
+
return ls < rs ? -1 : ls > rs ? 1 : 0;
|
|
2342
|
+
}
|
|
2343
|
+
|
|
2344
|
+
function compareSearchHits(left: SearchHitInternal, right: SearchHitInternal, sorts: SearchSortSpec[]): number {
|
|
2345
|
+
for (let i = 0; i < sorts.length; i++) {
|
|
2346
|
+
const cmp = compareComparableValues(left.sortInternal[i] ?? null, right.sortInternal[i] ?? null);
|
|
2347
|
+
if (cmp === 0) continue;
|
|
2348
|
+
return sorts[i].direction === "asc" ? cmp : -cmp;
|
|
2349
|
+
}
|
|
2350
|
+
return 0;
|
|
2351
|
+
}
|
|
2352
|
+
|
|
2353
|
+
function compareSearchAfterValues(
|
|
2354
|
+
sortInternal: Array<bigint | number | string | boolean | null>,
|
|
2355
|
+
sorts: SearchSortSpec[],
|
|
2356
|
+
searchAfter: unknown[]
|
|
2357
|
+
): number {
|
|
2358
|
+
for (let i = 0; i < sorts.length; i++) {
|
|
2359
|
+
const after = normalizeSearchAfterValue(sorts[i], searchAfter[i]);
|
|
2360
|
+
const cmp = compareComparableValues(sortInternal[i] ?? null, after);
|
|
2361
|
+
if (cmp === 0) continue;
|
|
2362
|
+
return sorts[i].direction === "asc" ? cmp : -cmp;
|
|
2363
|
+
}
|
|
2364
|
+
return 0;
|
|
2365
|
+
}
|
|
2366
|
+
|
|
2367
|
+
function compareEncodedValues(left: Uint8Array, right: Uint8Array): number {
|
|
2368
|
+
const length = Math.min(left.byteLength, right.byteLength);
|
|
2369
|
+
for (let i = 0; i < length; i++) {
|
|
2370
|
+
if (left[i] === right[i]) continue;
|
|
2371
|
+
return left[i]! < right[i]! ? -1 : 1;
|
|
2372
|
+
}
|
|
2373
|
+
if (left.byteLength === right.byteLength) return 0;
|
|
2374
|
+
return left.byteLength < right.byteLength ? -1 : 1;
|
|
2375
|
+
}
|
|
2376
|
+
|
|
2377
|
+
function encodeSearchCursorValue(sort: Extract<SearchSortSpec, { kind: "field" }>, value: bigint | number | string | boolean | null): Uint8Array | null {
|
|
2378
|
+
if (value == null) return null;
|
|
2379
|
+
if (sort.config.kind === "integer" || sort.config.kind === "date") {
|
|
2380
|
+
return typeof value === "bigint" ? encodeSortableInt64(value) : null;
|
|
2381
|
+
}
|
|
2382
|
+
if (sort.config.kind === "float") {
|
|
2383
|
+
return typeof value === "number" ? encodeSortableFloat64(value) : null;
|
|
2384
|
+
}
|
|
2385
|
+
if (sort.config.kind === "bool") {
|
|
2386
|
+
return typeof value === "boolean" ? encodeSortableBool(value) : null;
|
|
2387
|
+
}
|
|
2388
|
+
return null;
|
|
2389
|
+
}
|
|
2390
|
+
|
|
2391
|
+
function resolveSearchCursorFieldBound(request: SearchRequest): SearchCursorFieldBound | null {
|
|
2392
|
+
if (!request.searchAfter || request.searchAfter.length === 0) return null;
|
|
2393
|
+
const leadingSort = request.sort[0];
|
|
2394
|
+
if (!leadingSort || leadingSort.kind !== "field") return null;
|
|
2395
|
+
if (
|
|
2396
|
+
leadingSort.config.kind !== "integer" &&
|
|
2397
|
+
leadingSort.config.kind !== "float" &&
|
|
2398
|
+
leadingSort.config.kind !== "date" &&
|
|
2399
|
+
leadingSort.config.kind !== "bool"
|
|
2400
|
+
) {
|
|
2401
|
+
return null;
|
|
2402
|
+
}
|
|
2403
|
+
const after = normalizeSearchAfterValue(leadingSort, request.searchAfter[0]);
|
|
2404
|
+
return {
|
|
2405
|
+
kind: "field",
|
|
2406
|
+
sort: leadingSort,
|
|
2407
|
+
after,
|
|
2408
|
+
encoded: encodeSearchCursorValue(leadingSort, after),
|
|
2409
|
+
};
|
|
2410
|
+
}
|
|
2411
|
+
|
|
2412
|
+
function normalizeSearchAfterValue(sort: SearchSortSpec, raw: unknown): bigint | number | string | boolean | null {
|
|
2413
|
+
if (raw == null) return null;
|
|
2414
|
+
if (sort.kind === "offset") {
|
|
2415
|
+
if (typeof raw !== "string") return null;
|
|
2416
|
+
const parsed = parseOffsetResult(raw);
|
|
2417
|
+
if (Result.isError(parsed)) return null;
|
|
2418
|
+
return offsetToSeqOrNeg1(parsed.value);
|
|
2419
|
+
}
|
|
2420
|
+
if (sort.kind === "score") {
|
|
2421
|
+
return typeof raw === "number" && Number.isFinite(raw) ? raw : null;
|
|
2422
|
+
}
|
|
2423
|
+
if (sort.config.kind === "integer" || sort.config.kind === "date") {
|
|
2424
|
+
if (typeof raw === "number" && Number.isFinite(raw)) return BigInt(Math.trunc(raw));
|
|
2425
|
+
if (typeof raw === "string" && raw.trim() !== "") {
|
|
2426
|
+
try {
|
|
2427
|
+
return BigInt(raw.trim());
|
|
2428
|
+
} catch {
|
|
2429
|
+
return null;
|
|
2430
|
+
}
|
|
2431
|
+
}
|
|
2432
|
+
return null;
|
|
2433
|
+
}
|
|
2434
|
+
if (sort.config.kind === "float") return typeof raw === "number" && Number.isFinite(raw) ? raw : null;
|
|
2435
|
+
if (sort.config.kind === "bool") return typeof raw === "boolean" ? raw : null;
|
|
2436
|
+
return typeof raw === "string" ? raw : null;
|
|
2437
|
+
}
|
|
2438
|
+
|
|
2439
|
+
function compareSearchAfter(hit: SearchHitInternal, sorts: SearchSortSpec[], searchAfter: unknown[]): number {
|
|
2440
|
+
return compareSearchAfterValues(hit.sortInternal, sorts, searchAfter);
|
|
2441
|
+
}
|