@tungthedev/streams-server 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +45 -0
- package/CONTRIBUTING.md +76 -0
- package/LICENSE +201 -0
- package/README.md +58 -0
- package/SECURITY.md +42 -0
- package/bin/prisma-streams-server +2 -0
- package/package.json +46 -0
- package/src/app.ts +583 -0
- package/src/app_core.ts +3144 -0
- package/src/app_local.ts +206 -0
- package/src/auth.ts +124 -0
- package/src/auto_tune.ts +69 -0
- package/src/backpressure.ts +66 -0
- package/src/bootstrap.ts +613 -0
- package/src/compute/demo_entry.ts +415 -0
- package/src/compute/demo_site.ts +1242 -0
- package/src/compute/entry.ts +19 -0
- package/src/compute/package_entry.ts +4 -0
- package/src/compute/virtual-modules.d.ts +15 -0
- package/src/compute/worker_module_url.ts +9 -0
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +402 -0
- package/src/db/bootstrap_store.ts +9 -0
- package/src/db/db.ts +2424 -0
- package/src/db/schema.ts +925 -0
- package/src/db/sqlite_manifest_snapshot.ts +81 -0
- package/src/db/sqlite_touch_store.ts +491 -0
- package/src/db/sqlite_wal_store.ts +472 -0
- package/src/details/full_mode_details.ts +568 -0
- package/src/expiry_sweeper.ts +47 -0
- package/src/foreground_activity.ts +55 -0
- package/src/hist.ts +169 -0
- package/src/index/binary_fuse.ts +379 -0
- package/src/index/indexer.ts +947 -0
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +863 -0
- package/src/index/run_cache.ts +84 -0
- package/src/index/run_format.ts +213 -0
- package/src/index/schedule.ts +28 -0
- package/src/index/secondary_indexer.ts +901 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +309 -0
- package/src/lens/lens.ts +501 -0
- package/src/manifest.ts +249 -0
- package/src/memory.ts +334 -0
- package/src/metrics.ts +147 -0
- package/src/metrics_emitter.ts +83 -0
- package/src/notifier.ts +180 -0
- package/src/objectstore/accounting.ts +151 -0
- package/src/objectstore/interface.ts +13 -0
- package/src/objectstore/mock_r2.ts +269 -0
- package/src/objectstore/null.ts +32 -0
- package/src/objectstore/r2.ts +318 -0
- package/src/observe/pairing.ts +61 -0
- package/src/observe/request.ts +772 -0
- package/src/offset.ts +70 -0
- package/src/postgres/bootstrap.ts +269 -0
- package/src/postgres/companions.ts +197 -0
- package/src/postgres/control_restore.ts +109 -0
- package/src/postgres/details.ts +189 -0
- package/src/postgres/lexicon_index.ts +260 -0
- package/src/postgres/routing_index.ts +189 -0
- package/src/postgres/rows.ts +132 -0
- package/src/postgres/schema.ts +355 -0
- package/src/postgres/secondary_index.ts +238 -0
- package/src/postgres/segments.ts +900 -0
- package/src/postgres/stats.ts +103 -0
- package/src/postgres/store.ts +947 -0
- package/src/postgres/touch.ts +591 -0
- package/src/postgres/types.ts +32 -0
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +473 -0
- package/src/profiles/generic.ts +51 -0
- package/src/profiles/index.ts +237 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +83 -0
- package/src/profiles/otelTraces/normalize.ts +955 -0
- package/src/profiles/otelTraces/otlp.ts +1002 -0
- package/src/profiles/otelTraces/schema.ts +408 -0
- package/src/profiles/otelTraces.ts +390 -0
- package/src/profiles/profile.ts +284 -0
- package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
- package/src/profiles/stateProtocol/changes.ts +24 -0
- package/src/profiles/stateProtocol/ingest.ts +115 -0
- package/src/profiles/stateProtocol/routes.ts +511 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +107 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2986 -0
- package/src/runtime/hash.ts +156 -0
- package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
- package/src/runtime/hash_vendor/NOTICE.md +8 -0
- package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +237 -0
- package/src/schema/lens_schema.ts +290 -0
- package/src/schema/proof.ts +547 -0
- package/src/schema/read_json.ts +51 -0
- package/src/schema/registry.ts +966 -0
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +409 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +327 -0
- package/src/search/companion_manager.ts +1305 -0
- package/src/search/companion_plan.ts +229 -0
- package/src/search/exact_format.ts +281 -0
- package/src/search/exact_runtime.ts +55 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +270 -0
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +403 -0
- package/src/segment/segmenter.ts +412 -0
- package/src/segment/segmenter_worker.ts +72 -0
- package/src/segment/segmenter_workers.ts +130 -0
- package/src/server.ts +264 -0
- package/src/server_auto_tune.ts +158 -0
- package/src/sqlite/adapter.ts +335 -0
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +205 -0
- package/src/store/append.ts +50 -0
- package/src/store/bootstrap_restore_store.ts +71 -0
- package/src/store/capabilities.ts +86 -0
- package/src/store/full_mode_details_store.ts +71 -0
- package/src/store/index_store.ts +104 -0
- package/src/store/profile_touch_store.ts +1 -0
- package/src/store/rows.ts +144 -0
- package/src/store/schema_profile_store.ts +73 -0
- package/src/store/schema_publication.ts +6 -0
- package/src/store/segment_manifest_store.ts +129 -0
- package/src/store/segment_read_store.ts +22 -0
- package/src/store/stats_accounting_store.ts +83 -0
- package/src/store/touch_store.ts +98 -0
- package/src/store/wal_store.ts +21 -0
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_keys.ts +158 -0
- package/src/touch/live_metrics.ts +841 -0
- package/src/touch/live_templates.ts +449 -0
- package/src/touch/manager.ts +1292 -0
- package/src/touch/process_batch.ts +576 -0
- package/src/touch/processor_worker.ts +85 -0
- package/src/touch/spec.ts +459 -0
- package/src/touch/touch_journal.ts +771 -0
- package/src/touch/touch_key_id.ts +20 -0
- package/src/touch/worker_pool.ts +191 -0
- package/src/touch/worker_protocol.ts +57 -0
- package/src/types/proper-lockfile.d.ts +1 -0
- package/src/uploader.ts +358 -0
- package/src/util/base32_crockford.ts +81 -0
- package/src/util/bloom256.ts +67 -0
- package/src/util/byte_lru.ts +73 -0
- package/src/util/cleanup.ts +22 -0
- package/src/util/crc32c.ts +29 -0
- package/src/util/ds_error.ts +15 -0
- package/src/util/duration.ts +17 -0
- package/src/util/endian.ts +53 -0
- package/src/util/json_pointer.ts +148 -0
- package/src/util/log.ts +25 -0
- package/src/util/lru.ts +53 -0
- package/src/util/retry.ts +35 -0
- package/src/util/siphash.ts +71 -0
- package/src/util/stream_paths.ts +50 -0
- package/src/util/time.ts +14 -0
- package/src/util/yield.ts +3 -0
- package/src/util/zstd.ts +24 -0
|
@@ -0,0 +1,1305 @@
|
|
|
1
|
+
import { randomBytes } from "node:crypto";
|
|
2
|
+
import { Result } from "better-result";
|
|
3
|
+
import type { Config } from "../config";
|
|
4
|
+
import type { SearchCompanionPlanRow, SearchSegmentCompanionRow, SegmentRow } from "../store/rows";
|
|
5
|
+
import type { SearchCompanionIndexStore } from "../store/index_store";
|
|
6
|
+
import type { Metrics } from "../metrics";
|
|
7
|
+
import type { ObjectStore } from "../objectstore/interface";
|
|
8
|
+
import { SchemaRegistryStore, type SchemaRegistry, type SearchFieldConfig } from "../schema/registry";
|
|
9
|
+
import { SegmentDiskCache } from "../segment/cache";
|
|
10
|
+
import { loadSegmentBytesCached } from "../segment/cached_segment";
|
|
11
|
+
import { iterateBlockRecordsResult } from "../segment/format";
|
|
12
|
+
import { dsError } from "../util/ds_error.ts";
|
|
13
|
+
import { RuntimeMemorySampler } from "../runtime_memory_sampler";
|
|
14
|
+
import { ConcurrencyGate } from "../concurrency_gate";
|
|
15
|
+
import type { ForegroundActivityTracker } from "../foreground_activity";
|
|
16
|
+
import { LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS, shouldDeferEnqueuedIndexWork, shouldWaitForLowMemoryIndexQuiet } from "../index/schedule";
|
|
17
|
+
import { retry } from "../util/retry";
|
|
18
|
+
import { yieldToEventLoop } from "../util/yield";
|
|
19
|
+
import { searchCompanionObjectKey, streamHash16Hex } from "../util/stream_paths";
|
|
20
|
+
import { buildDesiredSearchCompanionPlan, hashSearchCompanionPlan, type SearchCompanionPlan } from "./companion_plan";
|
|
21
|
+
import {
|
|
22
|
+
PSCIX2_MAX_TOC_BYTES,
|
|
23
|
+
decodeCompanionSectionPayloadResult,
|
|
24
|
+
decodeBundledSegmentCompanionResult,
|
|
25
|
+
decodeBundledSegmentCompanionTocResult,
|
|
26
|
+
encodeBundledSegmentCompanionFromPayloads,
|
|
27
|
+
encodeCompanionSectionPayload,
|
|
28
|
+
type BundledSegmentCompanion,
|
|
29
|
+
type CompanionSectionKind,
|
|
30
|
+
type CompanionSectionInputMap,
|
|
31
|
+
type CompanionSectionMap,
|
|
32
|
+
type CompanionToc,
|
|
33
|
+
type EncodedCompanionSectionPayload,
|
|
34
|
+
} from "./companion_format";
|
|
35
|
+
import { CompanionFileCache } from "./companion_file_cache";
|
|
36
|
+
import type { ColFieldInput, ColScalar, ColSectionInput, ColSectionView } from "./col_format";
|
|
37
|
+
import {
|
|
38
|
+
analyzeTextValue,
|
|
39
|
+
canonicalizeExactValue,
|
|
40
|
+
canonicalizeColumnValue,
|
|
41
|
+
extractRawSearchValuesForFieldsResult,
|
|
42
|
+
normalizeKeywordValue,
|
|
43
|
+
} from "./schema";
|
|
44
|
+
import type { ExactFieldInput, ExactSectionInput, ExactSectionView } from "./exact_format";
|
|
45
|
+
import type { FtsFieldInput, FtsSectionInput, FtsSectionView, FtsTermInput } from "./fts_format";
|
|
46
|
+
import { buildMetricsBlockRecord } from "../profiles/metrics/normalize";
|
|
47
|
+
import type { MetricsBlockSectionInput, MetricsBlockSectionView } from "../profiles/metrics/block_format";
|
|
48
|
+
import { parseDurationMsResult } from "../util/duration";
|
|
49
|
+
import {
|
|
50
|
+
cloneAggMeasureState,
|
|
51
|
+
extractRollupContributionResult,
|
|
52
|
+
mergeAggMeasureState,
|
|
53
|
+
rollupRequiredFieldNames,
|
|
54
|
+
} from "./aggregate";
|
|
55
|
+
import type { AggMeasureState, AggSectionInput, AggWindowGroup, AggSectionView } from "./agg_format";
|
|
56
|
+
import type { SearchRollupConfig } from "../schema/registry";
|
|
57
|
+
import type { CompanionSectionLookupStats } from "../index/indexer";
|
|
58
|
+
|
|
59
|
+
/** Error shape carried by the `Result`-returning helpers in this module (created via `invalidCompanionBuild`); `message` holds the detail. */
type CompanionBuildError = { kind: "invalid_companion_build"; message: string };
|
|
60
|
+
|
|
61
|
+
/**
 * Wraps a message in a failed `Result` tagged `invalid_companion_build`.
 *
 * @param message Description of what was invalid.
 * @returns An error `Result` whose payload is `{ kind: "invalid_companion_build", message }`.
 */
function invalidCompanionBuild<T = never>(message: string): Result<T, CompanionBuildError> {
  const failure: CompanionBuildError = { kind: "invalid_companion_build", message };
  return Result.err(failure);
}
|
|
64
|
+
|
|
65
|
+
/**
 * Produces a printable message for an arbitrary thrown value.
 *
 * @param error Anything that was thrown or rejected.
 * @returns `String(error.message)` when a non-nullish `message` property exists,
 *          otherwise `String(error)`.
 */
function errorMessage(error: unknown): string {
  const detail = (error as { message?: unknown } | null | undefined)?.message;
  if (detail === undefined || detail === null) return String(error);
  return String(detail);
}
|
|
68
|
+
|
|
69
|
+
type ColumnFieldBuilder = {
|
|
70
|
+
config: SearchFieldConfig;
|
|
71
|
+
kind: ColFieldInput["kind"];
|
|
72
|
+
docIds: number[];
|
|
73
|
+
values: ColScalar[];
|
|
74
|
+
invalid: boolean;
|
|
75
|
+
};
|
|
76
|
+
|
|
77
|
+
type FtsFieldBuilder = {
|
|
78
|
+
config: SearchFieldConfig;
|
|
79
|
+
companion: FtsFieldInput;
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
type ExactFieldBuilder = {
|
|
83
|
+
config: SearchFieldConfig;
|
|
84
|
+
companion: ExactFieldInput;
|
|
85
|
+
};
|
|
86
|
+
|
|
87
|
+
type GroupBuilder = {
|
|
88
|
+
key: string;
|
|
89
|
+
measures: Record<string, AggMeasureState>;
|
|
90
|
+
};
|
|
91
|
+
|
|
92
|
+
type MetricsBlockBuilder = {
|
|
93
|
+
records: MetricsBlockSectionInput["records"];
|
|
94
|
+
minWindowStartMs: number | undefined;
|
|
95
|
+
maxWindowEndMs: number | undefined;
|
|
96
|
+
};
|
|
97
|
+
|
|
98
|
+
type AggRollupBuilder = {
|
|
99
|
+
rollup: SearchRollupConfig;
|
|
100
|
+
intervalsMs: number[];
|
|
101
|
+
intervalMap: Map<number, Map<number, Map<string, GroupBuilder>>>;
|
|
102
|
+
dimensionNames: string[];
|
|
103
|
+
fieldNames: string[];
|
|
104
|
+
};
|
|
105
|
+
|
|
106
|
+
type CompanionBuildProgress = {
|
|
107
|
+
docCount: number;
|
|
108
|
+
colFields: number;
|
|
109
|
+
colValues: number;
|
|
110
|
+
exactFields: number;
|
|
111
|
+
exactTerms: number;
|
|
112
|
+
exactPostings: number;
|
|
113
|
+
ftsFields: number;
|
|
114
|
+
ftsTerms: number;
|
|
115
|
+
ftsPostings: number;
|
|
116
|
+
ftsPositions: number;
|
|
117
|
+
aggRollups: number;
|
|
118
|
+
aggWindows: number;
|
|
119
|
+
aggGroups: number;
|
|
120
|
+
metricRecords: number;
|
|
121
|
+
};
|
|
122
|
+
|
|
123
|
+
const PAYLOAD_DECODER = new TextDecoder();
|
|
124
|
+
|
|
125
|
+
/**
 * Three-way comparison for scalar values.
 *
 * When both operands share a type they are compared natively (`false` sorts
 * before `true`). Operands of different types fall back to comparing their
 * string forms with `localeCompare`.
 *
 * @param left First operand.
 * @param right Second operand.
 * @returns A negative number, zero, or a positive number.
 */
function compareValues(left: bigint | number | boolean, right: bigint | number | boolean): number {
  if (typeof left === "bigint" && typeof right === "bigint") {
    if (left < right) return -1;
    return left > right ? 1 : 0;
  }
  if (typeof left === "number" && typeof right === "number") {
    if (left < right) return -1;
    return left > right ? 1 : 0;
  }
  if (typeof left === "boolean" && typeof right === "boolean") {
    // true -> 1, false -> 0, so the difference is exactly -1, 0 or 1.
    return Number(left) - Number(right);
  }
  const leftText = String(left);
  const rightText = String(right);
  return leftText.localeCompare(rightText);
}
|
|
131
|
+
|
|
132
|
+
/** Separator placed between encoded dimension values inside an aggregate group key. */
const AGG_DIMENSION_SEPARATOR = "\u001f";
/** Sentinel that stands for a `null` dimension value inside an aggregate group key. */
const AGG_DIMENSION_NULL = "\u0000";
/** Escaped form of a separator that occurs inside a dimension value (the separator doubled). */
const AGG_DIMENSION_ESCAPED_SEPARATOR = `${AGG_DIMENSION_SEPARATOR}${AGG_DIMENSION_SEPARATOR}`;

/**
 * Encodes one dimension value for use inside a group key.
 *
 * @param value Dimension value, or `null` when the dimension is absent.
 * @returns The null sentinel for `null`; otherwise the value with every
 *          separator character doubled.
 */
function encodeAggDimensionPart(value: string | null): string {
  if (value == null) return AGG_DIMENSION_NULL;
  return value.split(AGG_DIMENSION_SEPARATOR).join(AGG_DIMENSION_ESCAPED_SEPARATOR);
}

/**
 * Reverses `encodeAggDimensionPart` for a single, still-escaped part.
 *
 * @param value Encoded part (doubled separators not yet collapsed).
 * @returns `null` for the null sentinel; otherwise the value with each doubled
 *          separator collapsed back to one.
 */
function decodeAggDimensionPart(value: string): string | null {
  if (value === AGG_DIMENSION_NULL) return null;
  return value.split(AGG_DIMENSION_ESCAPED_SEPARATOR).join(AGG_DIMENSION_SEPARATOR);
}

/**
 * Builds the group key for a set of dimension values.
 *
 * @param dimensions Dimension values by name; missing names are treated as `null`.
 * @param dimensionNames Dimension names in key order.
 * @returns Encoded parts joined with the separator.
 */
function encodeAggGroupKey(dimensions: Record<string, string | null>, dimensionNames: string[]): string {
  return dimensionNames.map((name) => encodeAggDimensionPart(dimensions[name] ?? null)).join(AGG_DIMENSION_SEPARATOR);
}

/**
 * Splits a group key back into its dimension values.
 *
 * The key is split on separators that are not part of a doubled (escaped)
 * pair. Each part is kept in its escaped form while splitting and unescaped
 * exactly once by `decodeAggDimensionPart`. Unescaping both while splitting
 * and again per part would collapse a value containing two consecutive
 * separators down to one.
 *
 * NOTE(review): the key format itself is unchanged and remains ambiguous in
 * two corner cases: a string value equal to the null sentinel decodes as
 * `null`, and a value that begins or ends with the separator can be confused
 * with the neighbouring part boundary.
 *
 * @param groupKey Key produced by `encodeAggGroupKey`.
 * @param dimensionNames Dimension names in key order.
 * @returns Dimension values by name; names beyond the parts present in the key map to `null`.
 */
function decodeAggGroupKey(groupKey: string, dimensionNames: string[]): Record<string, string | null> {
  const rawParts: string[] = [];
  let current = "";
  for (let index = 0; index < groupKey.length; index++) {
    const char = groupKey[index]!;
    if (char !== AGG_DIMENSION_SEPARATOR) {
      current += char;
      continue;
    }
    if (groupKey[index + 1] === AGG_DIMENSION_SEPARATOR) {
      // Escaped separator: keep the pair intact so it is unescaped only once, below.
      current += AGG_DIMENSION_ESCAPED_SEPARATOR;
      index += 1;
      continue;
    }
    // A lone separator ends the current part.
    rawParts.push(current);
    current = "";
  }
  rawParts.push(current);

  const decoded: Record<string, string | null> = {};
  dimensionNames.forEach((name, position) => {
    decoded[name] = decodeAggDimensionPart(rawParts[position] ?? AGG_DIMENSION_NULL);
  });
  return decoded;
}
|
|
174
|
+
|
|
175
|
+
/**
 * Reads the set of section kinds recorded on a companion row.
 *
 * @param row Companion row whose `sections_json` is expected to hold a JSON array.
 * @returns The recognised kinds (`exact`, `col`, `fts`, `agg`, `mblk`) found in
 *          the array. Unknown entries are ignored; unparsable or non-array JSON
 *          yields an empty set.
 */
function parseSectionKinds(row: SearchSegmentCompanionRow): Set<CompanionSectionKind> {
  const kinds = new Set<CompanionSectionKind>();
  let parsed: unknown;
  try {
    parsed = JSON.parse(row.sections_json);
  } catch {
    return kinds;
  }
  if (!Array.isArray(parsed)) return kinds;
  for (const value of parsed) {
    switch (value) {
      case "exact":
      case "col":
      case "fts":
      case "agg":
      case "mblk":
        kinds.add(value);
        break;
      default:
        break;
    }
  }
  return kinds;
}
|
|
188
|
+
|
|
189
|
+
/**
 * Reads the per-section sizes recorded on a companion row.
 *
 * @param row Companion row whose `section_sizes_json` is expected to hold a JSON object.
 * @returns A map from key to size containing only finite, strictly positive
 *          numbers. Unparsable JSON, arrays and non-objects yield `{}`.
 */
function parseSectionSizes(row: SearchSegmentCompanionRow): Record<string, number> {
  let parsed: unknown;
  try {
    parsed = JSON.parse(row.section_sizes_json);
  } catch {
    return {};
  }
  if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) return {};

  const sizes: Record<string, number> = {};
  for (const [kind, size] of Object.entries(parsed)) {
    if (typeof size !== "number") continue;
    if (!Number.isFinite(size) || !(size > 0)) continue;
    sizes[kind] = size;
  }
  return sizes;
}
|
|
202
|
+
|
|
203
|
+
export class SearchCompanionManager {
|
|
204
|
+
private readonly queue = new Set<string>();
|
|
205
|
+
private readonly building = new Set<string>();
|
|
206
|
+
private readonly fileCache: CompanionFileCache;
|
|
207
|
+
private readonly decodedSectionCache = new Map<
|
|
208
|
+
string,
|
|
209
|
+
{ bytes: number; companion: CompanionSectionMap[CompanionSectionKind] }
|
|
210
|
+
>();
|
|
211
|
+
private decodedSectionCacheBytes = 0;
|
|
212
|
+
private readonly segmentCache?: SegmentDiskCache;
|
|
213
|
+
private readonly yieldBlocks: number;
|
|
214
|
+
private readonly memorySampler?: RuntimeMemorySampler;
|
|
215
|
+
private readonly asyncGate: ConcurrencyGate;
|
|
216
|
+
private readonly foregroundActivity?: ForegroundActivityTracker;
|
|
217
|
+
private timer: any | null = null;
|
|
218
|
+
private wakeTimer: any | null = null;
|
|
219
|
+
private running = false;
|
|
220
|
+
private stopped = false;
|
|
221
|
+
private tickPromise: Promise<void> | null = null;
|
|
222
|
+
private firstQueuedAtMs: number | null = null;
|
|
223
|
+
|
|
224
|
+
/**
 * Stores the collaborators and creates the companion file cache rooted at
 * `<cfg.rootDir>/cache/companions`.
 *
 * `searchCompanionYieldBlocks` is clamped to at least 1, and a private
 * `ConcurrencyGate(1)` is created when no gate is supplied.
 */
constructor(
  private readonly cfg: Config,
  private readonly db: SearchCompanionIndexStore,
  private readonly os: ObjectStore,
  private readonly registry: SchemaRegistryStore,
  segmentCache?: SegmentDiskCache,
  private readonly publishManifest?: (stream: string) => Promise<void>,
  private readonly onMetadataChanged?: (stream: string) => void,
  private readonly metrics?: Metrics,
  memorySampler?: RuntimeMemorySampler,
  asyncGate?: ConcurrencyGate,
  foregroundActivity?: ForegroundActivityTracker
) {
  // Optional collaborators are stored as given (possibly undefined).
  this.segmentCache = segmentCache;
  this.memorySampler = memorySampler;
  this.foregroundActivity = foregroundActivity;

  this.asyncGate = asyncGate ?? new ConcurrencyGate(1);
  this.yieldBlocks = Math.max(1, cfg.searchCompanionYieldBlocks);

  const cacheDir = `${cfg.rootDir}/cache/companions`;
  this.fileCache = new CompanionFileCache(
    cacheDir,
    cfg.searchCompanionFileCacheMaxBytes,
    cfg.searchCompanionFileCacheMaxAgeMs,
    cfg.searchCompanionMappedCacheEntries
  );
}
|
|
249
|
+
|
|
250
|
+
/**
 * Gives other work a chance to run: defers to the foreground-activity tracker
 * when one was supplied, otherwise yields to the event loop.
 */
private async yieldBackgroundWork(): Promise<void> {
  const tracker = this.foregroundActivity;
  await (tracker ? tracker.yieldBackgroundWork() : yieldToEventLoop());
}
|
|
257
|
+
|
|
258
|
+
/**
 * Starts the periodic tick (every `cfg.indexCheckIntervalMs`). Calling it while
 * a timer already exists is a no-op.
 */
start(): void {
  if (this.timer) return;
  this.stopped = false;
  const onInterval = (): void => {
    if (this.stopped) return;
    this.runTick();
  };
  this.timer = setInterval(onInterval, this.cfg.indexCheckIntervalMs);
}
|
|
265
|
+
|
|
266
|
+
/**
 * Stops scheduling, waits for any in-flight tick to settle, then resets the
 * queue timestamp and clears the file cache's mapped entries.
 */
async stop(): Promise<void> {
  this.stopped = true;

  if (this.timer) clearInterval(this.timer);
  this.timer = null;
  if (this.wakeTimer) clearTimeout(this.wakeTimer);
  this.wakeTimer = null;

  // Re-read the field after every await; it is cleared when the tick promise settles.
  let inflight = this.tickPromise;
  while (inflight) {
    await inflight;
    inflight = this.tickPromise;
  }

  this.firstQueuedAtMs = null;
  this.fileCache.clearMapped();
}
|
|
276
|
+
|
|
277
|
+
/**
 * Queues a stream for companion building and requests a tick.
 *
 * The time of the first enqueue is remembered in `firstQueuedAtMs`. When
 * `shouldDeferEnqueuedIndexWork(cfg)` is true the tick is requested after
 * `LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS`, otherwise with no delay. Ignored after `stop()`.
 *
 * @param stream Stream name to queue.
 */
enqueue(stream: string): void {
  if (this.stopped) return;
  this.firstQueuedAtMs ??= Date.now();
  this.queue.add(stream);
  const delayMs = shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0;
  this.scheduleTick(delayMs);
}
|
|
287
|
+
|
|
288
|
+
/**
 * Arms a one-shot wake-up that runs a tick after `delayMs`.
 *
 * Does nothing when the manager is stopped, was never started, or a wake-up is
 * already pending. When the wake-up fires it re-arms itself instead of ticking
 * if `shouldWaitForLowMemoryIndexQuiet` asks to wait (after the quiet period)
 * or if a tick is still running (after 250 ms).
 *
 * @param delayMs Delay before the wake-up fires; defaults to 0.
 */
private scheduleTick(delayMs = 0): void {
  const canSchedule = !this.stopped && Boolean(this.timer) && !this.wakeTimer;
  if (!canSchedule) return;

  const onWake = (): void => {
    // Clear first so the re-arming calls below are not rejected as "already pending".
    this.wakeTimer = null;
    if (this.stopped) return;

    const recentlyActive = this.foregroundActivity?.wasActiveWithin(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS) ?? false;
    if (shouldWaitForLowMemoryIndexQuiet(this.cfg, this.firstQueuedAtMs, recentlyActive)) {
      this.scheduleTick(LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS);
      return;
    }
    if (this.running) {
      this.scheduleTick(250);
      return;
    }
    this.runTick();
  };

  this.wakeTimer = setTimeout(onWake, delayMs);
  // Call unref when the timer handle provides it.
  (this.wakeTimer as { unref?: () => void }).unref?.();
}
|
|
311
|
+
|
|
312
|
+
/**
 * Launches `tick()` unless one is already in flight and tracks it in `tickPromise`.
 *
 * A failure is logged, except when the manager is stopped and the message
 * matches one of the listed database/disk phrases.
 */
private runTick(): void {
  if (this.tickPromise) return;

  const shutdownPhrases = ["database has closed", "closed database", "statement has finalized", "disk i/o error"];
  const reportFailure = (e: unknown): void => {
    const lower = errorMessage(e).toLowerCase();
    const shutdownError = shutdownPhrases.some((phrase) => lower.includes(phrase));
    if (this.stopped && shutdownError) return;
    // eslint-disable-next-line no-console
    console.error("bundled companion tick failed", e);
  };

  const promise: Promise<void> = this.tick()
    .catch(reportFailure)
    .finally(() => {
      // Only clear the slot if it still refers to this run.
      if (this.tickPromise === promise) this.tickPromise = null;
    });
  this.tickPromise = promise;
}
|
|
332
|
+
|
|
333
|
+
/**
 * Returns the `col` section of a segment's companion, or `null` when the
 * section lookup yields nothing.
 */
async getColSegmentCompanion(stream: string, segmentIndex: number): Promise<ColSectionView | null> {
  const section = await this.getSectionCompanion(stream, segmentIndex, "col");
  return section ?? null;
}
|
|
336
|
+
|
|
337
|
+
/**
 * Returns the `exact` section of a segment's companion, or `null` when the
 * section lookup yields nothing.
 */
async getExactSegmentCompanion(stream: string, segmentIndex: number): Promise<ExactSectionView | null> {
  const section = await this.getSectionCompanion(stream, segmentIndex, "exact");
  return section ?? null;
}
|
|
340
|
+
|
|
341
|
+
/**
 * Returns the `fts` section of a segment's companion without the lookup
 * timings, or `null` when the section lookup yields nothing.
 */
async getFtsSegmentCompanion(stream: string, segmentIndex: number): Promise<FtsSectionView | null> {
  const { companion } = await this.getFtsSegmentCompanionWithStats(stream, segmentIndex);
  return companion;
}
|
|
344
|
+
|
|
345
|
+
/**
 * Returns the `fts` section of a segment's companion together with the
 * fetch/decode timings reported by the section lookup.
 *
 * @returns `companion` is `null` when the section lookup yields nothing.
 */
async getFtsSegmentCompanionWithStats(
  stream: string,
  segmentIndex: number
): Promise<{ companion: FtsSectionView | null; stats: CompanionSectionLookupStats }> {
  const { companion, stats } = await this.getSectionCompanionWithStats(stream, segmentIndex, "fts");
  return { companion: companion ?? null, stats };
}
|
|
352
|
+
|
|
353
|
+
/**
 * Returns the `agg` section of a segment's companion, or `null` when the
 * section lookup yields nothing.
 */
async getAggSegmentCompanion(stream: string, segmentIndex: number): Promise<AggSectionView | null> {
  const section = await this.getSectionCompanion(stream, segmentIndex, "agg");
  return section ?? null;
}
|
|
356
|
+
|
|
357
|
+
/**
 * Returns the `mblk` section of a segment's companion, or `null` when the
 * section lookup yields nothing.
 */
async getMetricsBlockSegmentCompanion(stream: string, segmentIndex: number): Promise<MetricsBlockSectionView | null> {
  const section = await this.getSectionCompanion(stream, segmentIndex, "mblk");
  return section ?? null;
}
|
|
360
|
+
|
|
361
|
+
/**
 * Reports the file cache's byte count for object keys under this stream's
 * `streams/<hash>/segments/` prefix.
 */
getLocalCacheBytes(stream: string): number {
  const prefix = `streams/${streamHash16Hex(stream)}/segments/`;
  return this.fileCache.bytesForObjectKeyPrefix(prefix);
}
|
|
364
|
+
|
|
365
|
+
/**
 * Snapshot of the companion file cache counters, renamed for reporting.
 */
getMemoryStats(): {
  fileCacheBytes: number;
  fileCacheEntries: number;
  mappedFileBytes: number;
  mappedFileEntries: number;
  pinnedFileEntries: number;
} {
  const { usedBytes, entryCount, mappedBytes, mappedEntryCount, pinnedEntryCount } = this.fileCache.stats();
  return {
    fileCacheBytes: usedBytes,
    fileCacheEntries: entryCount,
    mappedFileBytes: mappedBytes,
    mappedFileEntries: mappedEntryCount,
    pinnedFileEntries: pinnedEntryCount,
  };
}
|
|
381
|
+
|
|
382
|
+
/**
 * Convenience wrapper around `getSectionCompanionWithStats` that drops the timings.
 */
private async getSectionCompanion<K extends CompanionSectionKind>(
  stream: string,
  segmentIndex: number,
  kind: K
): Promise<CompanionSectionMap[K] | null> {
  const { companion } = await this.getSectionCompanionWithStats(stream, segmentIndex, kind);
  return companion;
}
|
|
389
|
+
|
|
390
|
+
/**
 * Loads and decodes one section of a segment's bundled companion.
 *
 * Resolves to a `null` companion (with zero timings) when there is no current
 * plan row, the companion row is missing or belongs to another plan
 * generation, or the row does not list the requested section kind. A decoded
 * section found in the in-memory cache is returned with zero timings.
 *
 * @param stream Stream name.
 * @param segmentIndex Segment index within the stream.
 * @param kind Section kind to load.
 * @returns The decoded section (or `null`) plus `sectionGetMs` (bundle load,
 *          plan parse and section slicing) and `decodeMs` (payload decode).
 * @throws A `dsError` when the bundle cannot be loaded, the plan JSON is
 *         invalid, the section bounds are invalid, or decoding fails.
 */
private async getSectionCompanionWithStats<K extends CompanionSectionKind>(
  stream: string,
  segmentIndex: number,
  kind: K
): Promise<{ companion: CompanionSectionMap[K] | null; stats: CompanionSectionLookupStats }> {
  const leave = this.memorySampler?.enter("companion_read", { stream, segment_index: segmentIndex, kind });
  // Result shape for every outcome that spent no time fetching or decoding.
  const untimed = (companion: CompanionSectionMap[K] | null) => ({
    companion,
    stats: { sectionGetMs: 0, decodeMs: 0 },
  });
  try {
    const planRow = await this.getCurrentPlanRow(stream);
    if (!planRow) return untimed(null);

    const row = await this.db.getSearchSegmentCompanion(stream, segmentIndex);
    if (!row) return untimed(null);
    if (row.plan_generation !== planRow.generation) return untimed(null);
    if (!parseSectionKinds(row).has(kind)) return untimed(null);

    const cacheKey = this.decodedSectionCacheKey(row, kind);
    const cached = this.getDecodedSectionCache(cacheKey);
    if (cached) return untimed(cached as CompanionSectionMap[K]);

    const sectionStartedAt = Date.now();
    const bundle = await this.loadBundleResult(row);
    if (Result.isError(bundle)) throw dsError(bundle.error.message);
    const plan = this.parsePlanRowResult(planRow);
    if (Result.isError(plan)) throw dsError(plan.error.message);
    const { bytes, toc } = bundle.value;
    const sectionBytes = this.sectionPayloadResult(bytes, toc, row.object_key, kind);
    if (Result.isError(sectionBytes)) throw dsError(sectionBytes.error.message);
    const sectionGetMs = Date.now() - sectionStartedAt;

    const decodeStartedAt = Date.now();
    const decoded = decodeCompanionSectionPayloadResult(kind, sectionBytes.value, plan.value);
    if (Result.isError(decoded)) throw dsError(decoded.error.message);
    const decodeMs = Date.now() - decodeStartedAt;

    const companion = decoded.value ?? null;
    // Charge the cache with the recorded section size, falling back to the payload length.
    const chargedBytes = parseSectionSizes(row)[kind] ?? sectionBytes.value.byteLength;
    this.setDecodedSectionCache(cacheKey, companion, chargedBytes);
    return { companion, stats: { sectionGetMs, decodeMs } };
  } finally {
    leave?.();
  }
}
|
|
425
|
+
|
|
426
|
+
/**
 * Cache key for a decoded section: `<object_key>:<plan_generation>:<kind>`.
 */
private decodedSectionCacheKey(row: SearchSegmentCompanionRow, kind: CompanionSectionKind): string {
  const objectKey = String(row.object_key);
  const generation = String(row.plan_generation);
  return objectKey + ":" + generation + ":" + kind;
}
|
|
429
|
+
|
|
430
|
+
/**
 * Looks up a decoded section and marks it most-recently-used.
 *
 * @param key Key from `decodedSectionCacheKey`.
 * @returns The cached section, or `null` on a miss.
 */
private getDecodedSectionCache(key: string): CompanionSectionMap[CompanionSectionKind] | null {
  const hit = this.decodedSectionCache.get(key);
  if (hit === undefined) return null;
  // Re-insert so the entry moves to the end of the Map's insertion order.
  this.decodedSectionCache.delete(key);
  this.decodedSectionCache.set(key, hit);
  return hit.companion;
}
|
|
437
|
+
|
|
438
|
+
/**
 * Stores a decoded section and evicts the oldest entries until the cache is
 * within `cfg.searchCompanionSectionCacheBytes`.
 *
 * Nothing is stored when the budget is zero or negative, the section is
 * `null`, or the section alone would exceed the budget.
 *
 * @param key Key from `decodedSectionCacheKey`.
 * @param companion Decoded section to cache.
 * @param bytes Size to charge against the budget (rounded up, minimum 1).
 */
private setDecodedSectionCache(
  key: string,
  companion: CompanionSectionMap[CompanionSectionKind] | null,
  bytes: number
): void {
  if (companion == null) return;
  const budget = Math.max(0, this.cfg.searchCompanionSectionCacheBytes);
  if (budget <= 0) return;
  const safeBytes = Math.max(1, Math.ceil(bytes));
  if (safeBytes > budget) return;

  // Replace any previous entry so its size is not counted twice.
  const previous = this.decodedSectionCache.get(key);
  if (previous) {
    this.decodedSectionCacheBytes -= previous.bytes;
    this.decodedSectionCache.delete(key);
  }
  this.decodedSectionCache.set(key, { bytes: safeBytes, companion });
  this.decodedSectionCacheBytes += safeBytes;

  // Map iteration follows insertion order, so the first entries are the oldest.
  for (const [oldestKey, oldest] of this.decodedSectionCache) {
    if (!(this.decodedSectionCacheBytes > budget)) break;
    this.decodedSectionCache.delete(oldestKey);
    this.decodedSectionCacheBytes -= oldest.bytes;
  }
}
|
|
462
|
+
|
|
463
|
+
/**
 * Returns the stored companion plan row for a stream only if its hash matches
 * the plan derived from the stream's current registry.
 *
 * @param stream Stream name.
 * @returns The stored row, or `null` when the registry lookup fails, no row is
 *          stored, or the stored hash differs from the desired one.
 */
private async getCurrentPlanRow(stream: string): Promise<SearchCompanionPlanRow | null> {
  const registry = await this.registry.getRegistryResult(stream);
  if (Result.isError(registry)) return null;

  const desiredHash = hashSearchCompanionPlan(buildDesiredSearchCompanionPlan(registry.value));
  const stored = await this.db.getSearchCompanionPlan(stream);
  return stored && stored.plan_hash === desiredHash ? stored : null;
}
|
|
472
|
+
|
|
473
|
+
/**
 * Parses `plan_json` from a plan row and checks its basic shape.
 *
 * @param planRow Stored plan row.
 * @returns The parsed plan, or an `invalid_companion_build` error when the JSON
 *          does not parse or lacks `families`, a `fields` array, or a `rollups` array.
 */
private parsePlanRowResult(planRow: SearchCompanionPlanRow): Result<SearchCompanionPlan, CompanionBuildError> {
  try {
    const plan = JSON.parse(planRow.plan_json) as SearchCompanionPlan;
    const wellFormed =
      Boolean(plan) && Boolean(plan.families) && Array.isArray(plan.fields) && Array.isArray(plan.rollups);
    if (wellFormed) return Result.ok(plan);
    return invalidCompanionBuild("invalid bundled companion plan json");
  } catch (e: unknown) {
    return invalidCompanionBuild(errorMessage(e));
  }
}
|
|
484
|
+
|
|
485
|
+
/**
 * Loads a companion bundle through the file cache and returns its bytes and
 * table of contents.
 *
 * On a cache miss the object is fetched from the object store with the
 * configured retry settings. The table of contents is decoded from at most the
 * first `PSCIX2_MAX_TOC_BYTES` bytes.
 *
 * @param row Companion row naming the object and its expected size.
 * @returns The bundle bytes and decoded table of contents, or an
 *          `invalid_companion_build` error when the recorded size is not
 *          positive or the cache reports a load/decode failure.
 */
private async loadBundleResult(
  row: SearchSegmentCompanionRow
): Promise<Result<{ bytes: Uint8Array; toc: CompanionToc }, CompanionBuildError>> {
  if (row.size_bytes <= 0) return invalidCompanionBuild(`invalid .cix size for ${row.object_key}`);

  // One fetch attempt; a missing object is raised so retry() sees it as a failure.
  const fetchOnce = async () => {
    const data = await this.os.get(row.object_key);
    if (!data) throw dsError(`missing .cix object ${row.object_key}`);
    return data;
  };

  const loaded = await this.fileCache.loadMappedBundleResult({
    objectKey: row.object_key,
    expectedSize: row.size_bytes,
    loadBytes: async () =>
      retry(fetchOnce, {
        retries: this.cfg.objectStoreRetries,
        baseDelayMs: this.cfg.objectStoreBaseDelayMs,
        maxDelayMs: this.cfg.objectStoreMaxDelayMs,
        timeoutMs: this.cfg.objectStoreTimeoutMs,
      }),
    decodeToc: (bytes) => {
      const tocWindow = bytes.subarray(0, Math.min(bytes.byteLength, PSCIX2_MAX_TOC_BYTES));
      const decodedToc = decodeBundledSegmentCompanionTocResult(tocWindow);
      if (Result.isError(decodedToc)) return Result.err({ message: decodedToc.error.message });
      return Result.ok(decodedToc.value);
    },
  });
  if (Result.isError(loaded)) return invalidCompanionBuild(loaded.error.message);

  const { bytes, toc } = loaded.value;
  return Result.ok({ bytes, toc });
}
|
|
515
|
+
|
|
516
|
+
/**
 * Returns a view over the bytes of one section of a bundled companion.
 *
 * @param bytes - Full bundle bytes.
 * @param toc - Decoded table of contents listing the sections.
 * @param objectKey - Object key of the bundle; used only in error messages.
 * @param kind - Section kind to look up in the TOC.
 * @returns A `subarray` view (not a copy) of the section, or a companion build
 *   error when the section is missing or its bounds are unusable.
 */
private sectionPayloadResult(
  bytes: Uint8Array,
  toc: CompanionToc,
  objectKey: string,
  kind: CompanionSectionKind
): Result<Uint8Array, CompanionBuildError> {
  const section = toc.sections.find((entry) => entry.kind === kind);
  if (!section) return invalidCompanionBuild(`missing ${kind} section in ${objectKey}`);

  const { offset, length } = section;
  // Range comparisons alone let NaN (and fractional values) through, because
  // every comparison with NaN is false; `subarray` would then silently return
  // a wrong or empty slice. Require real integers before range-checking.
  const boundsAreIntegers = Number.isSafeInteger(offset) && Number.isSafeInteger(length);
  if (!boundsAreIntegers || offset < 0 || length < 0 || offset + length > bytes.byteLength) {
    return invalidCompanionBuild(`invalid ${kind} section bounds in ${objectKey}`);
  }
  return Result.ok(bytes.subarray(offset, offset + length));
}
|
|
529
|
+
|
|
530
|
+
/**
 * Runs one pass of the background build loop.
 *
 * Does nothing if a pass is already running or the builder is stopped.
 * Otherwise it records queue metrics, builds pending segments for every
 * stream that has a stored plan or is queued, and re-queues streams whose
 * build failed or threw. When the pass ends with work still queued (and the
 * builder is not stopped) another tick is scheduled.
 */
private async tick(): Promise<void> {
  if (this.running || this.stopped) return;
  this.running = true;
  try {
    if (this.metrics) {
      this.metrics.record("tieredstore.companion.build.queue_len", this.queue.size, "count");
      this.metrics.record("tieredstore.companion.builds_inflight", this.building.size, "count");
    }

    // Streams with a plan come first, then queued streams; duplicates collapse.
    const planned = await this.db.listSearchCompanionPlanStreams();
    const pending = Array.from(new Set([...planned, ...this.queue]));
    this.queue.clear();

    for (const stream of pending) {
      if (this.stopped) break;
      try {
        const outcome = await this.buildPendingSegmentsResult(stream);
        if (!Result.isError(outcome)) continue;
        console.error("bundled companion build failed", stream, outcome.error.message);
      } catch (err: unknown) {
        console.error("bundled companion tick failed", stream, err);
      }
      // Reached only on failure: retry this stream on a later pass.
      this.queue.add(stream);
    }
  } finally {
    this.running = false;
    const hasBacklog = !this.stopped && this.queue.size > 0;
    if (!hasBacklog) {
      this.firstQueuedAtMs = null;
    } else {
      if (this.firstQueuedAtMs == null) this.firstQueuedAtMs = Date.now();
      const delayMs = shouldDeferEnqueuedIndexWork(this.cfg) ? LOW_MEMORY_INDEX_ENQUEUE_QUIET_MS : 0;
      this.scheduleTick(delayMs);
    }
  }
}
|
|
563
|
+
|
|
564
|
+
/**
 * Brings the stored companion plan for `stream` in line with the registry and
 * builds companions for a batch of segments that lack one for the current
 * plan generation.
 *
 * Steps:
 * 1. Derive the desired plan from the registry. If no family is wanted, delete
 *    any stored plan and companions, announce the change, and finish.
 * 2. Insert the plan (generation 1) or bump its generation when the hash changed.
 * 3. Collect uploaded segment indexes with no companion row for the current
 *    generation and process at most `searchCompanionBuildBatchSegments` of them:
 *    build, upload to the object store, populate the local file cache
 *    (best effort), and upsert the companion row.
 * 4. Re-queue the stream if stale segments remain, and announce the change if
 *    at least one companion was persisted.
 *
 * A stream that is already being built is skipped and reported as success.
 *
 * @param stream - Stream name.
 * @returns `ok` on success or when there is nothing to do; a companion build
 *   error when the registry lookup, a segment build, or an upload fails.
 *   Companions persisted before a failure in the same batch are still
 *   announced before the error is returned.
 */
private async buildPendingSegmentsResult(stream: string): Promise<Result<void, CompanionBuildError>> {
  if (this.building.has(stream)) return Result.ok(undefined);
  this.building.add(stream);
  try {
    // Notifies listeners and republishes the manifest. Publish failures are
    // logged and otherwise ignored: the background loop will retry.
    const announceChange = async (): Promise<void> => {
      this.onMetadataChanged?.(stream);
      if (!this.publishManifest) return;
      try {
        await this.publishManifest(stream);
      } catch (e: unknown) {
        console.error("bundled companion manifest publish failed", stream, e);
      }
    };

    const regRes = await this.registry.getRegistryResult(stream);
    if (Result.isError(regRes)) return invalidCompanionBuild(regRes.error.message);
    const registry = regRes.value;
    const desiredPlan = buildDesiredSearchCompanionPlan(registry);
    const desiredHash = hashSearchCompanionPlan(desiredPlan);
    const wantedFamilies = Object.values(desiredPlan.families).some(Boolean);
    let planRow = await this.db.getSearchCompanionPlan(stream);

    if (!wantedFamilies) {
      if (planRow) {
        await this.db.deleteSearchSegmentCompanions(stream);
        await this.db.deleteSearchCompanionPlan(stream);
        await announceChange();
      }
      return Result.ok(undefined);
    }

    if (!planRow) {
      await this.db.upsertSearchCompanionPlan(stream, 1, desiredHash, JSON.stringify(desiredPlan));
      planRow = await this.db.getSearchCompanionPlan(stream);
    } else if (planRow.plan_hash !== desiredHash) {
      await this.db.upsertSearchCompanionPlan(stream, planRow.generation + 1, desiredHash, JSON.stringify(desiredPlan));
      planRow = await this.db.getSearchCompanionPlan(stream);
    }
    if (!planRow) return Result.ok(undefined);
    // Pin the generation so the closures below do not capture the mutable `planRow`.
    const generation = planRow.generation;

    const uploadedSegments = await this.db.countUploadedSegments(stream);
    const companionRows = await this.db.listSearchSegmentCompanions(stream);
    const companionBySegment = new Map(companionRows.map((row) => [row.segment_index, row]));
    const stale: number[] = [];
    for (let segmentIndex = 0; segmentIndex < uploadedSegments; segmentIndex++) {
      const current = companionBySegment.get(segmentIndex);
      if (!current || current.plan_generation !== generation) stale.push(segmentIndex);
    }
    if (this.metrics) {
      this.metrics.record("tieredstore.companion.lag.segments", stale.length, "count", undefined, stream);
    }
    if (stale.length === 0) return Result.ok(undefined);

    const batchLimit = Math.max(1, this.cfg.searchCompanionBuildBatchSegments);
    const batch = stale.slice(0, batchLimit);
    let builtCount = 0;
    let failure: Result<void, CompanionBuildError> | null = null;

    for (const nextSegmentIndex of batch) {
      const seg = await this.db.getSegmentByIndex(stream, nextSegmentIndex);
      // Segments without a row or without an etag are skipped for this pass.
      if (!seg || !seg.r2_etag) continue;
      const startedAt = Date.now();
      const buildCompanion = () => this.buildEncodedBundledCompanionResult(registry, desiredPlan, generation, seg);
      const companionRes = await this.asyncGate.run(async () =>
        this.memorySampler
          ? await this.memorySampler.track(
              "companion",
              { stream, segment_index: seg.segment_index, plan_generation: generation },
              buildCompanion
            )
          : await buildCompanion()
      );
      if (Result.isError(companionRes)) {
        failure = companionRes;
        break;
      }

      const objectId = Buffer.from(randomBytes(8)).toString("hex");
      const objectKey = searchCompanionObjectKey(streamHash16Hex(stream), seg.segment_index, objectId);
      const payload = companionRes.value.payload;
      const sectionSizes = companionRes.value.sectionSizes;
      try {
        await retry(
          () => this.os.put(objectKey, payload, { contentLength: payload.byteLength }),
          {
            retries: this.cfg.objectStoreRetries,
            baseDelayMs: this.cfg.objectStoreBaseDelayMs,
            maxDelayMs: this.cfg.objectStoreMaxDelayMs,
            timeoutMs: this.cfg.objectStoreTimeoutMs,
          }
        );
      } catch (e: unknown) {
        failure = invalidCompanionBuild(String((e as any)?.message ?? e));
        break;
      }

      // Local cache population is best effort; the object store copy is authoritative.
      const cacheRes = this.fileCache.storeBytesResult(objectKey, payload);
      if (Result.isError(cacheRes)) {
        console.warn("bundled companion local cache populate failed", objectKey, cacheRes.error.message);
      }

      const sectionKinds = companionRes.value.sectionKinds;
      await this.db.upsertSearchSegmentCompanion(
        stream,
        seg.segment_index,
        objectKey,
        generation,
        JSON.stringify(sectionKinds),
        JSON.stringify(sectionSizes),
        payload.byteLength,
        companionRes.value.primaryTimestampMinMs,
        companionRes.value.primaryTimestampMaxMs
      );
      builtCount += 1;
      if (this.metrics) {
        // Date.now() has millisecond resolution; the metric is reported in nanoseconds.
        const elapsedNs = BigInt(Date.now() - startedAt) * 1_000_000n;
        this.metrics.record("tieredstore.companion.build.latency", Number(elapsedNs), "ns", undefined, stream);
        this.metrics.record("tieredstore.companion.objects.built", 1, "count", undefined, stream);
      }
    }

    if (failure) {
      // Companions persisted earlier in this batch already changed the stored
      // metadata, so announce them before reporting the failure.
      if (builtCount > 0) await announceChange();
      return failure;
    }

    if (stale.length > builtCount) this.queue.add(stream);
    if (builtCount === 0) return Result.ok(undefined);

    await announceChange();
    return Result.ok(undefined);
  } finally {
    this.building.delete(stream);
  }
}
|
|
686
|
+
|
|
687
|
+
/**
 * Loads a segment's bytes via `loadSegmentBytesCached`, using the configured
 * object-store retry settings, and converts any thrown error into a
 * companion build error.
 *
 * @param seg - Segment row to load.
 * @returns The segment bytes, or a companion build error carrying the thrown
 *   error's message.
 */
private async loadSegmentBytesResult(seg: SegmentRow): Promise<Result<Uint8Array, CompanionBuildError>> {
  try {
    const retryPolicy = {
      retries: this.cfg.objectStoreRetries,
      baseDelayMs: this.cfg.objectStoreBaseDelayMs,
      maxDelayMs: this.cfg.objectStoreMaxDelayMs,
      timeoutMs: this.cfg.objectStoreTimeoutMs,
    };
    const segmentBytes = await loadSegmentBytesCached(this.os, seg, this.segmentCache, retryPolicy);
    return Result.ok(segmentBytes);
  } catch (err: unknown) {
    const reason = (err as any)?.message ?? err;
    return invalidCompanionBuild(String(reason));
  }
}
|
|
705
|
+
|
|
706
|
+
/**
 * Walks every record of a segment in order and hands each one to `visit`.
 *
 * Each payload is decoded and JSON-parsed; a record that fails to parse is
 * still visited, with `parsed` set to `null` and `parsedOk` set to `false`.
 * `docCount` is the zero-based position of the record within the segment and
 * `offset` starts at `seg.start_offset`, increasing by one per record.
 * Whenever a new block starts and the number of blocks seen so far is a
 * multiple of `this.yieldBlocks`, the method awaits `yieldBackgroundWork()`.
 *
 * @param segmentBytes - Encoded segment bytes to iterate.
 * @param seg - Segment row supplying the starting offset.
 * @param visit - Callback invoked per record; an error result stops the walk.
 * @returns The number of records visited, or the first error from iteration
 *   or from `visit`.
 */
private async visitParsedSegmentRecordsResult(
  segmentBytes: Uint8Array,
  seg: SegmentRow,
  visit: (args: {
    docCount: number;
    offset: bigint;
    parsed: unknown | null;
    parsedOk: boolean;
  }) => Promise<Result<void, CompanionBuildError>>
): Promise<Result<number, CompanionBuildError>> {
  let visited = 0;
  let nextOffset = seg.start_offset;
  let blocksSeen = 0;
  let currentBlockOffset = -1;

  for (const recordRes of iterateBlockRecordsResult(segmentBytes)) {
    if (Result.isError(recordRes)) return invalidCompanionBuild(recordRes.error.message);
    const record = recordRes.value;

    const enteredNewBlock = record.blockOffset !== currentBlockOffset;
    if (enteredNewBlock) {
      blocksSeen += 1;
      currentBlockOffset = record.blockOffset;
      if (blocksSeen % this.yieldBlocks === 0) await this.yieldBackgroundWork();
    }

    // `parsed` stays null when decoding or parsing throws.
    let parsed: unknown = null;
    let parsedOk = true;
    try {
      parsed = JSON.parse(PAYLOAD_DECODER.decode(record.payload));
    } catch {
      parsedOk = false;
    }

    const visitRes = await visit({ docCount: visited, offset: nextOffset, parsed, parsedOk });
    if (Result.isError(visitRes)) return visitRes;
    nextOffset += 1n;
    visited += 1;
  }
  return Result.ok(visited);
}
|
|
743
|
+
|
|
744
|
+
/**
 * Builds and encodes the bundled companion for one segment.
 *
 * Loads the segment bytes, creates builders for every family enabled in
 * `plan.families`, feeds each parsed record to those builders, then finalizes
 * and encodes one section per enabled family (in the order exact, col, fts,
 * agg, mblk) and assembles the bundle payload.
 *
 * Every memory-sampler scope opened here is closed in a `finally`, so a scope
 * is not left open if a recorder, finalizer, or encoder throws.
 *
 * @param registry - Schema registry for the stream.
 * @param plan - Companion plan selecting which families to build.
 * @param planGeneration - Plan generation stamped into the bundle.
 * @param seg - Segment to index.
 * @returns The encoded payload, the section kinds and byte sizes, and the
 *   min/max of the primary timestamp column (each `null` unless the col
 *   section reports a bigint value); or a companion build error.
 */
private async buildEncodedBundledCompanionResult(
  registry: SchemaRegistry,
  plan: SearchCompanionPlan,
  planGeneration: number,
  seg: SegmentRow
): Promise<
  Result<
    {
      payload: Uint8Array;
      sectionKinds: CompanionSectionKind[];
      sectionSizes: Record<string, number>;
      primaryTimestampMinMs: bigint | null;
      primaryTimestampMaxMs: bigint | null;
    },
    CompanionBuildError
  >
> {
  // Fresh tag object per scope, as each scope originally received its own literal.
  const segmentTags = () => ({ stream: seg.stream, segment_index: seg.segment_index });

  const leaveLoad = this.memorySampler?.enter("companion_load_segment", segmentTags());
  let bytesRes: Result<Uint8Array, CompanionBuildError>;
  try {
    bytesRes = await this.loadSegmentBytesResult(seg);
  } finally {
    leaveLoad?.();
  }
  if (Result.isError(bytesRes)) return bytesRes;
  const segmentBytes = bytesRes.value;

  const exactBuilders = plan.families.exact ? this.createExactBuilders(registry) : new Map<string, ExactFieldBuilder>();
  const colBuilders = plan.families.col ? this.createColBuilders(registry) : new Map<string, ColumnFieldBuilder>();
  const ftsBuilders = plan.families.fts ? this.createFtsBuilders(registry) : new Map<string, FtsFieldBuilder>();
  const aggBuildersRes = plan.families.agg ? this.createAggRollupBuildersResult(registry) : Result.ok(new Map<string, AggRollupBuilder>());
  if (Result.isError(aggBuildersRes)) return aggBuildersRes;
  const aggBuilders = aggBuildersRes.value;
  const metricsBuilder: MetricsBlockBuilder | null = plan.families.mblk
    ? { records: [], minWindowStartMs: undefined, maxWindowEndMs: undefined }
    : null;

  // Union of every field any builder needs, so raw values are extracted once per record.
  const requiredFieldNames = new Set<string>();
  for (const fieldName of exactBuilders.keys()) requiredFieldNames.add(fieldName);
  for (const fieldName of colBuilders.keys()) requiredFieldNames.add(fieldName);
  for (const fieldName of ftsBuilders.keys()) requiredFieldNames.add(fieldName);
  for (const builder of aggBuilders.values()) {
    for (const fieldName of builder.fieldNames) requiredFieldNames.add(fieldName);
  }
  const fieldNameList = Array.from(requiredFieldNames).sort((a, b) => a.localeCompare(b));

  const leaveScan = this.memorySampler?.enter("companion_scan_records", segmentTags());
  let docCountRes: Result<number, CompanionBuildError>;
  try {
    docCountRes = await this.visitParsedSegmentRecordsResult(segmentBytes, seg, async ({ docCount, offset, parsed, parsedOk }) => {
      let rawSearchValues: Map<string, unknown[]> | null = null;
      if (parsedOk && fieldNameList.length > 0) {
        const leaveExtract = this.memorySampler?.enter("companion_extract_raw", { doc_count: docCount });
        try {
          const rawValuesRes = extractRawSearchValuesForFieldsResult(registry, offset, parsed, fieldNameList);
          if (Result.isError(rawValuesRes)) return invalidCompanionBuild(rawValuesRes.error.message);
          rawSearchValues = rawValuesRes.value;
        } finally {
          leaveExtract?.();
        }
      }
      if (rawSearchValues) {
        const leaveExact = this.memorySampler?.enter("companion_record_exact", { doc_count: docCount });
        try {
          this.recordExactBuilders(exactBuilders, rawSearchValues, docCount);
        } finally {
          leaveExact?.();
        }
        const leaveCol = this.memorySampler?.enter("companion_record_col", { doc_count: docCount });
        try {
          this.recordColBuilders(colBuilders, rawSearchValues, docCount);
        } finally {
          leaveCol?.();
        }
        const leaveFts = this.memorySampler?.enter("companion_record_fts", { doc_count: docCount });
        try {
          this.recordFtsBuilders(ftsBuilders, rawSearchValues, docCount);
        } finally {
          leaveFts?.();
        }
      }
      if (parsedOk && rawSearchValues) {
        const leaveAgg = this.memorySampler?.enter("companion_record_agg", { doc_count: docCount });
        try {
          for (const builder of aggBuilders.values()) {
            const contributionRes = extractRollupContributionResult(registry, builder.rollup, offset, parsed, rawSearchValues);
            if (Result.isError(contributionRes)) return invalidCompanionBuild(contributionRes.error.message);
            // A falsy contribution means this record adds nothing to the rollup.
            if (!contributionRes.value) continue;
            const recordRes = this.recordAggContributionResult(builder, contributionRes.value);
            if (Result.isError(recordRes)) return recordRes;
          }
        } finally {
          leaveAgg?.();
        }
      }
      if (metricsBuilder && parsedOk) {
        const leaveMetrics = this.memorySampler?.enter("companion_record_mblk", { doc_count: docCount });
        try {
          this.recordMetricsBlockBuilder(metricsBuilder, parsed, docCount);
        } finally {
          leaveMetrics?.();
        }
      }
      // Capture a progress sample after every 1024th record.
      if (this.memorySampler && (docCount + 1) % 1024 === 0) {
        this.memorySampler.capture("companion_progress", {
          stream: seg.stream,
          segment_index: seg.segment_index,
          ...this.summarizeCompanionBuildProgress(exactBuilders, colBuilders, ftsBuilders, aggBuilders, metricsBuilder, docCount + 1),
        });
      }
      return Result.ok(undefined);
    });
  } finally {
    leaveScan?.();
  }
  if (Result.isError(docCountRes)) return docCountRes;
  const totalDocs = docCountRes.value;

  const sectionPayloads: EncodedCompanionSectionPayload[] = [];
  const sectionKinds: CompanionSectionKind[] = [];
  const sectionSizes: Record<string, number> = {};
  let primaryTimestampMinMs: bigint | null = null;
  let primaryTimestampMaxMs: bigint | null = null;
  const addSection = (payload: EncodedCompanionSectionPayload): void => {
    sectionPayloads.push(payload);
    const kind = payload.kind;
    sectionKinds.push(kind);
    sectionSizes[kind] = payload.payload.byteLength;
  };

  // Each family's builders are cleared right after its section is encoded.
  if (plan.families.exact) {
    const leaveExactEncode = this.memorySampler?.enter("companion_encode_exact", { ...segmentTags(), doc_count: totalDocs });
    try {
      addSection(encodeCompanionSectionPayload("exact", this.finalizeExactSection(exactBuilders, totalDocs), plan));
      exactBuilders.clear();
    } finally {
      leaveExactEncode?.();
    }
  }
  if (plan.families.col) {
    const leaveColEncode = this.memorySampler?.enter("companion_encode_col", { ...segmentTags(), doc_count: totalDocs });
    try {
      const colSection = this.finalizeColSection(registry, colBuilders, totalDocs);
      const primaryTimestampField = colSection.primary_timestamp_field;
      const primaryTimestampColumn = primaryTimestampField ? colSection.fields[primaryTimestampField] : undefined;
      primaryTimestampMinMs = typeof primaryTimestampColumn?.min === "bigint" ? primaryTimestampColumn.min : null;
      primaryTimestampMaxMs = typeof primaryTimestampColumn?.max === "bigint" ? primaryTimestampColumn.max : null;
      addSection(encodeCompanionSectionPayload("col", colSection, plan));
      colBuilders.clear();
    } finally {
      leaveColEncode?.();
    }
  }
  if (plan.families.fts) {
    const leaveFtsEncode = this.memorySampler?.enter("companion_encode_fts", { ...segmentTags(), doc_count: totalDocs });
    try {
      addSection(encodeCompanionSectionPayload("fts", this.finalizeFtsSection(ftsBuilders, totalDocs), plan));
      ftsBuilders.clear();
    } finally {
      leaveFtsEncode?.();
    }
  }
  if (plan.families.agg) {
    const leaveAggEncode = this.memorySampler?.enter("companion_encode_agg", segmentTags());
    try {
      addSection(encodeCompanionSectionPayload("agg", this.finalizeAggSection(aggBuilders), plan));
      aggBuilders.clear();
    } finally {
      leaveAggEncode?.();
    }
  }
  if (plan.families.mblk && metricsBuilder) {
    const leaveMetricsEncode = this.memorySampler?.enter("companion_encode_mblk", segmentTags());
    try {
      addSection(encodeCompanionSectionPayload("mblk", this.finalizeMetricsBlockSection(metricsBuilder), plan));
      metricsBuilder.records.length = 0;
    } finally {
      leaveMetricsEncode?.();
    }
  }

  return Result.ok({
    payload: encodeBundledSegmentCompanionFromPayloads({
      stream: seg.stream,
      segment_index: seg.segment_index,
      plan_generation: planGeneration,
      sections: sectionPayloads,
    }),
    sectionKinds,
    sectionSizes,
    primaryTimestampMinMs,
    primaryTimestampMaxMs,
  });
}
|
|
923
|
+
|
|
924
|
+
/**
 * Builds the encoded companion for a segment and decodes it again into its
 * structured form.
 *
 * @param registry - Schema registry for the stream.
 * @param plan - Companion plan used for both building and decoding.
 * @param planGeneration - Plan generation stamped into the bundle.
 * @param seg - Segment to index.
 * @returns The decoded companion, or a companion build error from either the
 *   build or the decode step.
 */
private async buildBundledCompanionResult(
  registry: SchemaRegistry,
  plan: SearchCompanionPlan,
  planGeneration: number,
  seg: SegmentRow
): Promise<Result<BundledSegmentCompanion, CompanionBuildError>> {
  const encoded = await this.buildEncodedBundledCompanionResult(registry, plan, planGeneration, seg);
  if (Result.isError(encoded)) return encoded;

  const decoded = decodeBundledSegmentCompanionResult(encoded.value.payload, plan);
  if (!Result.isError(decoded)) return Result.ok(decoded.value);
  return invalidCompanionBuild(decoded.error.message);
}
|
|
936
|
+
|
|
937
|
+
/**
 * Creates an empty column builder for every search field whose `column` flag
 * is exactly `true`, in the registry's own field order.
 *
 * @param registry - Schema registry whose `search.fields` are inspected.
 * @returns Builders keyed by field name; empty when no search fields are configured.
 */
private createColBuilders(registry: SchemaRegistry): Map<string, ColumnFieldBuilder> {
  const builders = new Map<string, ColumnFieldBuilder>();
  const searchFields = registry.search?.fields ?? {};
  for (const [fieldName, field] of Object.entries(searchFields)) {
    if (field.column !== true) continue;
    builders.set(fieldName, { config: field, kind: field.kind, docIds: [], values: [], invalid: false });
  }
  return builders;
}
|
|
945
|
+
|
|
946
|
+
/**
 * Creates an empty exact-match builder for every non-text search field whose
 * `exact` flag is exactly `true`, ordered by field name (`localeCompare`).
 *
 * @param registry - Schema registry whose `search.fields` are inspected.
 * @returns Builders keyed by field name.
 */
private createExactBuilders(registry: SchemaRegistry): Map<string, ExactFieldBuilder> {
  const sortedFields = Object.entries(registry.search?.fields ?? {}).sort(([left], [right]) => left.localeCompare(right));
  const builders = new Map<string, ExactFieldBuilder>();
  for (const [fieldName, field] of sortedFields) {
    // Text fields never get an exact builder, even when flagged exact.
    if (field.kind === "text" || field.exact !== true) continue;
    // Null-prototype dictionary: term strings are arbitrary data.
    const terms = Object.create(null) as Record<string, number[]>;
    builders.set(fieldName, {
      config: field,
      companion: { kind: field.kind, exists_docs: [], terms },
    });
  }
  return builders;
}
|
|
961
|
+
|
|
962
|
+
/**
 * Records one document's values into every exact-match builder.
 *
 * Each raw value is canonicalized; values that canonicalize to null or
 * undefined are ignored. The document id is appended to a term's postings at
 * most once, and to `exists_docs` when at least one value was usable.
 *
 * @param builders - Exact builders keyed by field name.
 * @param rawSearchValues - Raw values extracted from the document, by field name.
 * @param docCount - Zero-based id of the document within the segment.
 */
private recordExactBuilders(builders: Map<string, ExactFieldBuilder>, rawSearchValues: Map<string, unknown[]>, docCount: number): void {
  for (const [fieldName, builder] of builders) {
    const { terms, exists_docs: existsDocs } = builder.companion;
    let sawUsableValue = false;
    for (const raw of rawSearchValues.get(fieldName) ?? []) {
      const term = canonicalizeExactValue(builder.config, raw);
      if (term == null) continue;
      sawUsableValue = true;
      let postings = terms[term];
      if (postings == null) {
        postings = [];
        terms[term] = postings;
      }
      // Postings are appended in document order, so checking the tail is enough to dedupe.
      if (postings[postings.length - 1] !== docCount) postings.push(docCount);
    }
    if (sawUsableValue) existsDocs.push(docCount);
  }
}
|
|
977
|
+
|
|
978
|
+
/**
 * Assembles the exact-match section input from the builders, with fields
 * inserted in field-name order (`localeCompare`).
 *
 * @param builders - Exact builders keyed by field name.
 * @param docCount - Number of documents in the segment.
 * @returns The section input; `fields` is a null-prototype object.
 */
private finalizeExactSection(builders: Map<string, ExactFieldBuilder>, docCount: number): ExactSectionInput {
  const fields = Object.create(null) as Record<string, ExactFieldInput>;
  const sortedEntries = [...builders].sort(([left], [right]) => left.localeCompare(right));
  for (const [fieldName, { companion }] of sortedEntries) {
    fields[fieldName] = companion;
  }
  return { doc_count: docCount, fields };
}
|
|
988
|
+
|
|
989
|
+
/**
 * Records one document's values into every column builder that is still valid.
 *
 * A document contributes at most one value per column. If more than one raw
 * value canonicalizes to a usable column value, the builder is marked invalid
 * and is skipped for all later documents.
 *
 * @param builders - Column builders keyed by field name.
 * @param rawSearchValues - Raw values extracted from the document, by field name.
 * @param docCount - Zero-based id of the document within the segment.
 */
private recordColBuilders(builders: Map<string, ColumnFieldBuilder>, rawSearchValues: Map<string, unknown[]>, docCount: number): void {
  for (const [fieldName, builder] of builders) {
    if (builder.invalid) continue;

    const usable: Array<bigint | number | boolean> = [];
    (rawSearchValues.get(fieldName) ?? []).forEach((raw) => {
      const value = canonicalizeColumnValue(builder.config, raw);
      if (value != null) usable.push(value);
    });

    switch (usable.length) {
      case 0:
        break;
      case 1:
        builder.docIds.push(docCount);
        builder.values.push(usable[0]!);
        break;
      default:
        // Multi-valued document: this column cannot be represented.
        builder.invalid = true;
    }
  }
}
|
|
1008
|
+
|
|
1009
|
+
/**
 * Assembles the column section input from the builders.
 *
 * Builders that are invalid or hold no values are left out. For the rest, the
 * minimum and maximum are found with `compareValues`.
 *
 * @param registry - Schema registry; supplies `search.primaryTimestampField`.
 * @param builders - Column builders keyed by field name.
 * @param docCount - Number of documents in the segment.
 * @returns The section input, with `primary_timestamp_field` set to
 *   `undefined` when the registry does not name one.
 */
private finalizeColSection(
  registry: SchemaRegistry,
  builders: Map<string, ColumnFieldBuilder>,
  docCount: number
): ColSectionInput {
  const fields: Record<string, ColFieldInput> = {};
  for (const [fieldName, builder] of builders) {
    if (builder.invalid || builder.values.length === 0) continue;

    let lowest: bigint | number | boolean | null = null;
    let highest: bigint | number | boolean | null = null;
    for (const candidate of builder.values) {
      if (lowest == null || compareValues(candidate, lowest) < 0) lowest = candidate;
      if (highest == null || compareValues(candidate, highest) > 0) highest = candidate;
    }

    fields[fieldName] = {
      kind: builder.kind,
      doc_ids: builder.docIds,
      values: builder.values,
      min: lowest,
      max: highest,
    };
  }

  const primaryTimestampField = registry.search?.primaryTimestampField;
  return {
    doc_count: docCount,
    primary_timestamp_field: primaryTimestampField ?? undefined,
    fields,
  };
}
|
|
1040
|
+
|
|
1041
|
+
/**
 * Creates an empty full-text builder for one field.
 *
 * The `exact`, `prefix` and `positions` flags are copied as `true` only when
 * the field config holds exactly `true`; otherwise they are `undefined`.
 *
 * @param field - Search field configuration.
 * @returns A builder with no documents and a null-prototype `terms` dictionary.
 */
private createFtsFieldBuilder(field: SearchFieldConfig): FtsFieldBuilder {
  const onlyTrue = (flag: unknown): true | undefined => (flag === true ? true : undefined);
  const terms = Object.create(null) as Record<string, FtsTermInput>;
  return {
    config: field,
    companion: {
      kind: field.kind,
      exact: onlyTrue(field.exact),
      prefix: onlyTrue(field.prefix),
      positions: onlyTrue(field.positions),
      exists_docs: [],
      terms,
    },
  };
}
|
|
1054
|
+
|
|
1055
|
+
/**
 * Creates an empty full-text builder for every `text` field and every
 * `keyword` field with `prefix` set to exactly `true`, ordered by field name
 * (`localeCompare`).
 *
 * @param registry - Schema registry whose `search.fields` are inspected.
 * @returns Builders keyed by field name.
 */
private createFtsBuilders(registry: SchemaRegistry): Map<string, FtsFieldBuilder> {
  const sortedFields = Object.entries(registry.search?.fields ?? {}).sort(([left], [right]) => left.localeCompare(right));
  const builders = new Map<string, FtsFieldBuilder>();
  for (const [fieldName, field] of sortedFields) {
    const isText = field.kind === "text";
    const isPrefixKeyword = field.kind === "keyword" && field.prefix === true;
    if (isText || isPrefixKeyword) {
      builders.set(fieldName, this.createFtsFieldBuilder(field));
    }
  }
  return builders;
}
|
|
1063
|
+
|
|
1064
|
+
/**
 * Records one document's values into every full-text builder.
 *
 * Keyword fields index each normalized value as a single term. Text fields
 * are run through the field's analyzer, and each token becomes a term. When
 * the field tracks positions, each term also gets a per-document frequency
 * and a flat list of token positions; the position counter runs across all of
 * the document's values for that field.
 *
 * @param builders - Full-text builders keyed by field name.
 * @param rawSearchValues - Raw values extracted from the document, by field name.
 * @param docCount - Zero-based id of the document within the segment.
 */
private recordFtsBuilders(builders: Map<string, FtsFieldBuilder>, rawSearchValues: Map<string, unknown[]>, docCount: number): void {
  for (const [fieldName, builder] of builders) {
    const { config, companion } = builder;

    // Collect the document's usable string values for this field.
    const texts: string[] = [];
    for (const raw of rawSearchValues.get(fieldName) ?? []) {
      if (config.kind === "keyword") {
        const keyword = normalizeKeywordValue(raw, config.normalizer);
        if (keyword != null) texts.push(keyword);
      } else if (config.kind === "text" && typeof raw === "string") {
        texts.push(raw);
      }
    }
    if (texts.length === 0) continue;
    companion.exists_docs.push(docCount);

    if (config.kind === "keyword") {
      for (const term of texts) {
        let entry = companion.terms[term];
        if (entry == null) {
          entry = { doc_ids: [] };
          companion.terms[term] = entry;
        }
        const ids = entry.doc_ids;
        if (ids[ids.length - 1] !== docCount) ids.push(docCount);
      }
      continue;
    }

    const trackPositions = companion.positions;
    let position = 0;
    for (const text of texts) {
      for (const token of analyzeTextValue(text, config.analyzer)) {
        let entry = companion.terms[token];
        if (entry == null) {
          entry = {
            doc_ids: [],
            freqs: trackPositions ? [] : undefined,
            positions: trackPositions ? [] : undefined,
          };
          companion.terms[token] = entry;
        }
        const ids = entry.doc_ids;
        const tail = ids.length - 1;
        // Document ids are appended in order, so the tail tells us whether this
        // document already has a posting for the token.
        const alreadyPosted = tail >= 0 && ids[tail] === docCount;
        if (!alreadyPosted) ids.push(docCount);
        if (trackPositions) {
          if (alreadyPosted) {
            entry.freqs![tail] = (entry.freqs![tail] ?? 0) + 1;
          } else {
            entry.freqs!.push(1);
          }
          entry.positions!.push(position);
        }
        position += 1;
      }
    }
  }
}
|
|
1114
|
+
|
|
1115
|
+
/**
 * Assembles the full-text section input from the builders, with fields
 * inserted in field-name order (`localeCompare`).
 *
 * @param builders - Full-text builders keyed by field name.
 * @param docCount - Number of documents in the segment.
 * @returns The section input; `fields` is a null-prototype object.
 */
private finalizeFtsSection(
  builders: Map<string, FtsFieldBuilder>,
  docCount: number
): FtsSectionInput {
  const fields = Object.create(null) as Record<string, FtsFieldInput>;
  const sortedEntries = [...builders].sort(([left], [right]) => left.localeCompare(right));
  for (const [fieldName, { companion }] of sortedEntries) {
    fields[fieldName] = companion;
  }
  return { doc_count: docCount, fields };
}
|
|
1128
|
+
|
|
1129
|
+
/**
 * Creates an empty aggregation builder for every rollup in the registry,
 * ordered by rollup name (`localeCompare`).
 *
 * Each builder starts with one empty window map per parsed interval.
 *
 * @param registry - Schema registry whose `search.rollups` are inspected.
 * @returns Builders keyed by rollup name, or the first error from parsing a
 *   rollup's intervals.
 */
private createAggRollupBuildersResult(registry: SchemaRegistry): Result<Map<string, AggRollupBuilder>, CompanionBuildError> {
  const sortedRollups = Object.entries(registry.search?.rollups ?? {}).sort(([left], [right]) => left.localeCompare(right));
  const builders = new Map<string, AggRollupBuilder>();
  for (const [rollupName, rollup] of sortedRollups) {
    const intervalsRes = this.parseRollupIntervalsResult(rollup);
    if (Result.isError(intervalsRes)) return intervalsRes;
    const intervalsMs = intervalsRes.value;

    const intervalMap = new Map<number, Map<number, Map<string, GroupBuilder>>>();
    for (const intervalMs of intervalsMs) {
      intervalMap.set(intervalMs, new Map());
    }

    builders.set(rollupName, {
      rollup,
      intervalsMs,
      intervalMap,
      // Copied so the builder does not alias the registry's array.
      dimensionNames: [...(rollup.dimensions ?? [])],
      fieldNames: rollupRequiredFieldNames(registry, rollup),
    });
  }
  return Result.ok(builders);
}
|
|
1146
|
+
|
|
1147
|
+
/**
 * Assembles the aggregation section input by finalizing every rollup
 * builder's intervals, in the builders' iteration order.
 *
 * @param builders - Aggregation builders keyed by rollup name.
 * @returns The section input holding one entry per rollup.
 */
private finalizeAggSection(builders: Map<string, AggRollupBuilder>): AggSectionInput {
  const rollups: AggSectionInput["rollups"] = {};
  builders.forEach((builder, rollupName) => {
    const intervals = this.finalizeAggIntervals(builder.intervalMap, builder.dimensionNames);
    rollups[rollupName] = { intervals };
  });
  return { rollups };
}
|
|
1155
|
+
|
|
1156
|
+
/**
 * Counts what the in-progress companion builders currently hold.
 *
 * Nothing is mutated; the builders are only read.
 *
 * @param exactBuilders Exact-match field builders; terms and posting-list lengths are tallied.
 * @param colBuilders Column field builders; the lengths of their `values` are summed.
 * @param ftsBuilders Full-text field builders; terms, `doc_ids` lengths and `positions` lengths are tallied.
 * @param aggBuilders Rollup builders; windows and groups are counted across every interval.
 * @param metricsBuilder Optional metrics builder; contributes its record count, or 0 when null.
 * @param docCount Document count copied through to the result unchanged.
 * @returns A snapshot of the counters described above.
 */
private summarizeCompanionBuildProgress(
  exactBuilders: Map<string, ExactFieldBuilder>,
  colBuilders: Map<string, ColumnFieldBuilder>,
  ftsBuilders: Map<string, FtsFieldBuilder>,
  aggBuilders: Map<string, AggRollupBuilder>,
  metricsBuilder: MetricsBlockBuilder | null,
  docCount: number
): CompanionBuildProgress {
  // Exact-match fields: one term per key, one posting per list entry.
  let exactTerms = 0;
  let exactPostings = 0;
  exactBuilders.forEach((exact) => {
    for (const postingList of Object.values(exact.companion.terms)) {
      exactTerms++;
      exactPostings += postingList.length;
    }
  });

  // Column fields: total number of stored values.
  let colValues = 0;
  colBuilders.forEach((col) => {
    colValues += col.values.length;
  });

  // Full-text fields: positions are optional per term and count as 0 when absent.
  let ftsTerms = 0;
  let ftsPostings = 0;
  let ftsPositions = 0;
  ftsBuilders.forEach((fts) => {
    for (const termPostings of Object.values(fts.companion.terms)) {
      ftsTerms++;
      ftsPostings += termPostings.doc_ids.length;
      ftsPositions += termPostings.positions?.length ?? 0;
    }
  });

  // Rollups: windows per interval, groups per window.
  let aggWindows = 0;
  let aggGroups = 0;
  aggBuilders.forEach((agg) => {
    agg.intervalMap.forEach((windows) => {
      aggWindows += windows.size;
      windows.forEach((groupsInWindow) => {
        aggGroups += groupsInWindow.size;
      });
    });
  });

  const metricRecords = metricsBuilder?.records.length ?? 0;

  return {
    docCount,
    exactFields: exactBuilders.size,
    exactTerms,
    exactPostings,
    colFields: colBuilders.size,
    colValues,
    ftsFields: ftsBuilders.size,
    ftsTerms,
    ftsPostings,
    ftsPositions,
    aggRollups: aggBuilders.size,
    aggWindows,
    aggGroups,
    metricRecords,
  };
}
|
|
1213
|
+
|
|
1214
|
+
/**
 * Parses every entry of `rollup.intervals` with `parseDurationMsResult`.
 *
 * @param rollup Rollup configuration whose `intervals` are parsed in order.
 * @returns The parsed values in the same order as configured, or an
 *   invalid-companion-build error carrying the message of the first entry
 *   that failed to parse.
 */
private parseRollupIntervalsResult(rollup: SearchRollupConfig): Result<number[], CompanionBuildError> {
  const intervalsMs: number[] = [];
  for (const rawInterval of rollup.intervals) {
    const durationRes = parseDurationMsResult(rawInterval);
    if (Result.isError(durationRes)) {
      // Stop at the first bad interval and surface its message.
      return invalidCompanionBuild(durationRes.error.message);
    }
    intervalsMs.push(durationRes.value);
  }
  return Result.ok(intervalsMs);
}
|
|
1223
|
+
|
|
1224
|
+
/**
 * Folds one contribution into every interval of a rollup builder.
 *
 * For each interval the contribution's timestamp is floored to the start of
 * its window. The group identified by the encoded dimension key is then
 * created from cloned measure states, or merged into if it already exists.
 *
 * @param builder Rollup builder whose `intervalMap` is updated in place.
 * @param contribution Timestamp, dimension values and measure states to record.
 * @returns Ok on success, or an invalid-companion-build error when an interval
 *   is non-finite or not positive. Intervals processed before the invalid one
 *   have already been updated by then.
 */
private recordAggContributionResult(
  builder: AggRollupBuilder,
  contribution: {
    timestampMs: number;
    dimensions: Record<string, string | null>;
    measures: Record<string, AggMeasureState>;
  }
): Result<void, CompanionBuildError> {
  const groupKey = encodeAggGroupKey(contribution.dimensions, builder.dimensionNames);

  for (const intervalMs of builder.intervalsMs) {
    const intervalIsUsable = Number.isFinite(intervalMs) && intervalMs > 0;
    if (!intervalIsUsable) return invalidCompanionBuild(`invalid rollup interval ${intervalMs}`);

    // Align the timestamp to the start of the window that contains it.
    const windowStartMs = Math.floor(contribution.timestampMs / intervalMs) * intervalMs;

    let windowsForInterval = builder.intervalMap.get(intervalMs);
    if (windowsForInterval === undefined) {
      windowsForInterval = new Map<number, Map<string, GroupBuilder>>();
      builder.intervalMap.set(intervalMs, windowsForInterval);
    }

    let groupsInWindow = windowsForInterval.get(windowStartMs);
    if (groupsInWindow === undefined) {
      groupsInWindow = new Map<string, GroupBuilder>();
      windowsForInterval.set(windowStartMs, groupsInWindow);
    }

    const currentGroup = groupsInWindow.get(groupKey);
    if (currentGroup) {
      // Existing group: merge measure by measure; clone measures seen for the first time.
      for (const [measureName, incoming] of Object.entries(contribution.measures)) {
        const previous = currentGroup.measures[measureName];
        if (previous) {
          currentGroup.measures[measureName] = mergeAggMeasureState(previous, incoming);
        } else {
          currentGroup.measures[measureName] = cloneAggMeasureState(incoming);
        }
      }
    } else {
      // First contribution for this group: store clones of the incoming states.
      const clonedMeasures: Record<string, AggMeasureState> = {};
      for (const [measureName, incoming] of Object.entries(contribution.measures)) {
        clonedMeasures[measureName] = cloneAggMeasureState(incoming);
      }
      groupsInWindow.set(groupKey, { key: groupKey, measures: clonedMeasures });
    }
  }

  return Result.ok(undefined);
}
|
|
1260
|
+
|
|
1261
|
+
/**
 * Turns a rollup's nested interval -> window -> group maps into the section's
 * `intervals` object.
 *
 * Intervals and windows are emitted in ascending numeric order. Groups keep
 * the iteration order of their map. Each group's key is decoded back into
 * dimension values, and its measures object is passed through by reference.
 *
 * @param intervalMap Window maps keyed by interval length in milliseconds.
 * @param dimensionNames Dimension names used to decode each group key.
 * @returns The intervals object keyed by the stringified interval length.
 */
private finalizeAggIntervals(
  intervalMap: Map<number, Map<number, Map<string, GroupBuilder>>>,
  dimensionNames: string[]
): AggSectionInput["rollups"][string]["intervals"] {
  const result: AggSectionInput["rollups"][string]["intervals"] = {};

  const intervalsAscending = [...intervalMap.entries()].sort(([left], [right]) => left - right);
  for (const [intervalMs, windowMap] of intervalsAscending) {
    const windowsAscending = [...windowMap.entries()].sort(([left], [right]) => left - right);

    const windows = windowsAscending.map(([startMs, groupMap]) => {
      const groups = [...groupMap.values()].map((entry) => ({
        dimensions: decodeAggGroupKey(entry.key, dimensionNames),
        measures: entry.measures,
      }));
      return { start_ms: startMs, groups };
    });

    result[String(intervalMs)] = { interval_ms: intervalMs, windows };
  }

  return result;
}
|
|
1282
|
+
|
|
1283
|
+
/**
 * Packages a metrics builder's state as the metrics-block section input.
 *
 * The records array is passed through by reference, not copied.
 *
 * @param builder Metrics builder to read.
 * @returns The record count, the builder's min/max window bounds and the records.
 */
private finalizeMetricsBlockSection(builder: MetricsBlockBuilder): MetricsBlockSectionInput {
  const { records, minWindowStartMs, maxWindowEndMs } = builder;
  return {
    record_count: records.length,
    min_window_start_ms: minWindowStartMs,
    max_window_end_ms: maxWindowEndMs,
    records,
  };
}
|
|
1291
|
+
|
|
1292
|
+
/**
 * Appends one metrics record to the builder and widens its window bounds.
 *
 * The record is produced by `buildMetricsBlockRecord(docCount, parsed)`. When
 * that call reports an error, the input is skipped without any signal to the
 * caller and the builder is left untouched.
 *
 * @param builder Metrics builder mutated in place.
 * @param parsed Raw value handed to `buildMetricsBlockRecord`.
 * @param docCount Value handed to `buildMetricsBlockRecord` as its first argument.
 */
private recordMetricsBlockBuilder(builder: MetricsBlockBuilder, parsed: unknown, docCount: number): void {
  const recordRes = buildMetricsBlockRecord(docCount, parsed);
  if (Result.isError(recordRes)) return;

  const record = recordRes.value;
  builder.records.push(record);

  // A null/undefined bound means no record has been seen yet; otherwise widen it.
  const previousMin = builder.minWindowStartMs;
  if (previousMin == null) {
    builder.minWindowStartMs = record.windowStartMs;
  } else {
    builder.minWindowStartMs = Math.min(previousMin, record.windowStartMs);
  }

  const previousMax = builder.maxWindowEndMs;
  if (previousMax == null) {
    builder.maxWindowEndMs = record.windowEndMs;
  } else {
    builder.maxWindowEndMs = Math.max(previousMax, record.windowEndMs);
  }
}
|
|
1305
|
+
}
|