@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
import { Result } from "better-result";
|
|
2
|
+
import type { SchemaRegistry, SearchConfig, SearchFieldBinding, SearchFieldConfig } from "../schema/registry";
|
|
3
|
+
import { resolvePointerResult } from "../util/json_pointer";
|
|
4
|
+
import { schemaVersionForOffset } from "../schema/read_json";
|
|
5
|
+
|
|
6
|
+
// A single exact-match search term extracted from one record, ready to be
// written into the exact index.
export type SearchExactTerm = {
  // Search field name (as keyed in the registry's search.fields map).
  field: string;
  // Field configuration the value was canonicalized under.
  config: SearchFieldConfig;
  // Canonical string form of the value (see canonicalizeExactValue).
  canonical: string;
  // UTF-8 encoding of `canonical`.
  bytes: Uint8Array;
};
|
|
12
|
+
|
|
13
|
+
export function resolveSearchAlias(search: SearchConfig | undefined, fieldName: string): string {
|
|
14
|
+
return search?.aliases?.[fieldName] ?? fieldName;
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
export function getSearchFieldConfig(search: SearchConfig | undefined, fieldName: string): SearchFieldConfig | null {
|
|
18
|
+
const resolved = resolveSearchAlias(search, fieldName);
|
|
19
|
+
return search?.fields?.[resolved] ?? null;
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
export function getSearchFieldBinding(config: SearchFieldConfig, version: number): SearchFieldBinding | null {
|
|
23
|
+
let selected: SearchFieldBinding | null = null;
|
|
24
|
+
for (const binding of config.bindings) {
|
|
25
|
+
if (binding.version <= version && (!selected || binding.version > selected.version)) {
|
|
26
|
+
selected = binding;
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
return selected;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
export function normalizeKeywordValue(value: unknown, normalizer: SearchFieldConfig["normalizer"]): string | null {
|
|
33
|
+
if (typeof value !== "string") return null;
|
|
34
|
+
return normalizer === "lowercase_v1" ? value.toLowerCase() : value;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
export function canonicalizeExactValue(config: SearchFieldConfig, value: unknown): string | null {
|
|
38
|
+
switch (config.kind) {
|
|
39
|
+
case "keyword":
|
|
40
|
+
return normalizeKeywordValue(value, config.normalizer);
|
|
41
|
+
case "integer":
|
|
42
|
+
if (typeof value === "bigint") return value.toString();
|
|
43
|
+
if (typeof value === "number" && Number.isFinite(value) && Number.isInteger(value)) return String(value);
|
|
44
|
+
if (typeof value === "string" && /^-?(0|[1-9][0-9]*)$/.test(value.trim())) return String(BigInt(value.trim()));
|
|
45
|
+
return null;
|
|
46
|
+
case "float":
|
|
47
|
+
if (typeof value === "bigint") return value.toString();
|
|
48
|
+
if (typeof value === "number" && Number.isFinite(value)) return String(value);
|
|
49
|
+
if (typeof value === "string" && value.trim() !== "") {
|
|
50
|
+
const n = Number(value);
|
|
51
|
+
if (Number.isFinite(n)) return String(n);
|
|
52
|
+
}
|
|
53
|
+
return null;
|
|
54
|
+
case "date":
|
|
55
|
+
if (typeof value === "number" && Number.isFinite(value)) return String(Math.trunc(value));
|
|
56
|
+
if (typeof value === "bigint") return value.toString();
|
|
57
|
+
if (typeof value === "string" && value.trim() !== "") {
|
|
58
|
+
const parsed = Date.parse(value);
|
|
59
|
+
if (Number.isFinite(parsed)) return String(Math.trunc(parsed));
|
|
60
|
+
if (/^-?(0|[1-9][0-9]*)$/.test(value.trim())) return String(BigInt(value.trim()));
|
|
61
|
+
}
|
|
62
|
+
return null;
|
|
63
|
+
case "bool":
|
|
64
|
+
if (typeof value === "boolean") return value ? "true" : "false";
|
|
65
|
+
if (typeof value === "string") {
|
|
66
|
+
const lowered = value.trim().toLowerCase();
|
|
67
|
+
if (lowered === "true" || lowered === "false") return lowered;
|
|
68
|
+
}
|
|
69
|
+
return null;
|
|
70
|
+
default:
|
|
71
|
+
return null;
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
export function canonicalizeColumnValue(config: SearchFieldConfig, value: unknown): bigint | number | boolean | null {
|
|
76
|
+
switch (config.kind) {
|
|
77
|
+
case "integer": {
|
|
78
|
+
const canonical = canonicalizeExactValue(config, value);
|
|
79
|
+
return canonical == null ? null : BigInt(canonical);
|
|
80
|
+
}
|
|
81
|
+
case "date": {
|
|
82
|
+
const canonical = canonicalizeExactValue(config, value);
|
|
83
|
+
return canonical == null ? null : BigInt(canonical);
|
|
84
|
+
}
|
|
85
|
+
case "float": {
|
|
86
|
+
const canonical = canonicalizeExactValue(config, value);
|
|
87
|
+
if (canonical == null) return null;
|
|
88
|
+
const parsed = Number(canonical);
|
|
89
|
+
return Number.isFinite(parsed) ? parsed : null;
|
|
90
|
+
}
|
|
91
|
+
case "bool":
|
|
92
|
+
return canonicalizeExactValue(config, value) === "true"
|
|
93
|
+
? true
|
|
94
|
+
: canonicalizeExactValue(config, value) === "false"
|
|
95
|
+
? false
|
|
96
|
+
: null;
|
|
97
|
+
default:
|
|
98
|
+
return null;
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
export function analyzeTextValue(value: string, analyzer: SearchFieldConfig["analyzer"]): string[] {
|
|
103
|
+
if (analyzer !== "unicode_word_v1") return [];
|
|
104
|
+
const matches = value.toLowerCase().match(/[\p{L}\p{N}]+/gu);
|
|
105
|
+
return matches ? matches.filter((token) => token.length > 0) : [];
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
function addRawValues(out: unknown[], value: unknown): void {
|
|
109
|
+
if (Array.isArray(value)) {
|
|
110
|
+
for (const item of value) addRawValues(out, item);
|
|
111
|
+
return;
|
|
112
|
+
}
|
|
113
|
+
out.push(value);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
export function extractRawSearchValuesForFieldsResult(
|
|
117
|
+
reg: SchemaRegistry,
|
|
118
|
+
offset: bigint,
|
|
119
|
+
value: unknown,
|
|
120
|
+
fieldNames: Iterable<string>
|
|
121
|
+
): Result<Map<string, unknown[]>, { message: string }> {
|
|
122
|
+
if (!reg.search) return Result.ok(new Map());
|
|
123
|
+
if (!value || typeof value !== "object" || Array.isArray(value)) {
|
|
124
|
+
return Result.err({ message: "search fields require JSON object records" });
|
|
125
|
+
}
|
|
126
|
+
const version = schemaVersionForOffset(reg, offset);
|
|
127
|
+
const out = new Map<string, unknown[]>();
|
|
128
|
+
for (const fieldName of fieldNames) {
|
|
129
|
+
const config = reg.search.fields[fieldName];
|
|
130
|
+
if (!config) continue;
|
|
131
|
+
const binding = getSearchFieldBinding(config, version);
|
|
132
|
+
if (!binding) continue;
|
|
133
|
+
const resolvedRes = resolvePointerResult(value, binding.jsonPointer);
|
|
134
|
+
if (Result.isError(resolvedRes)) return Result.err({ message: resolvedRes.error.message });
|
|
135
|
+
if (!resolvedRes.value.exists) continue;
|
|
136
|
+
const values: unknown[] = [];
|
|
137
|
+
addRawValues(values, resolvedRes.value.value);
|
|
138
|
+
if (values.length > 0) out.set(fieldName, values);
|
|
139
|
+
}
|
|
140
|
+
return Result.ok(out);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
export function extractRawSearchValuesResult(
|
|
144
|
+
reg: SchemaRegistry,
|
|
145
|
+
offset: bigint,
|
|
146
|
+
value: unknown
|
|
147
|
+
): Result<Map<string, unknown[]>, { message: string }> {
|
|
148
|
+
return extractRawSearchValuesForFieldsResult(reg, offset, value, Object.keys(reg.search?.fields ?? {}));
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
export function extractSearchExactTermsResult(
|
|
152
|
+
reg: SchemaRegistry,
|
|
153
|
+
offset: bigint,
|
|
154
|
+
value: unknown
|
|
155
|
+
): Result<SearchExactTerm[], { message: string }> {
|
|
156
|
+
const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
|
|
157
|
+
if (Result.isError(rawValuesRes)) return rawValuesRes;
|
|
158
|
+
const out: SearchExactTerm[] = [];
|
|
159
|
+
const seen = new Set<string>();
|
|
160
|
+
for (const [fieldName, values] of rawValuesRes.value) {
|
|
161
|
+
const config = reg.search?.fields[fieldName];
|
|
162
|
+
if (!config?.exact) continue;
|
|
163
|
+
for (const rawValue of values) {
|
|
164
|
+
const canonical = canonicalizeExactValue(config, rawValue);
|
|
165
|
+
if (canonical == null) continue;
|
|
166
|
+
const dedupeKey = `${fieldName}\u0000${canonical}`;
|
|
167
|
+
if (seen.has(dedupeKey)) continue;
|
|
168
|
+
seen.add(dedupeKey);
|
|
169
|
+
out.push({
|
|
170
|
+
field: fieldName,
|
|
171
|
+
config,
|
|
172
|
+
canonical,
|
|
173
|
+
bytes: new TextEncoder().encode(canonical),
|
|
174
|
+
});
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
return Result.ok(out);
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
export function extractSearchExactValuesResult(
|
|
181
|
+
reg: SchemaRegistry,
|
|
182
|
+
offset: bigint,
|
|
183
|
+
value: unknown
|
|
184
|
+
): Result<Map<string, string[]>, { message: string }> {
|
|
185
|
+
const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
|
|
186
|
+
if (Result.isError(rawValuesRes)) return rawValuesRes;
|
|
187
|
+
const out = new Map<string, string[]>();
|
|
188
|
+
for (const [fieldName, values] of rawValuesRes.value) {
|
|
189
|
+
const config = reg.search?.fields[fieldName];
|
|
190
|
+
if (!config) continue;
|
|
191
|
+
const exactValues: string[] = [];
|
|
192
|
+
for (const rawValue of values) {
|
|
193
|
+
const canonical = canonicalizeExactValue(config, rawValue);
|
|
194
|
+
if (canonical != null) exactValues.push(canonical);
|
|
195
|
+
}
|
|
196
|
+
if (exactValues.length > 0) out.set(fieldName, exactValues);
|
|
197
|
+
}
|
|
198
|
+
return Result.ok(out);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
export function extractSearchColumnValuesResult(
|
|
202
|
+
reg: SchemaRegistry,
|
|
203
|
+
offset: bigint,
|
|
204
|
+
value: unknown
|
|
205
|
+
): Result<Map<string, Array<bigint | number | boolean>>, { message: string }> {
|
|
206
|
+
const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
|
|
207
|
+
if (Result.isError(rawValuesRes)) return rawValuesRes;
|
|
208
|
+
const out = new Map<string, Array<bigint | number | boolean>>();
|
|
209
|
+
for (const [fieldName, values] of rawValuesRes.value) {
|
|
210
|
+
const config = reg.search?.fields[fieldName];
|
|
211
|
+
if (!config?.column) continue;
|
|
212
|
+
const colValues: Array<bigint | number | boolean> = [];
|
|
213
|
+
for (const rawValue of values) {
|
|
214
|
+
const normalized = canonicalizeColumnValue(config, rawValue);
|
|
215
|
+
if (normalized != null) colValues.push(normalized);
|
|
216
|
+
}
|
|
217
|
+
if (colValues.length > 0) out.set(fieldName, colValues);
|
|
218
|
+
}
|
|
219
|
+
return Result.ok(out);
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
export function extractSearchTextValuesResult(
|
|
223
|
+
reg: SchemaRegistry,
|
|
224
|
+
offset: bigint,
|
|
225
|
+
value: unknown
|
|
226
|
+
): Result<Map<string, string[]>, { message: string }> {
|
|
227
|
+
const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
|
|
228
|
+
if (Result.isError(rawValuesRes)) return rawValuesRes;
|
|
229
|
+
const out = new Map<string, string[]>();
|
|
230
|
+
for (const [fieldName, values] of rawValuesRes.value) {
|
|
231
|
+
const config = reg.search?.fields[fieldName];
|
|
232
|
+
if (!config) continue;
|
|
233
|
+
const textValues: string[] = [];
|
|
234
|
+
for (const rawValue of values) {
|
|
235
|
+
if (config.kind === "keyword") {
|
|
236
|
+
const normalized = normalizeKeywordValue(rawValue, config.normalizer);
|
|
237
|
+
if (normalized != null) textValues.push(normalized);
|
|
238
|
+
} else if (config.kind === "text" && typeof rawValue === "string") {
|
|
239
|
+
textValues.push(rawValue);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
if (textValues.length > 0) out.set(fieldName, textValues);
|
|
243
|
+
}
|
|
244
|
+
return Result.ok(out);
|
|
245
|
+
}
|
package/src/segment/cache.ts
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
import { mkdirSync, readdirSync, statSync, unlinkSync, renameSync, existsSync, writeFileSync, readFileSync } from "node:fs";
|
|
2
2
|
import { dirname, join, relative } from "node:path";
|
|
3
|
+
import { LruCache } from "../util/lru";
|
|
4
|
+
|
|
5
|
+
export type MappedSegmentFile = {
|
|
6
|
+
objectKey: string;
|
|
7
|
+
path: string;
|
|
8
|
+
bytes: Uint8Array;
|
|
9
|
+
sizeBytes: number;
|
|
10
|
+
};
|
|
3
11
|
|
|
4
12
|
export type SegmentCacheStats = {
|
|
5
13
|
hits: number;
|
|
@@ -9,21 +17,27 @@ export type SegmentCacheStats = {
|
|
|
9
17
|
usedBytes: number;
|
|
10
18
|
maxBytes: number;
|
|
11
19
|
entryCount: number;
|
|
20
|
+
mappedBytes: number;
|
|
21
|
+
mappedEntryCount: number;
|
|
22
|
+
pinnedEntryCount: number;
|
|
12
23
|
};
|
|
13
24
|
|
|
14
25
|
export class SegmentDiskCache {
|
|
15
26
|
private readonly rootDir: string;
|
|
16
27
|
private readonly maxBytes: number;
|
|
17
28
|
private readonly entries = new Map<string, { path: string; size: number }>();
|
|
29
|
+
private readonly pinnedKeys = new Set<string>();
|
|
30
|
+
private readonly mappedFiles: LruCache<string, MappedSegmentFile>;
|
|
18
31
|
private totalBytes = 0;
|
|
19
32
|
private hits = 0;
|
|
20
33
|
private misses = 0;
|
|
21
34
|
private evictions = 0;
|
|
22
35
|
private bytesAdded = 0;
|
|
23
36
|
|
|
24
|
-
constructor(rootDir: string, maxBytes: number) {
|
|
37
|
+
constructor(rootDir: string, maxBytes: number, mappedEntries = 64) {
|
|
25
38
|
this.rootDir = rootDir;
|
|
26
39
|
this.maxBytes = maxBytes;
|
|
40
|
+
this.mappedFiles = new LruCache(Math.max(1, mappedEntries));
|
|
27
41
|
if (this.maxBytes > 0) {
|
|
28
42
|
mkdirSync(this.rootDir, { recursive: true });
|
|
29
43
|
this.loadIndex();
|
|
@@ -86,8 +100,43 @@ export class SegmentDiskCache {
|
|
|
86
100
|
}
|
|
87
101
|
this.recordHit();
|
|
88
102
|
this.touch(objectKey);
|
|
103
|
+
const mapped = this.getMapped(objectKey);
|
|
104
|
+
if (mapped) return mapped.bytes;
|
|
105
|
+
const path = this.getPath(objectKey);
|
|
106
|
+
return readFileSync(path);
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
getMapped(objectKey: string): MappedSegmentFile | null {
|
|
110
|
+
if (!this.has(objectKey)) return null;
|
|
111
|
+
const cached = this.mappedFiles.get(objectKey);
|
|
112
|
+
if (cached) {
|
|
113
|
+
this.pinnedKeys.add(objectKey);
|
|
114
|
+
this.touch(objectKey);
|
|
115
|
+
return cached;
|
|
116
|
+
}
|
|
117
|
+
|
|
89
118
|
const path = this.getPath(objectKey);
|
|
90
|
-
|
|
119
|
+
let sizeBytes: number;
|
|
120
|
+
try {
|
|
121
|
+
sizeBytes = statSync(path).size;
|
|
122
|
+
} catch {
|
|
123
|
+
this.entries.delete(objectKey);
|
|
124
|
+
return null;
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
let bytes: Uint8Array;
|
|
128
|
+
try {
|
|
129
|
+
bytes = (Bun as any).mmap(path, { shared: true }) as Uint8Array;
|
|
130
|
+
} catch {
|
|
131
|
+
return null;
|
|
132
|
+
}
|
|
133
|
+
if (bytes.byteLength !== sizeBytes) return null;
|
|
134
|
+
|
|
135
|
+
const mapped = { objectKey, path, bytes, sizeBytes };
|
|
136
|
+
this.mappedFiles.set(objectKey, mapped);
|
|
137
|
+
this.pinnedKeys.add(objectKey);
|
|
138
|
+
this.touch(objectKey);
|
|
139
|
+
return mapped;
|
|
91
140
|
}
|
|
92
141
|
|
|
93
142
|
put(objectKey: string, bytes: Uint8Array): boolean {
|
|
@@ -111,6 +160,7 @@ export class SegmentDiskCache {
|
|
|
111
160
|
}
|
|
112
161
|
const existing = this.entries.get(objectKey);
|
|
113
162
|
if (existing) this.totalBytes = Math.max(0, this.totalBytes - existing.size);
|
|
163
|
+
this.mappedFiles.delete(objectKey);
|
|
114
164
|
this.entries.set(objectKey, { path: dest, size: sizeBytes });
|
|
115
165
|
this.totalBytes += sizeBytes;
|
|
116
166
|
this.bytesAdded += sizeBytes;
|
|
@@ -130,6 +180,7 @@ export class SegmentDiskCache {
|
|
|
130
180
|
}
|
|
131
181
|
const existing = this.entries.get(objectKey);
|
|
132
182
|
if (existing) this.totalBytes = Math.max(0, this.totalBytes - existing.size);
|
|
183
|
+
this.mappedFiles.delete(objectKey);
|
|
133
184
|
this.entries.set(objectKey, { path: dest, size: sizeBytes });
|
|
134
185
|
this.totalBytes += sizeBytes;
|
|
135
186
|
this.bytesAdded += sizeBytes;
|
|
@@ -137,6 +188,7 @@ export class SegmentDiskCache {
|
|
|
137
188
|
}
|
|
138
189
|
|
|
139
190
|
remove(objectKey: string): void {
|
|
191
|
+
if (this.pinnedKeys.has(objectKey)) return;
|
|
140
192
|
const entry = this.entries.get(objectKey);
|
|
141
193
|
if (!entry) return;
|
|
142
194
|
try {
|
|
@@ -151,6 +203,27 @@ export class SegmentDiskCache {
|
|
|
151
203
|
private evictIfNeeded(incomingBytes: number): void {
|
|
152
204
|
while (this.totalBytes + incomingBytes > this.maxBytes && this.entries.size > 0) {
|
|
153
205
|
const oldestKey = this.entries.keys().next().value as string;
|
|
206
|
+
if (this.pinnedKeys.has(oldestKey)) {
|
|
207
|
+
let removed = false;
|
|
208
|
+
for (const candidateKey of this.entries.keys()) {
|
|
209
|
+
if (this.pinnedKeys.has(candidateKey)) continue;
|
|
210
|
+
const candidate = this.entries.get(candidateKey);
|
|
211
|
+
if (!candidate) continue;
|
|
212
|
+
try {
|
|
213
|
+
unlinkSync(candidate.path);
|
|
214
|
+
} catch {
|
|
215
|
+
// ignore
|
|
216
|
+
}
|
|
217
|
+
this.totalBytes = Math.max(0, this.totalBytes - candidate.size);
|
|
218
|
+
this.entries.delete(candidateKey);
|
|
219
|
+
this.mappedFiles.delete(candidateKey);
|
|
220
|
+
this.evictions += 1;
|
|
221
|
+
removed = true;
|
|
222
|
+
break;
|
|
223
|
+
}
|
|
224
|
+
if (!removed) break;
|
|
225
|
+
continue;
|
|
226
|
+
}
|
|
154
227
|
const entry = this.entries.get(oldestKey);
|
|
155
228
|
if (entry) {
|
|
156
229
|
try {
|
|
@@ -159,6 +232,7 @@ export class SegmentDiskCache {
|
|
|
159
232
|
// ignore
|
|
160
233
|
}
|
|
161
234
|
this.totalBytes = Math.max(0, this.totalBytes - entry.size);
|
|
235
|
+
this.mappedFiles.delete(oldestKey);
|
|
162
236
|
this.evictions += 1;
|
|
163
237
|
}
|
|
164
238
|
this.entries.delete(oldestKey);
|
|
@@ -166,6 +240,12 @@ export class SegmentDiskCache {
|
|
|
166
240
|
}
|
|
167
241
|
|
|
168
242
|
stats(): SegmentCacheStats {
|
|
243
|
+
let mappedBytes = 0;
|
|
244
|
+
let mappedEntryCount = 0;
|
|
245
|
+
for (const mapped of this.mappedFiles.values()) {
|
|
246
|
+
mappedBytes += mapped.sizeBytes;
|
|
247
|
+
mappedEntryCount += 1;
|
|
248
|
+
}
|
|
169
249
|
return {
|
|
170
250
|
hits: this.hits,
|
|
171
251
|
misses: this.misses,
|
|
@@ -174,6 +254,17 @@ export class SegmentDiskCache {
|
|
|
174
254
|
usedBytes: this.totalBytes,
|
|
175
255
|
maxBytes: this.maxBytes,
|
|
176
256
|
entryCount: this.entries.size,
|
|
257
|
+
mappedBytes,
|
|
258
|
+
mappedEntryCount,
|
|
259
|
+
pinnedEntryCount: this.pinnedKeys.size,
|
|
177
260
|
};
|
|
178
261
|
}
|
|
262
|
+
|
|
263
|
+
bytesForObjectKeyPrefix(prefix: string): number {
|
|
264
|
+
let total = 0;
|
|
265
|
+
for (const [objectKey, entry] of this.entries.entries()) {
|
|
266
|
+
if (objectKey.startsWith(prefix)) total += entry.size;
|
|
267
|
+
}
|
|
268
|
+
return total;
|
|
269
|
+
}
|
|
179
270
|
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import type { SegmentRow } from "../db/db";
|
|
3
|
+
import type { ObjectStore } from "../objectstore/interface";
|
|
4
|
+
import type { SegmentDiskCache } from "./cache";
|
|
5
|
+
import { dsError } from "../util/ds_error";
|
|
6
|
+
import type { RetryOptions } from "../util/retry";
|
|
7
|
+
import { retry } from "../util/retry";
|
|
8
|
+
import { segmentObjectKey, streamHash16Hex } from "../util/stream_paths";
|
|
9
|
+
|
|
10
|
+
// Where a segment's bytes came from: "mapped" is a file-backed mapping (the
// path is retained alongside the mapped bytes); "bytes" is a plain in-memory buffer.
export type SegmentReadSource =
  | { kind: "mapped"; path: string; bytes: Uint8Array }
  | { kind: "bytes"; bytes: Uint8Array };
|
|
13
|
+
|
|
14
|
+
function readRangeFromBytes(bytes: Uint8Array, start: number, end: number): Uint8Array {
|
|
15
|
+
const boundedStart = Math.max(0, Math.min(start, bytes.byteLength));
|
|
16
|
+
const boundedEnd = Math.max(boundedStart, Math.min(end + 1, bytes.byteLength));
|
|
17
|
+
return bytes.subarray(boundedStart, boundedEnd);
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
export function readRangeFromSource(source: SegmentReadSource, start: number, end: number): Uint8Array {
|
|
21
|
+
return readRangeFromBytes(source.bytes, start, end);
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
/**
 * Loads a segment's bytes, preferring in order:
 *   1. the segment's local file (memory-mapped when possible),
 *   2. the disk cache (mapped first, then a plain read),
 *   3. the object store (with optional retry), populating the disk cache
 *      on the way out.
 * Returns a SegmentReadSource so callers know whether the bytes are mapped.
 */
export async function loadSegmentSource(
  os: ObjectStore,
  seg: SegmentRow,
  diskCache?: SegmentDiskCache,
  retryOpts?: RetryOptions
): Promise<SegmentReadSource> {
  // Fast path: the segment's original local file is still present on disk.
  if (seg.local_path && seg.local_path.length > 0 && existsSync(seg.local_path)) {
    try {
      // Bun.mmap is not in the public type defs here, hence the cast; a shared
      // mapping avoids copying the file into the heap.
      const bytes = (Bun as any).mmap(seg.local_path, { shared: true }) as Uint8Array;
      return { kind: "mapped", path: seg.local_path, bytes };
    } catch {
      // mmap failed (e.g. unsupported runtime) — fall back to a buffered read.
      return { kind: "bytes", bytes: readFileSync(seg.local_path) };
    }
  }

  const objectKey = segmentObjectKey(streamHash16Hex(seg.stream), seg.segment_index);
  if (diskCache && diskCache.has(objectKey)) {
    // Hit accounting and LRU touch happen before we know whether the mapped
    // or plain-file path serves the read.
    diskCache.recordHit();
    diskCache.touch(objectKey);
    const mapped = diskCache.getMapped(objectKey);
    if (mapped) return { kind: "mapped", path: mapped.path, bytes: mapped.bytes };
    const cachedPath = diskCache.getPath(objectKey);
    if (existsSync(cachedPath)) return { kind: "bytes", bytes: readFileSync(cachedPath) };
    // The index said present but the file is gone — drop the stale entry and refetch.
    diskCache.remove(objectKey);
  }

  if (diskCache) diskCache.recordMiss();

  // Fetch from the object store; a missing object is surfaced as an error so
  // retry() can re-attempt it. Defaults amount to a single attempt.
  const bytes = await retry(
    async () => {
      const res = await os.get(objectKey);
      if (!res) throw dsError(`object store missing segment: ${objectKey}`);
      return res;
    },
    retryOpts ?? { retries: 0, baseDelayMs: 0, maxDelayMs: 0, timeoutMs: 0 }
  );

  if (diskCache?.put(objectKey, bytes)) {
    // Serve the just-cached copy (mapped if possible) so the returned source
    // is backed by cache storage rather than the transient fetch buffer.
    const mapped = diskCache.getMapped(objectKey);
    if (mapped) return { kind: "mapped", path: mapped.path, bytes: mapped.bytes };
    return { kind: "bytes", bytes: readFileSync(diskCache.getPath(objectKey)) };
  }
  return { kind: "bytes", bytes };
}
|
|
68
|
+
|
|
69
|
+
export async function loadSegmentBytesCached(
|
|
70
|
+
os: ObjectStore,
|
|
71
|
+
seg: SegmentRow,
|
|
72
|
+
diskCache?: SegmentDiskCache,
|
|
73
|
+
retryOpts?: RetryOptions
|
|
74
|
+
): Promise<Uint8Array> {
|
|
75
|
+
const source = await loadSegmentSource(os, seg, diskCache, retryOpts);
|
|
76
|
+
return source.bytes;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export async function readSegmentRangeCached(
|
|
80
|
+
os: ObjectStore,
|
|
81
|
+
seg: SegmentRow,
|
|
82
|
+
start: number,
|
|
83
|
+
end: number,
|
|
84
|
+
diskCache?: SegmentDiskCache,
|
|
85
|
+
retryOpts?: RetryOptions
|
|
86
|
+
): Promise<Uint8Array> {
|
|
87
|
+
const source = await loadSegmentSource(os, seg, diskCache, retryOpts);
|
|
88
|
+
return readRangeFromSource(source, start, end);
|
|
89
|
+
}
|
package/src/segment/format.ts
CHANGED
|
@@ -59,6 +59,14 @@ export type IterateBlockEntry = {
|
|
|
59
59
|
decoded: DecodedBlock;
|
|
60
60
|
};
|
|
61
61
|
|
|
62
|
+
// One decoded record plus its physical location inside the segment, as yielded
// by iterateBlockRecords / iterateBlockRecordsResult.
export type IterateBlockRecordEntry = {
  // Byte offset of the enclosing block within the segment.
  blockOffset: number;
  // Zero-based position of the record inside its block.
  recordIndex: number;
  // Record append timestamp in nanoseconds (stored as u64 big-endian).
  appendNs: bigint;
  // Routing key bytes — a subarray view into the decompressed block buffer.
  routingKey: Uint8Array;
  // Record payload bytes — a subarray view into the decompressed block buffer.
  payload: Uint8Array;
};
|
|
69
|
+
|
|
62
70
|
function invalidSegment<T = never>(message: string): Result<T, SegmentFormatError> {
|
|
63
71
|
return Result.err({ kind: "invalid_segment_format", message });
|
|
64
72
|
}
|
|
@@ -125,59 +133,39 @@ export function decodeBlock(blockBytes: Uint8Array): DecodedBlock {
|
|
|
125
133
|
}
|
|
126
134
|
|
|
127
135
|
/**
 * Decodes a single DSB3 block (header + compressed payload) into its records.
 * Header parsing and CRC-checked zstd decompression are delegated to
 * parseBlockHeaderResult / decompressBlockPayloadResult; this function only
 * walks the record framing inside the decompressed buffer.
 * Record framing: u64 appendNs, u32 keyLen, key bytes, u32 dataLen, data bytes.
 */
export function decodeBlockResult(blockBytes: Uint8Array): Result<DecodedBlock, SegmentFormatError> {
  const headerRes = parseBlockHeaderResult(blockBytes);
  if (Result.isError(headerRes)) return headerRes;
  const header = headerRes.value;
  const uncompressedRes = decompressBlockPayloadResult(blockBytes, header);
  if (Result.isError(uncompressedRes)) return uncompressedRes;
  const uncompressed = uncompressedRes.value;

  const records: SegmentRecord[] = [];
  let off = 0;
  for (let i = 0; i < header.recordCount; i++) {
    // Bounds-check each fixed-width field before reading it.
    if (off + 8 + 4 > uncompressed.byteLength) return invalidSegment("truncated record");
    const appendNs = readU64BE(uncompressed, off);
    off += 8;
    const keyLen = readU32BE(uncompressed, off);
    off += 4;
    if (off + keyLen + 4 > uncompressed.byteLength) return invalidSegment("truncated key");
    // subarray: zero-copy view into the decompressed buffer.
    const routingKey = uncompressed.subarray(off, off + keyLen);
    off += keyLen;
    const dataLen = readU32BE(uncompressed, off);
    off += 4;
    if (off + dataLen > uncompressed.byteLength) return invalidSegment("truncated payload");
    const payload = uncompressed.subarray(off, off + dataLen);
    off += dataLen;
    records.push({ appendNs, routingKey, payload });
  }

  return Result.ok({
    recordCount: header.recordCount,
    firstAppendNs: header.firstAppendNs,
    lastAppendNs: header.lastAppendNs,
    // slice() copies the bloom bytes out of the header view.
    bloom: header.bloom.slice(),
    records,
  });
}
|
|
182
170
|
|
|
183
171
|
export function encodeFooter(entries: BlockIndexEntry[]): Uint8Array {
|
|
@@ -261,6 +249,65 @@ export function* iterateBlocksResult(
|
|
|
261
249
|
}
|
|
262
250
|
}
|
|
263
251
|
|
|
252
|
+
/**
 * Streams every record in a segment: walks block-by-block up to the footer
 * (or to the end of the buffer when no footer parses), decompresses each
 * block, and yields one entry per record with its block offset and index.
 * On the first malformed structure it yields a single error Result and stops.
 */
export function* iterateBlockRecordsResult(
  segmentBytes: Uint8Array
): Generator<Result<IterateBlockRecordEntry, SegmentFormatError>, void, void> {
  const parsed = parseFooter(segmentBytes);
  // Blocks occupy everything before the footer; without one, scan to the end.
  const limit = parsed ? parsed.footerStart : segmentBytes.byteLength;
  let off = 0;
  while (off < limit) {
    if (off + DSB3_HEADER_BYTES > limit) {
      yield invalidSegment("truncated segment (block header)");
      return;
    }
    const headerRes = parseBlockHeaderResult(segmentBytes.subarray(off, off + DSB3_HEADER_BYTES));
    if (Result.isError(headerRes)) {
      yield headerRes;
      return;
    }
    const header = headerRes.value;
    const totalLen = DSB3_HEADER_BYTES + header.compressedLen;
    if (off + totalLen > limit) {
      yield invalidSegment("truncated segment (block payload)");
      return;
    }
    const blockBytes = segmentBytes.subarray(off, off + totalLen);
    const uncompressedRes = decompressBlockPayloadResult(blockBytes, header);
    if (Result.isError(uncompressedRes)) {
      yield uncompressedRes;
      return;
    }
    const uncompressed = uncompressedRes.value;
    // Record framing: u64 appendNs, u32 keyLen, key, u32 dataLen, data.
    let recOff = 0;
    for (let recordIndex = 0; recordIndex < header.recordCount; recordIndex++) {
      if (recOff + 8 + 4 > uncompressed.byteLength) {
        yield invalidSegment("truncated record");
        return;
      }
      const appendNs = readU64BE(uncompressed, recOff);
      recOff += 8;
      const keyLen = readU32BE(uncompressed, recOff);
      recOff += 4;
      if (recOff + keyLen + 4 > uncompressed.byteLength) {
        yield invalidSegment("truncated key");
        return;
      }
      // subarray: zero-copy views into the decompressed buffer.
      const routingKey = uncompressed.subarray(recOff, recOff + keyLen);
      recOff += keyLen;
      const dataLen = readU32BE(uncompressed, recOff);
      recOff += 4;
      if (recOff + dataLen > uncompressed.byteLength) {
        yield invalidSegment("truncated payload");
        return;
      }
      const payload = uncompressed.subarray(recOff, recOff + dataLen);
      recOff += dataLen;
      yield Result.ok({ blockOffset: off, recordIndex, appendNs, routingKey, payload });
    }
    off += totalLen;
  }
}
|
|
310
|
+
|
|
264
311
|
export function* iterateBlocks(segmentBytes: Uint8Array): Generator<IterateBlockEntry, void, void> {
|
|
265
312
|
for (const itemRes of iterateBlocksResult(segmentBytes)) {
|
|
266
313
|
if (Result.isError(itemRes)) throw dsError(itemRes.error.message);
|
|
@@ -268,6 +315,13 @@ export function* iterateBlocks(segmentBytes: Uint8Array): Generator<IterateBlock
|
|
|
268
315
|
}
|
|
269
316
|
}
|
|
270
317
|
|
|
318
|
+
export function* iterateBlockRecords(segmentBytes: Uint8Array): Generator<IterateBlockRecordEntry, void, void> {
|
|
319
|
+
for (const itemRes of iterateBlockRecordsResult(segmentBytes)) {
|
|
320
|
+
if (Result.isError(itemRes)) throw dsError(itemRes.error.message);
|
|
321
|
+
yield itemRes.value;
|
|
322
|
+
}
|
|
323
|
+
}
|
|
324
|
+
|
|
271
325
|
export function parseFooterBytes(footer: Uint8Array): SegmentFooter | null {
|
|
272
326
|
if (footer.byteLength < 12) return null;
|
|
273
327
|
const magic = String.fromCharCode(footer[0], footer[1], footer[2], footer[3]);
|
|
@@ -329,3 +383,21 @@ export function parseBlockHeaderResult(header: Uint8Array): Result<BlockHeader,
|
|
|
329
383
|
crc32c: crc32cVal,
|
|
330
384
|
});
|
|
331
385
|
}
|
|
386
|
+
|
|
387
|
+
/**
 * Extracts a block's compressed payload, verifies its CRC32C against the
 * header value, zstd-decompresses it, and validates the decompressed length
 * against the header. Any mismatch produces an invalid_segment_format error.
 */
function decompressBlockPayloadResult(blockBytes: Uint8Array, header: BlockHeader): Result<Uint8Array, SegmentFormatError> {
  const payload = blockBytes.subarray(DSB3_HEADER_BYTES, DSB3_HEADER_BYTES + header.compressedLen);
  // subarray clamps to the buffer, so a short block shows up as a shorter-than-declared payload.
  if (payload.byteLength !== header.compressedLen) return invalidSegment("truncated block");
  const actualCrc = crc32c(payload);
  if (actualCrc !== header.crc32c) return invalidSegment("crc mismatch");

  let uncompressed: Uint8Array;
  try {
    uncompressed = new Uint8Array(zstdDecompressSync(payload));
  } catch (e: any) {
    // Surface zstd failures as a format error instead of letting them throw.
    return invalidSegment(String(e?.message ?? e));
  }
  if (uncompressed.byteLength !== header.uncompressedLen) {
    return invalidSegment(`bad uncompressed len: got=${uncompressed.byteLength} expected=${header.uncompressedLen}`);
  }
  return Result.ok(uncompressed);
}
|