@tungthedev/streams-server 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +45 -0
- package/CONTRIBUTING.md +76 -0
- package/LICENSE +201 -0
- package/README.md +58 -0
- package/SECURITY.md +42 -0
- package/bin/prisma-streams-server +2 -0
- package/package.json +46 -0
- package/src/app.ts +583 -0
- package/src/app_core.ts +3144 -0
- package/src/app_local.ts +206 -0
- package/src/auth.ts +124 -0
- package/src/auto_tune.ts +69 -0
- package/src/backpressure.ts +66 -0
- package/src/bootstrap.ts +613 -0
- package/src/compute/demo_entry.ts +415 -0
- package/src/compute/demo_site.ts +1242 -0
- package/src/compute/entry.ts +19 -0
- package/src/compute/package_entry.ts +4 -0
- package/src/compute/virtual-modules.d.ts +15 -0
- package/src/compute/worker_module_url.ts +9 -0
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +402 -0
- package/src/db/bootstrap_store.ts +9 -0
- package/src/db/db.ts +2424 -0
- package/src/db/schema.ts +925 -0
- package/src/db/sqlite_manifest_snapshot.ts +81 -0
- package/src/db/sqlite_touch_store.ts +491 -0
- package/src/db/sqlite_wal_store.ts +472 -0
- package/src/details/full_mode_details.ts +568 -0
- package/src/expiry_sweeper.ts +47 -0
- package/src/foreground_activity.ts +55 -0
- package/src/hist.ts +169 -0
- package/src/index/binary_fuse.ts +379 -0
- package/src/index/indexer.ts +947 -0
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +863 -0
- package/src/index/run_cache.ts +84 -0
- package/src/index/run_format.ts +213 -0
- package/src/index/schedule.ts +28 -0
- package/src/index/secondary_indexer.ts +901 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +309 -0
- package/src/lens/lens.ts +501 -0
- package/src/manifest.ts +249 -0
- package/src/memory.ts +334 -0
- package/src/metrics.ts +147 -0
- package/src/metrics_emitter.ts +83 -0
- package/src/notifier.ts +180 -0
- package/src/objectstore/accounting.ts +151 -0
- package/src/objectstore/interface.ts +13 -0
- package/src/objectstore/mock_r2.ts +269 -0
- package/src/objectstore/null.ts +32 -0
- package/src/objectstore/r2.ts +318 -0
- package/src/observe/pairing.ts +61 -0
- package/src/observe/request.ts +772 -0
- package/src/offset.ts +70 -0
- package/src/postgres/bootstrap.ts +269 -0
- package/src/postgres/companions.ts +197 -0
- package/src/postgres/control_restore.ts +109 -0
- package/src/postgres/details.ts +189 -0
- package/src/postgres/lexicon_index.ts +260 -0
- package/src/postgres/routing_index.ts +189 -0
- package/src/postgres/rows.ts +132 -0
- package/src/postgres/schema.ts +355 -0
- package/src/postgres/secondary_index.ts +238 -0
- package/src/postgres/segments.ts +900 -0
- package/src/postgres/stats.ts +103 -0
- package/src/postgres/store.ts +947 -0
- package/src/postgres/touch.ts +591 -0
- package/src/postgres/types.ts +32 -0
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +473 -0
- package/src/profiles/generic.ts +51 -0
- package/src/profiles/index.ts +237 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +83 -0
- package/src/profiles/otelTraces/normalize.ts +955 -0
- package/src/profiles/otelTraces/otlp.ts +1002 -0
- package/src/profiles/otelTraces/schema.ts +408 -0
- package/src/profiles/otelTraces.ts +390 -0
- package/src/profiles/profile.ts +284 -0
- package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
- package/src/profiles/stateProtocol/changes.ts +24 -0
- package/src/profiles/stateProtocol/ingest.ts +115 -0
- package/src/profiles/stateProtocol/routes.ts +511 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +107 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2986 -0
- package/src/runtime/hash.ts +156 -0
- package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
- package/src/runtime/hash_vendor/NOTICE.md +8 -0
- package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +237 -0
- package/src/schema/lens_schema.ts +290 -0
- package/src/schema/proof.ts +547 -0
- package/src/schema/read_json.ts +51 -0
- package/src/schema/registry.ts +966 -0
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +409 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +327 -0
- package/src/search/companion_manager.ts +1305 -0
- package/src/search/companion_plan.ts +229 -0
- package/src/search/exact_format.ts +281 -0
- package/src/search/exact_runtime.ts +55 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +270 -0
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +403 -0
- package/src/segment/segmenter.ts +412 -0
- package/src/segment/segmenter_worker.ts +72 -0
- package/src/segment/segmenter_workers.ts +130 -0
- package/src/server.ts +264 -0
- package/src/server_auto_tune.ts +158 -0
- package/src/sqlite/adapter.ts +335 -0
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +205 -0
- package/src/store/append.ts +50 -0
- package/src/store/bootstrap_restore_store.ts +71 -0
- package/src/store/capabilities.ts +86 -0
- package/src/store/full_mode_details_store.ts +71 -0
- package/src/store/index_store.ts +104 -0
- package/src/store/profile_touch_store.ts +1 -0
- package/src/store/rows.ts +144 -0
- package/src/store/schema_profile_store.ts +73 -0
- package/src/store/schema_publication.ts +6 -0
- package/src/store/segment_manifest_store.ts +129 -0
- package/src/store/segment_read_store.ts +22 -0
- package/src/store/stats_accounting_store.ts +83 -0
- package/src/store/touch_store.ts +98 -0
- package/src/store/wal_store.ts +21 -0
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_keys.ts +158 -0
- package/src/touch/live_metrics.ts +841 -0
- package/src/touch/live_templates.ts +449 -0
- package/src/touch/manager.ts +1292 -0
- package/src/touch/process_batch.ts +576 -0
- package/src/touch/processor_worker.ts +85 -0
- package/src/touch/spec.ts +459 -0
- package/src/touch/touch_journal.ts +771 -0
- package/src/touch/touch_key_id.ts +20 -0
- package/src/touch/worker_pool.ts +191 -0
- package/src/touch/worker_protocol.ts +57 -0
- package/src/types/proper-lockfile.d.ts +1 -0
- package/src/uploader.ts +358 -0
- package/src/util/base32_crockford.ts +81 -0
- package/src/util/bloom256.ts +67 -0
- package/src/util/byte_lru.ts +73 -0
- package/src/util/cleanup.ts +22 -0
- package/src/util/crc32c.ts +29 -0
- package/src/util/ds_error.ts +15 -0
- package/src/util/duration.ts +17 -0
- package/src/util/endian.ts +53 -0
- package/src/util/json_pointer.ts +148 -0
- package/src/util/log.ts +25 -0
- package/src/util/lru.ts +53 -0
- package/src/util/retry.ts +35 -0
- package/src/util/siphash.ts +71 -0
- package/src/util/stream_paths.ts +50 -0
- package/src/util/time.ts +14 -0
- package/src/util/yield.ts +3 -0
- package/src/util/zstd.ts +24 -0
|
@@ -0,0 +1,245 @@
|
|
|
1
|
+
import { Result } from "better-result";
|
|
2
|
+
import type { SchemaRegistry, SearchConfig, SearchFieldBinding, SearchFieldConfig } from "../schema/registry";
|
|
3
|
+
import { resolvePointerResult } from "../util/json_pointer";
|
|
4
|
+
import { schemaVersionForOffset } from "../schema/read_json";
|
|
5
|
+
|
|
6
|
+
/** One exact-match term produced for a search field by `extractSearchExactTermsResult`. */
export type SearchExactTerm = {
  /** Name of the search field the term belongs to. */
  field: string;
  /** The field's search configuration, as looked up in the registry. */
  config: SearchFieldConfig;
  /** Canonical string form of the raw value (see `canonicalizeExactValue`). */
  canonical: string;
  /** `canonical` encoded with `TextEncoder` (UTF-8). */
  bytes: Uint8Array;
};
|
|
12
|
+
|
|
13
|
+
/**
 * Resolve a field name through the search config's alias table.
 *
 * @param search - Search configuration, or `undefined` when the stream has none.
 * @param fieldName - Field name as supplied by the caller (possibly an alias).
 * @returns The alias target when `fieldName` is a declared alias, otherwise
 *   `fieldName` unchanged.
 */
export function resolveSearchAlias(search: SearchConfig | undefined, fieldName: string): string {
  const aliases = search?.aliases;
  // Only honour aliases declared on the table itself. A bare index lookup would
  // also resolve inherited members such as "constructor" or "toString" and
  // return a function where a string is promised.
  if (aliases == null || !Object.prototype.hasOwnProperty.call(aliases, fieldName)) {
    return fieldName;
  }
  return aliases[fieldName] ?? fieldName;
}
|
|
16
|
+
|
|
17
|
+
/**
 * Look up the configuration of a search field, following aliases first.
 *
 * @param search - Search configuration, or `undefined` when the stream has none.
 * @param fieldName - Field name or alias.
 * @returns The field's configuration, or `null` when no such field is configured.
 */
export function getSearchFieldConfig(search: SearchConfig | undefined, fieldName: string): SearchFieldConfig | null {
  const fields = search?.fields;
  if (fields == null) return null;
  const resolved = resolveSearchAlias(search, fieldName);
  // Require an own property so that names like "constructor" or "__proto__"
  // report "not configured" instead of returning an inherited Object member.
  if (!Object.prototype.hasOwnProperty.call(fields, resolved)) return null;
  return fields[resolved] ?? null;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Pick the binding that applies to a given schema version: the one with the
 * highest `version` that does not exceed `version`. When several bindings share
 * that version, the first one listed wins.
 *
 * @returns The selected binding, or `null` when every binding is newer than `version`.
 */
export function getSearchFieldBinding(config: SearchFieldConfig, version: number): SearchFieldBinding | null {
  let best: SearchFieldBinding | null = null;
  for (const candidate of config.bindings) {
    // Skip bindings introduced after the requested version.
    if (!(candidate.version <= version)) continue;
    // Keep the current pick unless this candidate is strictly newer.
    if (best && !(candidate.version > best.version)) continue;
    best = candidate;
  }
  return best;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Normalize a raw keyword value.
 *
 * @param value - Raw value; anything that is not a string is rejected.
 * @param normalizer - `"lowercase_v1"` lower-cases the string; any other value
 *   leaves it untouched.
 * @returns The normalized string, or `null` for non-string input.
 */
export function normalizeKeywordValue(value: unknown, normalizer: SearchFieldConfig["normalizer"]): string | null {
  if (typeof value === "string") {
    if (normalizer === "lowercase_v1") return value.toLowerCase();
    return value;
  }
  return null;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Convert a raw value into the canonical string used for exact matching on a
 * search field.
 *
 * Per `config.kind`:
 * - `"keyword"`: delegated to `normalizeKeywordValue`.
 * - `"integer"`: bigint, integral number, or decimal-integer string -> plain
 *   base-10 digits (optionally with a leading `-`).
 * - `"float"`: bigint, finite number, or numeric string -> `String(number)`.
 * - `"date"`: number or bigint is used as-is (numbers truncated toward zero);
 *   strings are tried with `Date.parse` first, then as a decimal integer.
 * - `"bool"`: boolean, or the strings `"true"` / `"false"` (trimmed,
 *   case-insensitive).
 *
 * @returns The canonical string, or `null` when the value cannot be
 *   represented for this kind (or the kind is not handled here).
 */
export function canonicalizeExactValue(config: SearchFieldConfig, value: unknown): string | null {
  const decimalInteger = /^-?(0|[1-9][0-9]*)$/;
  switch (config.kind) {
    case "keyword":
      return normalizeKeywordValue(value, config.normalizer);
    case "integer": {
      if (typeof value === "bigint") return value.toString();
      // Number.isInteger already rejects NaN and +/-Infinity. Format through
      // BigInt so magnitudes >= 1e21 print as digits rather than "1e+21",
      // which is not a canonical integer and cannot be parsed by BigInt().
      if (typeof value === "number" && Number.isInteger(value)) return BigInt(value).toString();
      if (typeof value === "string") {
        const trimmed = value.trim();
        if (decimalInteger.test(trimmed)) return BigInt(trimmed).toString();
      }
      return null;
    }
    case "float": {
      if (typeof value === "bigint") return value.toString();
      if (typeof value === "number" && Number.isFinite(value)) return String(value);
      if (typeof value === "string" && value.trim() !== "") {
        const parsed = Number(value);
        if (Number.isFinite(parsed)) return String(parsed);
      }
      return null;
    }
    case "date": {
      // Math.trunc of a finite number is integral, so BigInt() cannot throw
      // here; as above this avoids exponent notation for huge magnitudes.
      if (typeof value === "number" && Number.isFinite(value)) return BigInt(Math.trunc(value)).toString();
      if (typeof value === "bigint") return value.toString();
      if (typeof value === "string" && value.trim() !== "") {
        // NOTE(review): Date.parse runs before the integer check, so a digit-only
        // string the engine accepts as a date (e.g. a bare year) is canonicalized
        // as that date rather than as a raw number. Confirm this is intended.
        const parsed = Date.parse(value);
        if (Number.isFinite(parsed)) return String(Math.trunc(parsed));
        const trimmed = value.trim();
        if (decimalInteger.test(trimmed)) return BigInt(trimmed).toString();
      }
      return null;
    }
    case "bool": {
      if (typeof value === "boolean") return value ? "true" : "false";
      if (typeof value === "string") {
        const lowered = value.trim().toLowerCase();
        if (lowered === "true" || lowered === "false") return lowered;
      }
      return null;
    }
    default:
      return null;
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * Convert a raw value into the typed form stored for a column-indexed field.
 *
 * `"integer"` and `"date"` fields yield a `bigint`, `"float"` a finite
 * `number`, `"bool"` a `boolean`. Every other kind yields `null`.
 *
 * @returns The typed value, or `null` when `canonicalizeExactValue` rejects
 *   the input or the kind has no column representation.
 */
export function canonicalizeColumnValue(config: SearchFieldConfig, value: unknown): bigint | number | boolean | null {
  switch (config.kind) {
    case "integer":
    case "date": {
      const canonical = canonicalizeExactValue(config, value);
      if (canonical == null) return null;
      if (/^-?[0-9]+$/.test(canonical)) return BigInt(canonical);
      // BigInt() throws a SyntaxError on anything but plain digits, e.g. the
      // exponent form "1e+21" that String() produces for very large numbers.
      // Recover integral values through Number and drop everything else.
      const numeric = Number(canonical);
      return Number.isInteger(numeric) ? BigInt(numeric) : null;
    }
    case "float": {
      const canonical = canonicalizeExactValue(config, value);
      if (canonical == null) return null;
      const parsed = Number(canonical);
      return Number.isFinite(parsed) ? parsed : null;
    }
    case "bool": {
      // Canonicalize once and branch on the result.
      const canonical = canonicalizeExactValue(config, value);
      if (canonical === "true") return true;
      if (canonical === "false") return false;
      return null;
    }
    default:
      return null;
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Split text into search tokens.
 *
 * Only the `"unicode_word_v1"` analyzer is implemented: the input is
 * lower-cased and every maximal run of Unicode letters/numbers becomes a token.
 * Any other analyzer yields no tokens.
 */
export function analyzeTextValue(value: string, analyzer: SearchFieldConfig["analyzer"]): string[] {
  if (analyzer === "unicode_word_v1") {
    const lowered = value.toLowerCase();
    const tokens = lowered.match(/[\p{L}\p{N}]+/gu);
    if (tokens === null) return [];
    return tokens.filter((token) => token.length > 0);
  }
  return [];
}
|
|
107
|
+
|
|
108
|
+
/**
 * Append `value` to `out`, flattening nested arrays depth-first so that only
 * non-array leaves are pushed, in their original order.
 */
function addRawValues(out: unknown[], value: unknown): void {
  if (!Array.isArray(value)) {
    out.push(value);
    return;
  }
  for (let i = 0; i < value.length; i += 1) {
    addRawValues(out, value[i]);
  }
}
|
|
115
|
+
|
|
116
|
+
/**
 * Collect the raw (un-normalized) values of the requested search fields from
 * one record.
 *
 * For each field name, the binding matching the schema version at `offset` is
 * resolved, its JSON pointer is evaluated against `value`, and the result is
 * flattened (nested arrays become individual values).
 *
 * @param reg - Schema registry; when it has no search config the result is an
 *   empty map.
 * @param offset - Record offset, used to determine the schema version.
 * @param value - The record; must be a non-array object.
 * @param fieldNames - Field names to extract. Unknown names and fields without
 *   an applicable binding are skipped.
 * @returns A map from field name to its values (fields with no values are
 *   omitted), or an error when the record is not an object or a pointer fails
 *   to resolve.
 */
export function extractRawSearchValuesForFieldsResult(
  reg: SchemaRegistry,
  offset: bigint,
  value: unknown,
  fieldNames: Iterable<string>
): Result<Map<string, unknown[]>, { message: string }> {
  if (!reg.search) return Result.ok(new Map());

  const isObjectRecord = Boolean(value) && typeof value === "object" && !Array.isArray(value);
  if (!isObjectRecord) {
    return Result.err({ message: "search fields require JSON object records" });
  }

  const version = schemaVersionForOffset(reg, offset);
  const collected = new Map<string, unknown[]>();
  for (const name of fieldNames) {
    const fieldConfig = reg.search.fields[name];
    const binding = fieldConfig ? getSearchFieldBinding(fieldConfig, version) : null;
    if (!binding) continue;

    const pointerRes = resolvePointerResult(value, binding.jsonPointer);
    if (Result.isError(pointerRes)) {
      return Result.err({ message: pointerRes.error.message });
    }
    const resolution = pointerRes.value;
    if (!resolution.exists) continue;

    const flattened: unknown[] = [];
    addRawValues(flattened, resolution.value);
    if (flattened.length > 0) collected.set(name, flattened);
  }
  return Result.ok(collected);
}
|
|
142
|
+
|
|
143
|
+
/**
 * Collect the raw values of every configured search field from one record.
 * Equivalent to `extractRawSearchValuesForFieldsResult` with all field names
 * from the registry's search config.
 */
export function extractRawSearchValuesResult(
  reg: SchemaRegistry,
  offset: bigint,
  value: unknown
): Result<Map<string, unknown[]>, { message: string }> {
  const configuredFields = reg.search?.fields ?? {};
  const allFieldNames = Object.keys(configuredFields);
  return extractRawSearchValuesForFieldsResult(reg, offset, value, allFieldNames);
}
|
|
150
|
+
|
|
151
|
+
/**
 * Build the de-duplicated list of exact-match terms for one record.
 *
 * Only fields whose config has `exact` set contribute. Each raw value is
 * canonicalized with `canonicalizeExactValue`; values that cannot be
 * canonicalized are skipped, and a given (field, canonical) pair is emitted
 * once.
 *
 * @returns The terms in extraction order, or the error from raw-value
 *   extraction.
 */
export function extractSearchExactTermsResult(
  reg: SchemaRegistry,
  offset: bigint,
  value: unknown
): Result<SearchExactTerm[], { message: string }> {
  const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
  if (Result.isError(rawValuesRes)) return rawValuesRes;
  const out: SearchExactTerm[] = [];
  const seen = new Set<string>();
  // One encoder serves every term; it is stateless, so there is no need to
  // allocate a fresh one per value.
  const encoder = new TextEncoder();
  for (const [fieldName, values] of rawValuesRes.value) {
    const config = reg.search?.fields[fieldName];
    if (!config?.exact) continue;
    for (const rawValue of values) {
      const canonical = canonicalizeExactValue(config, rawValue);
      if (canonical == null) continue;
      // NUL separates the two parts of the dedupe key.
      const dedupeKey = `${fieldName}\u0000${canonical}`;
      if (seen.has(dedupeKey)) continue;
      seen.add(dedupeKey);
      out.push({
        field: fieldName,
        config,
        canonical,
        bytes: encoder.encode(canonical),
      });
    }
  }
  return Result.ok(out);
}
|
|
179
|
+
|
|
180
|
+
/**
 * Extract, per configured search field, the canonical exact-match strings of
 * one record. Values that cannot be canonicalized are dropped, duplicates are
 * kept, and fields left with no values are omitted from the map.
 */
export function extractSearchExactValuesResult(
  reg: SchemaRegistry,
  offset: bigint,
  value: unknown
): Result<Map<string, string[]>, { message: string }> {
  const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
  if (Result.isError(rawValuesRes)) return rawValuesRes;

  const byField = new Map<string, string[]>();
  for (const [fieldName, rawValues] of rawValuesRes.value) {
    const fieldConfig = reg.search?.fields[fieldName];
    if (!fieldConfig) continue;
    const canonicals = rawValues
      .map((raw) => canonicalizeExactValue(fieldConfig, raw))
      .filter((canonical): canonical is string => canonical != null);
    if (canonicals.length === 0) continue;
    byField.set(fieldName, canonicals);
  }
  return Result.ok(byField);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Extract, per search field whose config has `column` set, the typed column
 * values of one record. Values that `canonicalizeColumnValue` rejects are
 * dropped, and fields left with no values are omitted from the map.
 */
export function extractSearchColumnValuesResult(
  reg: SchemaRegistry,
  offset: bigint,
  value: unknown
): Result<Map<string, Array<bigint | number | boolean>>, { message: string }> {
  const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
  if (Result.isError(rawValuesRes)) return rawValuesRes;

  const byField = new Map<string, Array<bigint | number | boolean>>();
  for (const [fieldName, rawValues] of rawValuesRes.value) {
    const fieldConfig = reg.search?.fields[fieldName];
    if (!fieldConfig?.column) continue;
    const typed = rawValues
      .map((raw) => canonicalizeColumnValue(fieldConfig, raw))
      .filter((item): item is bigint | number | boolean => item != null);
    if (typed.length === 0) continue;
    byField.set(fieldName, typed);
  }
  return Result.ok(byField);
}
|
|
221
|
+
|
|
222
|
+
/**
 * Extract, per search field, the string values of one record that are usable
 * as text. `"keyword"` fields contribute their normalized strings, `"text"`
 * fields contribute raw strings unchanged, and every other kind contributes
 * nothing. Fields left with no values are omitted from the map.
 */
export function extractSearchTextValuesResult(
  reg: SchemaRegistry,
  offset: bigint,
  value: unknown
): Result<Map<string, string[]>, { message: string }> {
  const rawValuesRes = extractRawSearchValuesResult(reg, offset, value);
  if (Result.isError(rawValuesRes)) return rawValuesRes;

  const byField = new Map<string, string[]>();
  for (const [fieldName, rawValues] of rawValuesRes.value) {
    const fieldConfig = reg.search?.fields[fieldName];
    if (!fieldConfig) continue;

    const strings: string[] = [];
    for (const raw of rawValues) {
      if (fieldConfig.kind === "keyword") {
        const keyword = normalizeKeywordValue(raw, fieldConfig.normalizer);
        if (keyword != null) strings.push(keyword);
        continue;
      }
      if (fieldConfig.kind === "text" && typeof raw === "string") {
        strings.push(raw);
      }
    }
    if (strings.length > 0) byField.set(fieldName, strings);
  }
  return Result.ok(byField);
}
|
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
import { mkdirSync, readdirSync, statSync, unlinkSync, renameSync, existsSync, writeFileSync, readFileSync } from "node:fs";
|
|
2
|
+
import { dirname, join, relative } from "node:path";
|
|
3
|
+
import { LruCache } from "../util/lru";
|
|
4
|
+
|
|
5
|
+
/** A cached segment file together with its memory-mapped contents. */
export type MappedSegmentFile = {
  /** Cache key of the segment (its object-store key). */
  objectKey: string;
  /** Path of the cached file on disk. */
  path: string;
  /** The file's contents as returned by `Bun.mmap`. */
  bytes: Uint8Array;
  /** File size in bytes, as reported by `statSync` when the mapping was made. */
  sizeBytes: number;
};
|
|
11
|
+
|
|
12
|
+
/** Snapshot of `SegmentDiskCache` counters, as returned by its `stats()` method. */
export type SegmentCacheStats = {
  /** Number of recorded cache hits. */
  hits: number;
  /** Number of recorded cache misses. */
  misses: number;
  /** Number of entries removed by eviction. */
  evictions: number;
  /** Cumulative bytes stored by successful puts. */
  bytesAdded: number;
  /** Bytes currently accounted to indexed entries. */
  usedBytes: number;
  /** Configured size budget in bytes. */
  maxBytes: number;
  /** Number of indexed entries. */
  entryCount: number;
  /** Sum of `sizeBytes` over the currently held memory mappings. */
  mappedBytes: number;
  /** Number of currently held memory mappings. */
  mappedEntryCount: number;
  /** Number of keys marked as pinned. */
  pinnedEntryCount: number;
};
|
|
24
|
+
|
|
25
|
+
/**
 * Size-bounded on-disk cache of segment files, keyed by object key.
 *
 * Files live under `rootDir` at `join(rootDir, objectKey)`. An in-memory index
 * tracks each file's size; the index's insertion order doubles as recency
 * order (`touch` moves a key to the newest position) and eviction removes the
 * oldest unpinned entries first. A cache constructed with `maxBytes <= 0`
 * never stores anything.
 */
export class SegmentDiskCache {
  private readonly rootDir: string;
  private readonly maxBytes: number;
  /** Index of cached files; iteration order is oldest-first. */
  private readonly entries = new Map<string, { path: string; size: number }>();
  /**
   * Keys that have been handed out as memory mappings. Eviction and `remove`
   * skip them.
   * NOTE(review): nothing in this class ever deletes from this set, so a key
   * stays pinned for the life of the instance even after its mapping has been
   * dropped from `mappedFiles`. Confirm that is intended.
   */
  private readonly pinnedKeys = new Set<string>();
  private readonly mappedFiles: LruCache<string, MappedSegmentFile>;
  private totalBytes = 0;
  private hits = 0;
  private misses = 0;
  private evictions = 0;
  private bytesAdded = 0;

  /**
   * @param rootDir - Directory holding the cached files; created when
   *   `maxBytes > 0`.
   * @param maxBytes - Size budget in bytes. `<= 0` disables storing.
   * @param mappedEntries - Capacity of the memory-mapping LRU (at least 1).
   */
  constructor(rootDir: string, maxBytes: number, mappedEntries = 64) {
    this.rootDir = rootDir;
    this.maxBytes = maxBytes;
    this.mappedFiles = new LruCache(Math.max(1, mappedEntries));
    if (this.maxBytes > 0) {
      mkdirSync(this.rootDir, { recursive: true });
      this.loadIndex();
    }
  }

  /**
   * Rebuild the index from the files already under `rootDir`, ordered by
   * modification time (oldest first), then evict down to the budget.
   */
  private loadIndex(): void {
    if (!existsSync(this.rootDir)) return;
    const files: Array<{ key: string; path: string; size: number; mtimeMs: number }> = [];
    const walk = (dir: string) => {
      for (const entry of readdirSync(dir, { withFileTypes: true })) {
        const full = join(dir, entry.name);
        if (entry.isDirectory()) {
          walk(full);
        } else if (entry.isFile()) {
          const stat = statSync(full);
          const key = relative(this.rootDir, full);
          files.push({ key, path: full, size: stat.size, mtimeMs: stat.mtimeMs });
        }
      }
    };
    walk(this.rootDir);
    files.sort((a, b) => a.mtimeMs - b.mtimeMs);
    for (const f of files) {
      this.entries.set(f.key, { path: f.path, size: f.size });
      this.totalBytes += f.size;
    }
    this.evictIfNeeded(0);
  }

  /** Path at which the file for `objectKey` is (or would be) stored. */
  getPath(objectKey: string): string {
    return join(this.rootDir, objectKey);
  }

  /**
   * Whether `objectKey` is indexed and its file still exists on disk. An
   * indexed entry whose file has disappeared is dropped from the index.
   */
  has(objectKey: string): boolean {
    if (!this.entries.has(objectKey)) return false;
    if (existsSync(this.getPath(objectKey))) return true;
    this.forgetEntry(objectKey);
    return false;
  }

  /** Mark `objectKey` as most recently used. No-op for unknown keys. */
  touch(objectKey: string): void {
    const entry = this.entries.get(objectKey);
    if (!entry) return;
    // Re-inserting moves the key to the end of the Map's iteration order.
    this.entries.delete(objectKey);
    this.entries.set(objectKey, entry);
  }

  /** Count one cache hit. */
  recordHit(): void {
    this.hits += 1;
  }

  /** Count one cache miss. */
  recordMiss(): void {
    this.misses += 1;
  }

  /**
   * Return the cached bytes for `objectKey`, or `null` when not cached.
   * Records a hit or miss. Prefers a memory mapping and falls back to reading
   * the file when mapping is unavailable.
   */
  get(objectKey: string): Uint8Array | null {
    if (!this.has(objectKey)) {
      this.recordMiss();
      return null;
    }
    this.recordHit();
    this.touch(objectKey);
    const mapped = this.getMapped(objectKey);
    if (mapped) return mapped.bytes;
    return readFileSync(this.getPath(objectKey));
  }

  /**
   * Return a memory mapping of the cached file, creating it via `Bun.mmap` if
   * needed. A returned key is pinned and marked most recently used.
   *
   * @returns The mapping, or `null` when the key is not cached, the file
   *   cannot be stat'ed or mapped, or the mapped length disagrees with the
   *   file size.
   */
  getMapped(objectKey: string): MappedSegmentFile | null {
    if (!this.has(objectKey)) return null;
    const cached = this.mappedFiles.get(objectKey);
    if (cached) {
      this.pinnedKeys.add(objectKey);
      this.touch(objectKey);
      return cached;
    }

    const path = this.getPath(objectKey);
    let sizeBytes: number;
    try {
      sizeBytes = statSync(path).size;
    } catch {
      // The file vanished between has() and stat: drop it from the index.
      this.forgetEntry(objectKey);
      return null;
    }

    let bytes: Uint8Array;
    try {
      bytes = (Bun as any).mmap(path, { shared: true }) as Uint8Array;
    } catch {
      return null;
    }
    if (bytes.byteLength !== sizeBytes) return null;

    const mapped = { objectKey, path, bytes, sizeBytes };
    this.mappedFiles.set(objectKey, mapped);
    this.pinnedKeys.add(objectKey);
    this.touch(objectKey);
    return mapped;
  }

  /**
   * Store `bytes` under `objectKey`, evicting older entries first if needed.
   * The data is written to a temporary sibling file and renamed into place.
   *
   * @returns `true` when stored; `false` when caching is disabled, the payload
   *   exceeds the whole budget, or the write/rename failed.
   */
  put(objectKey: string, bytes: Uint8Array): boolean {
    if (this.maxBytes <= 0) return false;
    const sizeBytes = bytes.byteLength;
    if (sizeBytes > this.maxBytes) return false;
    this.evictIfNeeded(sizeBytes);
    const dest = this.getPath(objectKey);
    mkdirSync(dirname(dest), { recursive: true });
    const tmp = `${dest}.tmp-${Date.now()}`;
    try {
      writeFileSync(tmp, bytes);
      renameSync(tmp, dest);
    } catch {
      try {
        unlinkSync(tmp);
      } catch {
        // Best effort: the temp file may never have been created.
      }
      return false;
    }
    this.recordStored(objectKey, dest, sizeBytes);
    return true;
  }

  /**
   * Move an existing local file into the cache under `objectKey`.
   *
   * @param sizeBytes - Size of the file at `localPath`, supplied by the caller
   *   and used as-is for accounting.
   * @returns `true` when stored; `false` when caching is disabled, the size
   *   exceeds the whole budget, or the rename failed.
   */
  putFromLocal(objectKey: string, localPath: string, sizeBytes: number): boolean {
    if (this.maxBytes <= 0) return false;
    if (sizeBytes > this.maxBytes) return false;
    this.evictIfNeeded(sizeBytes);
    const dest = this.getPath(objectKey);
    mkdirSync(dirname(dest), { recursive: true });
    try {
      renameSync(localPath, dest);
    } catch {
      return false;
    }
    this.recordStored(objectKey, dest, sizeBytes);
    return true;
  }

  /** Delete the cached file for `objectKey`. Pinned and unknown keys are left alone. */
  remove(objectKey: string): void {
    if (this.pinnedKeys.has(objectKey)) return;
    const entry = this.entries.get(objectKey);
    if (!entry) return;
    try {
      unlinkSync(entry.path);
    } catch {
      // Best effort: the file may already be gone.
    }
    this.forgetEntry(objectKey);
  }

  /** Index a file that was just placed at `dest`, replacing any previous entry. */
  private recordStored(objectKey: string, dest: string, sizeBytes: number): void {
    const existing = this.entries.get(objectKey);
    if (existing) this.totalBytes = Math.max(0, this.totalBytes - existing.size);
    // Any mapping held for this key refers to the replaced file.
    this.mappedFiles.delete(objectKey);
    this.entries.set(objectKey, { path: dest, size: sizeBytes });
    this.totalBytes += sizeBytes;
    this.bytesAdded += sizeBytes;
  }

  /**
   * Drop `objectKey` from the index and give its bytes back to the budget,
   * along with any mapping held for it. Does not touch the file on disk.
   */
  private forgetEntry(objectKey: string): void {
    const entry = this.entries.get(objectKey);
    if (!entry) return;
    this.totalBytes = Math.max(0, this.totalBytes - entry.size);
    this.entries.delete(objectKey);
    this.mappedFiles.delete(objectKey);
  }

  /**
   * Evict the oldest unpinned entries until `incomingBytes` more would fit in
   * the budget. Stops early when only pinned entries remain, in which case the
   * budget may be exceeded.
   */
  private evictIfNeeded(incomingBytes: number): void {
    while (this.totalBytes + incomingBytes > this.maxBytes && this.entries.size > 0) {
      let victimKey: string | undefined;
      for (const candidateKey of this.entries.keys()) {
        if (!this.pinnedKeys.has(candidateKey)) {
          victimKey = candidateKey;
          break;
        }
      }
      if (victimKey === undefined) break;
      const victim = this.entries.get(victimKey);
      if (victim) {
        try {
          unlinkSync(victim.path);
        } catch {
          // Best effort: the file may already be gone.
        }
      }
      this.forgetEntry(victimKey);
      this.evictions += 1;
    }
  }

  /** Snapshot of the cache's counters and current usage. */
  stats(): SegmentCacheStats {
    let mappedBytes = 0;
    let mappedEntryCount = 0;
    for (const mapped of this.mappedFiles.values()) {
      mappedBytes += mapped.sizeBytes;
      mappedEntryCount += 1;
    }
    return {
      hits: this.hits,
      misses: this.misses,
      evictions: this.evictions,
      bytesAdded: this.bytesAdded,
      usedBytes: this.totalBytes,
      maxBytes: this.maxBytes,
      entryCount: this.entries.size,
      mappedBytes,
      mappedEntryCount,
      pinnedEntryCount: this.pinnedKeys.size,
    };
  }

  /** Total indexed bytes of entries whose object key starts with `prefix`. */
  bytesForObjectKeyPrefix(prefix: string): number {
    let total = 0;
    for (const [objectKey, entry] of this.entries.entries()) {
      if (objectKey.startsWith(prefix)) total += entry.size;
    }
    return total;
  }
}
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
import { existsSync, readFileSync } from "node:fs";
|
|
2
|
+
import type { SegmentRow } from "../db/db";
|
|
3
|
+
import type { ObjectStore } from "../objectstore/interface";
|
|
4
|
+
import type { SegmentDiskCache } from "./cache";
|
|
5
|
+
import { dsError } from "../util/ds_error";
|
|
6
|
+
import type { RetryOptions } from "../util/retry";
|
|
7
|
+
import { retry } from "../util/retry";
|
|
8
|
+
import { segmentObjectKey, streamHash16Hex } from "../util/stream_paths";
|
|
9
|
+
|
|
10
|
+
/**
 * Where a segment's bytes came from: `"mapped"` carries the path of the file
 * backing the bytes, `"bytes"` is a plain in-memory buffer.
 */
export type SegmentReadSource =
  | { kind: "mapped"; path: string; bytes: Uint8Array }
  | { kind: "bytes"; bytes: Uint8Array };
|
|
13
|
+
|
|
14
|
+
/**
 * Return the view of `bytes` covering offsets `start` through `end`
 * (inclusive), clamped to the buffer. Out-of-range or inverted ranges yield an
 * empty view. No bytes are copied.
 */
function readRangeFromBytes(bytes: Uint8Array, start: number, end: number): Uint8Array {
  const limit = bytes.byteLength;
  const from = Math.max(0, Math.min(start, limit));
  // `end` is inclusive, so the exclusive bound is one past it.
  const exclusiveEnd = Math.min(end + 1, limit);
  const to = Math.max(from, exclusiveEnd);
  return bytes.subarray(from, to);
}
|
|
19
|
+
|
|
20
|
+
/**
 * Return the bytes of `source` from `start` through `end` (inclusive), clamped
 * to the available data.
 */
export function readRangeFromSource(source: SegmentReadSource, start: number, end: number): Uint8Array {
  const { bytes } = source;
  return readRangeFromBytes(bytes, start, end);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Obtain the bytes of a segment, trying the cheapest source first:
 *
 * 1. the segment's local file (`seg.local_path`), memory-mapped when possible;
 * 2. the disk cache, memory-mapped when possible;
 * 3. the object store, after which the bytes are offered to the disk cache.
 *
 * @param os - Object store to fetch from on a cache miss.
 * @param seg - Segment row identifying the stream and segment index.
 * @param diskCache - Optional disk cache to consult and populate.
 * @param retryOpts - Retry policy for the object-store fetch; defaults to a
 *   single attempt.
 * @returns The segment bytes and how they were obtained.
 * @throws When the object store has no object for the segment (after retries),
 *   or when the local file exists but can be neither mapped nor read.
 */
export async function loadSegmentSource(
  os: ObjectStore,
  seg: SegmentRow,
  diskCache?: SegmentDiskCache,
  retryOpts?: RetryOptions
): Promise<SegmentReadSource> {
  if (seg.local_path && seg.local_path.length > 0 && existsSync(seg.local_path)) {
    try {
      const bytes = (Bun as any).mmap(seg.local_path, { shared: true }) as Uint8Array;
      return { kind: "mapped", path: seg.local_path, bytes };
    } catch {
      // Mapping failed; fall back to a plain read of the same file.
      return { kind: "bytes", bytes: readFileSync(seg.local_path) };
    }
  }

  const objectKey = segmentObjectKey(streamHash16Hex(seg.stream), seg.segment_index);
  if (diskCache && diskCache.has(objectKey)) {
    diskCache.touch(objectKey);
    const mapped = diskCache.getMapped(objectKey);
    if (mapped) {
      diskCache.recordHit();
      return { kind: "mapped", path: mapped.path, bytes: mapped.bytes };
    }
    // Read directly instead of exists-then-read: the file can disappear
    // between the two calls, and a failed read should fall through to the
    // object store rather than abort the load.
    let cachedBytes: Uint8Array | null = null;
    try {
      cachedBytes = readFileSync(diskCache.getPath(objectKey));
    } catch {
      cachedBytes = null;
    }
    if (cachedBytes !== null) {
      diskCache.recordHit();
      return { kind: "bytes", bytes: cachedBytes };
    }
    diskCache.remove(objectKey);
  }

  // Only lookups that were not served from the cache count as misses, so one
  // lookup is never recorded as both a hit and a miss.
  if (diskCache) diskCache.recordMiss();

  const bytes = await retry(
    async () => {
      const res = await os.get(objectKey);
      if (!res) throw dsError(`object store missing segment: ${objectKey}`);
      return res;
    },
    retryOpts ?? { retries: 0, baseDelayMs: 0, maxDelayMs: 0, timeoutMs: 0 }
  );

  if (diskCache?.put(objectKey, bytes)) {
    const mapped = diskCache.getMapped(objectKey);
    if (mapped) return { kind: "mapped", path: mapped.path, bytes: mapped.bytes };
  }
  // The fetched bytes are already in memory; return them rather than
  // re-reading the file just written.
  return { kind: "bytes", bytes };
}
|
|
68
|
+
|
|
69
|
+
/**
 * Load a segment through `loadSegmentSource` and return just its bytes,
 * discarding how they were obtained.
 */
export async function loadSegmentBytesCached(
  os: ObjectStore,
  seg: SegmentRow,
  diskCache?: SegmentDiskCache,
  retryOpts?: RetryOptions
): Promise<Uint8Array> {
  const { bytes } = await loadSegmentSource(os, seg, diskCache, retryOpts);
  return bytes;
}
|
|
78
|
+
|
|
79
|
+
/**
 * Load a segment through `loadSegmentSource` and return the bytes from `start`
 * through `end` (inclusive), clamped to the segment's length.
 */
export async function readSegmentRangeCached(
  os: ObjectStore,
  seg: SegmentRow,
  start: number,
  end: number,
  diskCache?: SegmentDiskCache,
  retryOpts?: RetryOptions
): Promise<Uint8Array> {
  const segmentSource = await loadSegmentSource(os, seg, diskCache, retryOpts);
  const range = readRangeFromSource(segmentSource, start, end);
  return range;
}
|