@tungthedev/streams-server 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +45 -0
- package/CONTRIBUTING.md +76 -0
- package/LICENSE +201 -0
- package/README.md +58 -0
- package/SECURITY.md +42 -0
- package/bin/prisma-streams-server +2 -0
- package/package.json +46 -0
- package/src/app.ts +583 -0
- package/src/app_core.ts +3144 -0
- package/src/app_local.ts +206 -0
- package/src/auth.ts +124 -0
- package/src/auto_tune.ts +69 -0
- package/src/backpressure.ts +66 -0
- package/src/bootstrap.ts +613 -0
- package/src/compute/demo_entry.ts +415 -0
- package/src/compute/demo_site.ts +1242 -0
- package/src/compute/entry.ts +19 -0
- package/src/compute/package_entry.ts +4 -0
- package/src/compute/virtual-modules.d.ts +15 -0
- package/src/compute/worker_module_url.ts +9 -0
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +402 -0
- package/src/db/bootstrap_store.ts +9 -0
- package/src/db/db.ts +2424 -0
- package/src/db/schema.ts +925 -0
- package/src/db/sqlite_manifest_snapshot.ts +81 -0
- package/src/db/sqlite_touch_store.ts +491 -0
- package/src/db/sqlite_wal_store.ts +472 -0
- package/src/details/full_mode_details.ts +568 -0
- package/src/expiry_sweeper.ts +47 -0
- package/src/foreground_activity.ts +55 -0
- package/src/hist.ts +169 -0
- package/src/index/binary_fuse.ts +379 -0
- package/src/index/indexer.ts +947 -0
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +863 -0
- package/src/index/run_cache.ts +84 -0
- package/src/index/run_format.ts +213 -0
- package/src/index/schedule.ts +28 -0
- package/src/index/secondary_indexer.ts +901 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +309 -0
- package/src/lens/lens.ts +501 -0
- package/src/manifest.ts +249 -0
- package/src/memory.ts +334 -0
- package/src/metrics.ts +147 -0
- package/src/metrics_emitter.ts +83 -0
- package/src/notifier.ts +180 -0
- package/src/objectstore/accounting.ts +151 -0
- package/src/objectstore/interface.ts +13 -0
- package/src/objectstore/mock_r2.ts +269 -0
- package/src/objectstore/null.ts +32 -0
- package/src/objectstore/r2.ts +318 -0
- package/src/observe/pairing.ts +61 -0
- package/src/observe/request.ts +772 -0
- package/src/offset.ts +70 -0
- package/src/postgres/bootstrap.ts +269 -0
- package/src/postgres/companions.ts +197 -0
- package/src/postgres/control_restore.ts +109 -0
- package/src/postgres/details.ts +189 -0
- package/src/postgres/lexicon_index.ts +260 -0
- package/src/postgres/routing_index.ts +189 -0
- package/src/postgres/rows.ts +132 -0
- package/src/postgres/schema.ts +355 -0
- package/src/postgres/secondary_index.ts +238 -0
- package/src/postgres/segments.ts +900 -0
- package/src/postgres/stats.ts +103 -0
- package/src/postgres/store.ts +947 -0
- package/src/postgres/touch.ts +591 -0
- package/src/postgres/types.ts +32 -0
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +473 -0
- package/src/profiles/generic.ts +51 -0
- package/src/profiles/index.ts +237 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +83 -0
- package/src/profiles/otelTraces/normalize.ts +955 -0
- package/src/profiles/otelTraces/otlp.ts +1002 -0
- package/src/profiles/otelTraces/schema.ts +408 -0
- package/src/profiles/otelTraces.ts +390 -0
- package/src/profiles/profile.ts +284 -0
- package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
- package/src/profiles/stateProtocol/changes.ts +24 -0
- package/src/profiles/stateProtocol/ingest.ts +115 -0
- package/src/profiles/stateProtocol/routes.ts +511 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +107 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2986 -0
- package/src/runtime/hash.ts +156 -0
- package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
- package/src/runtime/hash_vendor/NOTICE.md +8 -0
- package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +237 -0
- package/src/schema/lens_schema.ts +290 -0
- package/src/schema/proof.ts +547 -0
- package/src/schema/read_json.ts +51 -0
- package/src/schema/registry.ts +966 -0
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +409 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +327 -0
- package/src/search/companion_manager.ts +1305 -0
- package/src/search/companion_plan.ts +229 -0
- package/src/search/exact_format.ts +281 -0
- package/src/search/exact_runtime.ts +55 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +270 -0
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +403 -0
- package/src/segment/segmenter.ts +412 -0
- package/src/segment/segmenter_worker.ts +72 -0
- package/src/segment/segmenter_workers.ts +130 -0
- package/src/server.ts +264 -0
- package/src/server_auto_tune.ts +158 -0
- package/src/sqlite/adapter.ts +335 -0
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +205 -0
- package/src/store/append.ts +50 -0
- package/src/store/bootstrap_restore_store.ts +71 -0
- package/src/store/capabilities.ts +86 -0
- package/src/store/full_mode_details_store.ts +71 -0
- package/src/store/index_store.ts +104 -0
- package/src/store/profile_touch_store.ts +1 -0
- package/src/store/rows.ts +144 -0
- package/src/store/schema_profile_store.ts +73 -0
- package/src/store/schema_publication.ts +6 -0
- package/src/store/segment_manifest_store.ts +129 -0
- package/src/store/segment_read_store.ts +22 -0
- package/src/store/stats_accounting_store.ts +83 -0
- package/src/store/touch_store.ts +98 -0
- package/src/store/wal_store.ts +21 -0
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_keys.ts +158 -0
- package/src/touch/live_metrics.ts +841 -0
- package/src/touch/live_templates.ts +449 -0
- package/src/touch/manager.ts +1292 -0
- package/src/touch/process_batch.ts +576 -0
- package/src/touch/processor_worker.ts +85 -0
- package/src/touch/spec.ts +459 -0
- package/src/touch/touch_journal.ts +771 -0
- package/src/touch/touch_key_id.ts +20 -0
- package/src/touch/worker_pool.ts +191 -0
- package/src/touch/worker_protocol.ts +57 -0
- package/src/types/proper-lockfile.d.ts +1 -0
- package/src/uploader.ts +358 -0
- package/src/util/base32_crockford.ts +81 -0
- package/src/util/bloom256.ts +67 -0
- package/src/util/byte_lru.ts +73 -0
- package/src/util/cleanup.ts +22 -0
- package/src/util/crc32c.ts +29 -0
- package/src/util/ds_error.ts +15 -0
- package/src/util/duration.ts +17 -0
- package/src/util/endian.ts +53 -0
- package/src/util/json_pointer.ts +148 -0
- package/src/util/log.ts +25 -0
- package/src/util/lru.ts +53 -0
- package/src/util/retry.ts +35 -0
- package/src/util/siphash.ts +71 -0
- package/src/util/stream_paths.ts +50 -0
- package/src/util/time.ts +14 -0
- package/src/util/yield.ts +3 -0
- package/src/util/zstd.ts +24 -0
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { Result } from "better-result";
|
|
2
|
+
import { xxh32Result, type HashError } from "../runtime/hash";
|
|
3
|
+
import { dsError } from "../util/ds_error.ts";
|
|
4
|
+
|
|
5
|
+
/** Error type reported when deriving a touch key id fails; an alias of the hash layer's `HashError`. */
export type TouchKeyIdError = HashError;
|
|
6
|
+
|
|
7
|
+
/**
 * Derives a 32-bit touch key id from a routing key without throwing.
 *
 * The key is trimmed and lower-cased first. If the normalized key is exactly
 * 16 hexadecimal digits, the id is the unsigned value of its last 8 digits
 * (the low 32 bits of the 64-bit value). Any other key is passed to
 * `xxh32Result`, whose result is returned as-is.
 *
 * @param key Routing key in either form.
 * @returns `Ok(id)` on success, or the error produced by `xxh32Result`.
 */
export function touchKeyIdFromRoutingKeyResult(key: string): Result<number, TouchKeyIdError> {
  const normalized = key.trim().toLowerCase();
  const isCanonicalHex = /^[0-9a-f]{16}$/.test(normalized);
  if (!isCanonicalHex) return xxh32Result(normalized);

  // low32 of the canonical 64-bit routing key.
  const low32Hex = normalized.slice(8);
  return Result.ok(Number.parseInt(low32Hex, 16) >>> 0);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Throwing variant of `touchKeyIdFromRoutingKeyResult`.
 *
 * @param key Routing key to convert.
 * @returns The derived touch key id.
 * @throws The value built by `dsError` from the underlying error message when derivation fails.
 */
export function touchKeyIdFromRoutingKey(key: string): number {
  const outcome = touchKeyIdFromRoutingKeyResult(key);
  if (!Result.isError(outcome)) return outcome.value;
  throw dsError(outcome.error.message);
}
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
import { Worker } from "node:worker_threads";
|
|
2
|
+
import { Result } from "better-result";
|
|
3
|
+
import type { Config } from "../config";
|
|
4
|
+
import { detectHostRuntime } from "../runtime/host_runtime.ts";
|
|
5
|
+
import { resolveWorkerModuleUrl } from "../compute/worker_module_url";
|
|
6
|
+
import type { ProcessRequest, ProcessResult, WorkerMessage } from "./worker_protocol";
|
|
7
|
+
import { dsError } from "../util/ds_error.ts";
|
|
8
|
+
|
|
9
|
+
/** Bookkeeping for one request that has been accepted but not yet settled. */
type Pending = {
  /** Settles the caller's promise with either the worker's result or a pool error. */
  resolve: (outcome: Result<ProcessResult, WorkerPoolProcessError>) => void;
};
|
|
12
|
+
|
|
13
|
+
/**
 * Error value returned by the worker pool instead of throwing.
 *
 * - `worker_pool_unavailable`: the pool cannot accept the request at all.
 * - `worker_pool_failure`: the request was accepted but did not produce a result.
 */
export type WorkerPoolProcessError = {
  kind:
    | "worker_pool_unavailable"
    | "worker_pool_failure";
  /** Human-readable description of what went wrong. */
  message: string;
};
|
|
17
|
+
|
|
18
|
+
/** One worker thread plus the request (if any) it is currently running. */
type WorkerSlot = { worker: Worker; busy: boolean; currentId: number | null };

/**
 * Fixed-size pool of worker threads that execute touch processing requests.
 *
 * Requests are queued in FIFO order and handed to the first idle worker. Every
 * request settles with a `Result`: the worker's `ProcessResult`, or a
 * `WorkerPoolProcessError` when the pool is unavailable, the worker reports an
 * error, the worker exits, the request cannot be dispatched, or the pool is
 * stopped. A worker that exits while the pool is running is replaced.
 *
 * `generation` is bumped on every start/stop so that events from workers that
 * belong to a previous run are ignored.
 */
export class TouchProcessorWorkerPool {
  private readonly cfg: Config;
  private readonly workerCount: number;
  private readonly workers: WorkerSlot[] = [];
  private started = false;
  private generation = 0;
  private nextId = 1;
  private readonly pending = new Map<number, Pending>();
  private readonly queue: Array<Omit<ProcessRequest, "type" | "id"> & { id: number }> = [];

  /**
   * @param cfg Configuration forwarded to every worker through `workerData`.
   * @param workerCount Desired number of workers; floored and clamped to >= 0.
   *   A non-finite value disables the pool (previously NaN produced a pool with
   *   no workers that still accepted requests and never settled them).
   */
  constructor(cfg: Config, workerCount: number) {
    this.cfg = cfg;
    this.workerCount = Number.isFinite(workerCount) ? Math.max(0, Math.floor(workerCount)) : 0;
  }

  /** Spawns the workers. Does nothing if the pool is already started. */
  start(): void {
    if (this.started) return;
    this.started = true;
    this.generation += 1;
    const generation = this.generation;
    for (let i = 0; i < this.workerCount; i++) this.spawnWorker(i, generation);
  }

  /**
   * Stops the pool: drops queued work, fails every outstanding request with
   * `worker_pool_failure`, and terminates all workers.
   */
  async stop(): Promise<void> {
    if (!this.started) return;
    this.started = false;
    this.generation += 1;
    const workers = this.workers.slice();
    this.workers.length = 0;
    this.queue.length = 0;
    for (const [id, p] of this.pending.entries()) {
      p.resolve(Result.err({ kind: "worker_pool_failure", message: "worker pool stopped" }));
      this.pending.delete(id);
    }
    // Await termination so the worker threads are actually gone before stop()
    // resolves. Callers (the local server's close path) rely on this: a worker
    // thread still tearing down while the host process frees other native
    // resources -- e.g. PGlite's WebAssembly JIT pages in @prisma/dev -- races
    // V8's process-global JIT bookkeeping and can abort the process on Linux.
    await Promise.all(
      workers.map((w) => {
        try {
          w.worker.postMessage({ type: "stop" });
        } catch {
          // Best effort: the worker is terminated right below regardless.
        }
        return w.worker.terminate();
      }),
    );
  }

  /** Stops the pool (awaiting worker termination) and starts it again. */
  async restart(): Promise<void> {
    await this.stop();
    this.start();
  }

  /**
   * Queues a request and resolves once it has been settled.
   *
   * @param req Request payload; `type` and `id` are assigned by the pool.
   * @returns `Ok(ProcessResult)` from the worker, or `Err` with
   *   `worker_pool_unavailable` (pool not started, or created with zero
   *   workers) or `worker_pool_failure`. The returned promise never rejects.
   */
  async processResult(req: Omit<ProcessRequest, "type" | "id">): Promise<Result<ProcessResult, WorkerPoolProcessError>> {
    if (!this.started) {
      return Result.err({ kind: "worker_pool_unavailable", message: "worker pool not started" });
    }
    if (this.workerCount === 0) {
      return Result.err({ kind: "worker_pool_unavailable", message: "worker pool disabled" });
    }
    const id = this.nextId++;
    const queued = { ...req, id };
    const value = await new Promise<Result<ProcessResult, WorkerPoolProcessError>>((resolve) => {
      this.pending.set(id, { resolve });
      this.queue.push(queued);
      this.pump();
    });
    return value;
  }

  /**
   * Throwing variant of `processResult`.
   *
   * @throws The value built by `dsError` from the pool error message.
   */
  async process(req: Omit<ProcessRequest, "type" | "id">): Promise<ProcessResult> {
    const res = await this.processResult(req);
    if (Result.isError(res)) throw dsError(res.error.message);
    return res.value;
  }

  /**
   * Hands queued requests to idle workers until either runs out.
   *
   * If a request cannot be posted to its worker, that request is failed
   * immediately and the slot is released, so neither the caller nor the slot
   * is left waiting for a reply that will never arrive.
   */
  private pump(): void {
    if (!this.started) return;
    while (this.queue.length > 0) {
      const slot = this.workers.find((w) => !w.busy);
      if (!slot) return;
      const next = this.queue.shift();
      if (!next) return;
      slot.busy = true;
      slot.currentId = next.id;
      try {
        slot.worker.postMessage({
          type: "process",
          id: next.id,
          stream: next.stream,
          fromOffset: next.fromOffset,
          toOffset: next.toOffset,
          profile: next.profile,
          maxRows: next.maxRows,
          maxBytes: next.maxBytes,
          emitFineTouches: next.emitFineTouches,
          fineTouchBudget: next.fineTouchBudget,
          fineGranularity: next.fineGranularity,
          processingMode: next.processingMode,
          filterHotTemplates: next.filterHotTemplates,
          hotTemplateIds: next.hotTemplateIds,
        } satisfies ProcessRequest);
      } catch (err) {
        slot.busy = false;
        slot.currentId = null;
        const waiter = this.pending.get(next.id);
        if (waiter) {
          this.pending.delete(next.id);
          const detail = err instanceof Error ? err.message : String(err);
          waiter.resolve(Result.err({ kind: "worker_pool_failure", message: `worker dispatch failed: ${detail}` }));
        }
      }
    }
  }

  /**
   * Creates one worker, registers its slot and wires up its event handlers.
   *
   * @param idx Index used only in log messages.
   * @param generation Pool generation the worker belongs to; its events are
   *   ignored once the pool has moved on to another generation.
   */
  private spawnWorker(idx: number, generation: number = this.generation): void {
    const workerSpec = resolveWorkerModuleUrl(import.meta.url, "./processor_worker.ts", "../touch/processor_worker.js");

    // `type` and `smol` are not part of the options type, hence the cast.
    // NOTE(review): presumably these are consumed by the Bun runtime -- confirm.
    const worker = new Worker(workerSpec, {
      workerData: { config: this.cfg, hostRuntime: detectHostRuntime() },
      type: "module",
      smol: true,
    } as any);

    const slot: WorkerSlot = { worker, busy: false, currentId: null };
    this.workers.push(slot);

    worker.on("message", (msg: WorkerMessage) => {
      if (generation !== this.generation) return;
      if (!msg || typeof msg !== "object") return;
      if (msg.type !== "result" && msg.type !== "error") return;

      const waiter = this.pending.get(msg.id);
      // Release the slot when this reply answers its current request, even if
      // nobody is waiting for it any more; otherwise the worker would be
      // considered busy forever.
      if (waiter || slot.currentId === msg.id) {
        slot.busy = false;
        slot.currentId = null;
      }
      if (waiter) {
        this.pending.delete(msg.id);
        if (msg.type === "result") {
          waiter.resolve(Result.ok(msg));
        } else {
          waiter.resolve(Result.err({ kind: "worker_pool_failure", message: msg.message }));
        }
      }
      this.pump();
    });

    worker.on("error", (err) => {
      if (generation !== this.generation) return;
      // eslint-disable-next-line no-console
      console.error(`touch processor worker ${idx} error`, err);
    });

    worker.on("exit", (code) => {
      // Exits caused by stop()/restart() are expected and need no handling.
      if (generation !== this.generation || !this.started) return;
      // eslint-disable-next-line no-console
      console.error(`touch processor worker ${idx} exited with code ${code}, respawning`);
      if (slot.currentId != null) {
        const p = this.pending.get(slot.currentId);
        if (p) {
          this.pending.delete(slot.currentId);
          p.resolve(Result.err({ kind: "worker_pool_failure", message: "worker exited" }));
        }
      }
      slot.busy = false;
      slot.currentId = null;
      const widx = this.workers.indexOf(slot);
      if (widx >= 0) this.workers.splice(widx, 1);
      this.spawnWorker(idx, generation);
      this.pump();
    });
  }
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import type { StreamProfileSpec } from "../profiles";
|
|
2
|
+
|
|
3
|
+
/** One touch emitted by a processing run. */
export type TouchRow = {
  /** Numeric id of the touched key. */
  keyId: number;
  /** Routing key, when one is attached to the touch. */
  routingKey?: string;
  /** Source stream offset (base-10 string). */
  watermark: string;
  entity: string;
  /** Whether the touch is for a table or for a template. */
  kind: "table" | "template";
  /** Template id; NOTE(review): presumably only set when `kind` is "template" -- confirm. */
  templateId?: string;
};
|
|
11
|
+
|
|
12
|
+
/** Message sent to a worker asking it to process a range of a stream. */
export type ProcessRequest = {
  type: "process";
  /** Correlation id; echoed back in the matching result or error message. */
  id: number;
  stream: string;
  fromOffset: bigint;
  toOffset: bigint;
  profile: StreamProfileSpec;
  maxRows: number;
  maxBytes: number;

  // Optional tuning knobs.
  emitFineTouches?: boolean;
  fineTouchBudget?: number | null;
  fineGranularity?: "key" | "template";
  processingMode?: "full" | "hotTemplatesOnly";
  filterHotTemplates?: boolean;
  hotTemplateIds?: string[] | null;
};
|
|
28
|
+
|
|
29
|
+
/** Message sent by a worker when a `ProcessRequest` completed successfully. */
export type ProcessResult = {
  type: "result";
  /** Correlation id of the request this result answers. */
  id: number;
  stream: string;
  processedThrough: bigint;
  /** Touches produced by the run. */
  touches: TouchRow[];
  /** Counters describing the run; the optional ones may be omitted by the worker. */
  stats: {
    rowsRead: number;
    bytesRead: number;
    changes: number;
    touchesEmitted: number;
    tableTouchesEmitted: number;
    templateTouchesEmitted: number;
    maxSourceTsMs?: number;
    fineTouchesDroppedDueToBudget?: number;
    fineTouchesSuppressedDueToBudget?: boolean;
    fineTouchesSkippedColdTemplate?: number;
  };
};
|
|
48
|
+
|
|
49
|
+
/** Message sent by a worker when a `ProcessRequest` failed. */
export type ProcessError = {
  type: "error";
  /** Correlation id of the request that failed. */
  id: number;
  stream: string;
  /** Failure description. */
  message: string;
  /** Stack trace, when the worker provides one. */
  stack?: string;
};
|
|
56
|
+
|
|
57
|
+
/** Any message a worker sends back; discriminate on the `type` field. */
export type WorkerMessage =
  | ProcessResult
  | ProcessError;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
declare module "proper-lockfile";
|
package/src/uploader.ts
ADDED
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
import { unlinkSync } from "node:fs";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { Result } from "better-result";
|
|
4
|
+
import type { Config } from "./config";
|
|
5
|
+
import type { ManifestStore, SegmentRow } from "./store/segment_manifest_store";
|
|
6
|
+
import type { ObjectStore } from "./objectstore/interface";
|
|
7
|
+
import { buildManifestResult } from "./manifest";
|
|
8
|
+
import { manifestObjectKey, segmentObjectKey, streamHash16Hex } from "./util/stream_paths";
|
|
9
|
+
import { SegmentDiskCache } from "./segment/cache";
|
|
10
|
+
import { retry } from "./util/retry";
|
|
11
|
+
import { LruCache } from "./util/lru";
|
|
12
|
+
import type { StatsCollector } from "./stats";
|
|
13
|
+
import type { BackpressureGate } from "./backpressure";
|
|
14
|
+
import { dsError } from "./util/ds_error.ts";
|
|
15
|
+
import { RuntimeMemorySampler } from "./runtime_memory_sampler";
|
|
16
|
+
|
|
17
|
+
/** Control surface of an uploader; `tick` and `getMemoryStats` are optional capabilities. */
export type UploaderController = {
  start(): void;
  /** May complete synchronously or return a promise that resolves once stopped. */
  stop(hard?: boolean): void | Promise<void>;
  /** Runs one upload pass on demand, when supported. */
  tick?: () => Promise<void>;
  /** Number of segments waiting to be uploaded. */
  countSegmentsWaiting(): number;
  /** Snapshot of in-flight upload bookkeeping, when supported. */
  getMemoryStats?: () => {
    inflight_segments: number;
    inflight_segment_bytes: number;
    manifest_inflight_streams: number;
  };
  /** Replaces the registered hooks; `undefined` removes them. */
  setHooks(hooks: UploaderHooks | undefined): void;
  /** Publishes the manifest for one stream. */
  publishManifest(stream: string, opts?: { wait?: boolean }): Promise<void>;
};
|
|
30
|
+
|
|
31
|
+
/** Optional callbacks invoked by the uploader; each receives the affected stream name. */
export type UploaderHooks = {
  /** Called per stream after an upload pass that included segments of that stream. */
  onSegmentsUploaded?: (stream: string) => void;
  /** Called after stored metadata for the stream has changed. */
  onMetadataChanged?: (stream: string) => void;
};
|
|
35
|
+
|
|
36
|
+
/**
 * Uploads pending segment files to the object store and publishes stream
 * manifests.
 *
 * A pass (`tick`) loads the pending upload heads, uploads them with bounded
 * concurrency, notifies hooks, and then publishes a manifest for every stream
 * touched by the pass. Streams that recently failed are skipped until their
 * backoff (tracked by `FailureTracker`) has expired.
 *
 * Hooks are best-effort observers: an exception thrown by a hook is logged
 * and never changes the outcome of an upload or a manifest publication.
 */
export class Uploader {
  private readonly config: Config;
  private readonly db: ManifestStore;
  private readonly os: ObjectStore;
  private readonly diskCache?: SegmentDiskCache;
  private readonly stats?: StatsCollector;
  private readonly gate?: BackpressureGate;
  private readonly memorySampler?: RuntimeMemorySampler;
  private timer: ReturnType<typeof setInterval> | null = null;
  /** True while a tick is executing; prevents overlapping passes. */
  private running = false;
  /** Set by a hard stop; makes in-progress work bail out at the next check. */
  private stopping = false;
  /** Segment ids currently being uploaded. */
  private readonly inflight = new Set<string>();
  private readonly failures = new FailureTracker(1024);
  private hooks?: UploaderHooks;
  /** Streams whose manifest is currently being published. */
  private readonly manifestInflight = new Set<string>();
  private inflightSegmentBytes = 0;
  private pendingSegmentsWaiting = 0;

  constructor(
    config: Config,
    db: ManifestStore,
    os: ObjectStore,
    diskCache?: SegmentDiskCache,
    stats?: StatsCollector,
    gate?: BackpressureGate,
    hooks?: UploaderHooks,
    memorySampler?: RuntimeMemorySampler
  ) {
    this.config = config;
    this.db = db;
    this.os = os;
    this.diskCache = diskCache;
    this.stats = stats;
    this.gate = gate;
    this.hooks = hooks;
    this.memorySampler = memorySampler;
  }

  /** Replaces the registered hooks; `undefined` removes them. */
  setHooks(hooks: UploaderHooks | undefined): void {
    this.hooks = hooks;
  }

  /**
   * Starts the periodic upload timer. Does nothing if the timer is already
   * running or `uploadIntervalMs` is not positive. Always clears a previous
   * hard-stop flag.
   */
  start(): void {
    this.stopping = false;
    if (this.timer) return;
    if (this.config.uploadIntervalMs <= 0) return;
    this.timer = setInterval(() => {
      void this.tick();
    }, this.config.uploadIntervalMs);
  }

  /**
   * Stops the periodic timer and waits for a running tick to finish.
   *
   * @param hard When true, also sets the stopping flag so that in-progress
   *   work returns early and further ticks/publications are refused until
   *   `start()` is called again. When false, the flag is cleared.
   */
  async stop(hard = false): Promise<void> {
    this.stopping = hard;
    if (this.timer) clearInterval(this.timer);
    this.timer = null;
    while (this.running) {
      await new Promise((resolve) => setTimeout(resolve, 5));
    }
  }

  /** Pending segment count as observed at the start of the most recent tick. */
  countSegmentsWaiting(): number {
    return this.pendingSegmentsWaiting;
  }

  /** Snapshot of in-flight upload bookkeeping. */
  getMemoryStats(): { inflight_segments: number; inflight_segment_bytes: number; manifest_inflight_streams: number } {
    return {
      inflight_segments: this.inflight.size,
      inflight_segment_bytes: this.inflightSegmentBytes,
      manifest_inflight_streams: this.manifestInflight.size,
    };
  }

  /**
   * Runs one upload pass. Returns immediately when stopping or when another
   * pass is already running. Never rejects: errors are logged, except those
   * caused by a hard stop or a closed database.
   */
  async tick(): Promise<void> {
    if (this.stopping) return;
    if (this.running) return;
    this.running = true;
    try {
      this.pendingSegmentsWaiting = await this.db.countPendingSegments();
      const pending = await this.db.pendingUploadHeads(1000);
      if (pending.length === 0) return;

      // Upload with bounded concurrency.
      const queue = pending.filter((s) => !this.inflight.has(s.segment_id) && !this.failures.shouldSkip(s.stream));
      if (queue.length === 0) return;
      const streams = new Set(queue.map((s) => s.stream));

      // All workers drain the same queue.
      const workers: Promise<void>[] = [];
      for (let i = 0; i < this.config.uploadConcurrency; i++) {
        workers.push(this.uploadWorker(queue));
      }
      await Promise.all(workers);

      // Notify indexer / listeners.
      for (const stream of streams) {
        this.runHook("onSegmentsUploaded", stream, () => this.hooks?.onSegmentsUploaded?.(stream));
      }

      // Publish manifests for affected streams.
      for (const stream of streams) {
        if (this.failures.shouldSkip(stream)) continue;
        try {
          await this.publishManifest(stream);
        } catch (e) {
          if (this.shouldReport(e)) {
            // eslint-disable-next-line no-console
            console.error("manifest publish failed", stream, e);
          }
        }
      }
    } catch (e) {
      if (this.shouldReport(e)) {
        // eslint-disable-next-line no-console
        console.error("uploader tick error", e);
      }
    } finally {
      this.running = false;
    }
  }

  /**
   * Drains the shared queue, uploading one segment at a time. Upload errors
   * are logged and do not stop the worker.
   */
  private async uploadWorker(queue: SegmentRow[]): Promise<void> {
    while (queue.length > 0) {
      if (this.stopping) return;
      const seg = queue.shift();
      if (!seg) return;
      if (this.inflight.has(seg.segment_id)) continue;
      this.inflight.add(seg.segment_id);
      this.inflightSegmentBytes += Math.max(0, seg.size_bytes);
      try {
        await this.uploadOne(seg);
        this.failures.recordSuccess(seg.stream);
      } catch (e) {
        if (this.shouldReport(e)) {
          // eslint-disable-next-line no-console
          console.error("segment upload failed", seg.segment_id, e);
        }
      } finally {
        this.inflight.delete(seg.segment_id);
        this.inflightSegmentBytes = Math.max(0, this.inflightSegmentBytes - Math.max(0, seg.size_bytes));
      }
    }
  }

  /**
   * Uploads one segment (with retries), marks it uploaded in the store and
   * updates stats and backpressure accounting.
   *
   * @throws Rethrows upload/store errors after recording a failure for the stream.
   */
  private async uploadOne(seg: SegmentRow): Promise<void> {
    if (this.stopping) return;
    const shash = streamHash16Hex(seg.stream);
    const objectKey = segmentObjectKey(shash, seg.segment_index);
    const leaveUploadPhase = this.memorySampler?.enter("upload", {
      stream: seg.stream,
      segment_index: seg.segment_index,
      size_bytes: seg.size_bytes,
    });
    try {
      const res = await retry(
        async () => {
          // Use the store's file-based upload when it offers one; otherwise
          // read the whole segment into memory.
          if (this.os.putFile) {
            return this.os.putFile(objectKey, seg.local_path, seg.size_bytes);
          }
          const bytes = new Uint8Array(await readFile(seg.local_path));
          return this.os.put(objectKey, bytes, { contentLength: seg.size_bytes });
        },
        {
          retries: this.config.objectStoreRetries,
          baseDelayMs: this.config.objectStoreBaseDelayMs,
          maxDelayMs: this.config.objectStoreMaxDelayMs,
          timeoutMs: this.config.objectStoreTimeoutMs,
        }
      );
      await this.db.markSegmentUploaded(seg.segment_id, res.etag, this.db.nowMs());
      // The segment is uploaded and recorded at this point; a throwing hook
      // must not turn that into a recorded failure or skip the accounting below.
      this.runHook("onMetadataChanged", seg.stream, () => this.hooks?.onMetadataChanged?.(seg.stream));
      if (this.stats) this.stats.recordUploadedBytes(seg.size_bytes);
      if (this.gate) this.gate.adjustOnUpload(seg.size_bytes);
    } catch (e) {
      this.failures.recordFailure(seg.stream);
      throw e;
    } finally {
      leaveUploadPhase?.();
    }
  }

  /**
   * Builds and uploads the manifest for `stream`, commits it in the store and
   * then removes (or hands to the disk cache) the local files of segments
   * newly covered by the uploaded prefix.
   *
   * If a publication for the same stream is already in flight, returns
   * immediately unless `opts.wait` is set, in which case it polls until the
   * other publication has finished. Returns without doing anything while a
   * hard stop is in effect or when the store has no snapshot to publish.
   *
   * @throws Rethrows snapshot-load and manifest-upload errors after recording a
   *   failure for the stream; throws a `dsError` when the manifest cannot be
   *   built. Errors from `commitManifest` propagate unchanged.
   */
  async publishManifest(stream: string, opts: { wait?: boolean } = {}): Promise<void> {
    if (this.stopping) return;
    while (this.manifestInflight.has(stream)) {
      if (!opts.wait) return;
      await new Promise((resolve) => setTimeout(resolve, 10));
      if (this.stopping) return;
    }
    this.manifestInflight.add(stream);
    let publicationToken: string | undefined;
    let committed = false;
    try {
      let snapshot;
      try {
        snapshot = await this.db.loadManifestPublicationSnapshot(stream, { wait: opts.wait });
      } catch (e) {
        this.failures.recordFailure(stream);
        throw e;
      }
      if (!snapshot) return;
      publicationToken = snapshot.publicationToken;
      const manifestRes = buildManifestResult({
        streamName: stream,
        streamRow: snapshot.streamRow,
        publishedLogicalSizeBytes: snapshot.publishedLogicalSizeBytes,
        profileJson: snapshot.profileJson,
        segmentMeta: snapshot.segmentMeta,
        uploadedPrefixCount: snapshot.uploadedPrefixCount,
        generation: snapshot.generation,
        indexState: snapshot.indexState,
        indexRuns: snapshot.indexRuns,
        retiredRuns: snapshot.retiredRuns,
        secondaryIndexStates: snapshot.secondaryIndexStates,
        secondaryIndexRuns: snapshot.secondaryIndexRuns,
        retiredSecondaryIndexRuns: snapshot.retiredSecondaryIndexRuns,
        lexiconIndexStates: snapshot.lexiconIndexStates,
        lexiconIndexRuns: snapshot.lexiconIndexRuns,
        retiredLexiconIndexRuns: snapshot.retiredLexiconIndexRuns,
        searchCompanionPlan: snapshot.searchCompanionPlan,
        searchSegmentCompanions: snapshot.searchSegmentCompanions,
      });
      if (Result.isError(manifestRes)) {
        this.failures.recordFailure(stream);
        throw dsError(manifestRes.error.message);
      }
      const manifest = manifestRes.value;

      const shash = streamHash16Hex(stream);
      const mKey = manifestObjectKey(shash);
      const body = new TextEncoder().encode(JSON.stringify(manifest));
      let putRes;
      try {
        putRes = await retry(
          () => this.os.put(mKey, body),
          {
            retries: this.config.objectStoreRetries,
            baseDelayMs: this.config.objectStoreBaseDelayMs,
            maxDelayMs: this.config.objectStoreMaxDelayMs,
            timeoutMs: this.config.objectStoreTimeoutMs,
          }
        );
      } catch (e) {
        this.failures.recordFailure(stream);
        throw e;
      }

      // Commit point: advance uploaded_through and delete WAL prefix.
      await this.db.commitManifest(stream, snapshot.generation, putRes.etag, this.db.nowMs(), snapshot.uploadedThrough, body.byteLength, publicationToken);
      committed = true;
      // Guarded so that a throwing hook cannot skip the disk cleanup below.
      this.runHook("onMetadataChanged", stream, () => this.hooks?.onMetadataChanged?.(stream));

      // Local disk cleanup: delete newly uploaded segment files.
      if (snapshot.uploadedPrefixCount > snapshot.prevUploadedSegmentCount) {
        for (let i = snapshot.prevUploadedSegmentCount; i < snapshot.uploadedPrefixCount; i++) {
          const seg = await this.db.getSegmentForManifestCleanup(stream, i);
          if (!seg) continue;
          try {
            const objectKey = segmentObjectKey(shash, seg.segment_index);
            // If the disk cache accepts the file it is kept; otherwise delete it.
            if (this.diskCache && this.diskCache.putFromLocal(objectKey, seg.local_path, seg.size_bytes)) {
              continue;
            }
            unlinkSync(seg.local_path);
          } catch {
            // Best-effort cleanup: a failure here is deliberately ignored.
          }
        }
      }
    } finally {
      // Release the publication claim if we took one but never committed.
      if (publicationToken && !committed) {
        try {
          await this.db.releaseManifestPublication?.(publicationToken);
        } catch {
          // ignore release failures
        }
      }
      this.manifestInflight.delete(stream);
    }
  }

  /**
   * Decides whether an error is worth logging. Errors are suppressed during a
   * hard stop and when their message indicates the database was closed or a
   * statement was finalized underneath us.
   */
  private shouldReport(e: unknown): boolean {
    if (this.stopping) return false;
    const raw = (e as { message?: unknown } | null | undefined)?.message ?? e;
    const lower = String(raw).toLowerCase();
    return (
      !lower.includes("database has closed") &&
      !lower.includes("closed database") &&
      !lower.includes("statement has finalized")
    );
  }

  /**
   * Invokes a hook and contains any exception it throws. The failure is
   * logged unless a hard stop is in effect.
   */
  private runHook(name: string, stream: string, invoke: () => void): void {
    try {
      invoke();
    } catch (e) {
      if (!this.stopping) {
        // eslint-disable-next-line no-console
        console.error(`uploader hook ${name} failed`, stream, e);
      }
    }
  }
}
|
|
328
|
+
|
|
329
|
+
/**
 * Per-stream exponential backoff for failed uploads and manifest publications.
 *
 * Each consecutive failure doubles the delay (500 ms, 1 s, 2 s, ...) up to a
 * 60 s cap. The failure streak is kept until `recordSuccess` is called or the
 * stream has been quiet for `STREAK_RESET_MS` after its backoff expired.
 *
 * An entry is intentionally NOT dropped when its backoff expires: doing so
 * would make every retry look like a first failure and pin the backoff at the
 * base delay.
 */
class FailureTracker {
  private static readonly BASE_BACKOFF_MS = 500;
  private static readonly MAX_BACKOFF_MS = 60_000;
  /** A failure this long after the previous backoff ended starts a new streak. */
  private static readonly STREAK_RESET_MS = 60_000;

  private readonly cache: LruCache<string, { attempts: number; untilMs: number }>;

  /** @param maxEntries Capacity passed to the underlying `LruCache`. */
  constructor(maxEntries: number) {
    this.cache = new LruCache(maxEntries);
  }

  /** Returns true while the stream is still inside its backoff window. */
  shouldSkip(stream: string): boolean {
    const item = this.cache.get(stream);
    if (!item) return false;
    return Date.now() < item.untilMs;
  }

  /** Records a failure for the stream and schedules the next allowed attempt. */
  recordFailure(stream: string): void {
    const now = Date.now();
    const prev = this.cache.get(stream);
    const priorAttempts = prev && now - prev.untilMs < FailureTracker.STREAK_RESET_MS ? prev.attempts : 0;
    const attempts = priorAttempts + 1;
    // 2 ** n overflows to Infinity for very large n; Math.min still yields the cap.
    const backoff = Math.min(FailureTracker.MAX_BACKOFF_MS, FailureTracker.BASE_BACKOFF_MS * 2 ** (attempts - 1));
    this.cache.set(stream, { attempts, untilMs: now + backoff });
  }

  /** Clears the failure streak of the stream. */
  recordSuccess(stream: string): void {
    this.cache.delete(stream);
  }
}
|