@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { Result } from "better-result";
|
|
3
|
+
import type { SchemaRegistry, SearchFieldConfig } from "../schema/registry";
|
|
4
|
+
import { canonicalizeExactValue, extractSearchExactTermsResult, extractSearchExactValuesResult, getSearchFieldBinding } from "../search/schema";
|
|
5
|
+
import { schemaVersionForOffset } from "../schema/read_json";
|
|
6
|
+
import { resolvePointerResult } from "../util/json_pointer";
|
|
7
|
+
|
|
8
|
+
/**
 * A search field paired with its configuration, as used by the secondary
 * (exact-match) index helpers in this module.
 */
export type SecondaryIndexField = {
  /** Field name; `getConfiguredSecondaryIndexes` takes it from the key under `search.fields`. */
  name: string;
  /** Search configuration attached to that field. */
  config: SearchFieldConfig;
};
|
|
12
|
+
|
|
13
|
+
/**
 * One exact-match term extracted from a record for a secondary index.
 */
export type SecondaryIndexTerm = {
  /** The field (name + config) this term belongs to. */
  index: SecondaryIndexField;
  /** Canonical string form of the value. */
  canonical: string;
  /** Byte form of the term — presumably the encoding of `canonical`; confirm against the search schema helpers. */
  bytes: Uint8Array;
};
|
|
18
|
+
|
|
19
|
+
/**
 * Appends every non-array leaf of `value` to `out`, flattening nested arrays
 * depth-first and preserving left-to-right order.
 *
 * Uses an explicit stack rather than recursion so that pathologically deep
 * array nesting in record data cannot overflow the call stack.
 *
 * @param out   Destination array; mutated in place.
 * @param value A scalar, or an arbitrarily nested array of scalars.
 */
function addRawValues(out: unknown[], value: unknown): void {
  const pending: unknown[] = [value];
  while (pending.length > 0) {
    const current = pending.pop();
    if (Array.isArray(current)) {
      // Push in reverse so the leftmost element is popped (and emitted) first.
      for (let i = current.length - 1; i >= 0; i--) pending.push(current[i]);
      continue;
    }
    out.push(current);
  }
}
|
|
26
|
+
|
|
27
|
+
/**
 * Lists the search fields of `registry` whose config has `exact === true`.
 *
 * @param registry Schema registry; may have no `search` section.
 * @returns One `{ name, config }` entry per exact field, in `Object.entries`
 *          order; an empty array when the registry has no search config.
 */
export function getConfiguredSecondaryIndexes(registry: SchemaRegistry): SecondaryIndexField[] {
  const searchConfig = registry.search;
  if (!searchConfig) return [];

  const indexes: SecondaryIndexField[] = [];
  for (const [name, config] of Object.entries(searchConfig.fields)) {
    if (config.exact !== true) continue;
    indexes.push({ name, config });
  }
  return indexes;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Computes a hex SHA-256 fingerprint of the parts of a field's configuration
 * that this function serializes: name, kind, bindings, normalizer, analyzer
 * and the `exact` flag.
 *
 * The key order of the serialized object is part of the hash input and must
 * not be changed.
 *
 * @param index Field name and config to fingerprint.
 * @returns 64-character lowercase hex digest.
 */
export function hashSecondaryIndexField(index: SecondaryIndexField): string {
  const { name, config } = index;
  const fingerprint = {
    name,
    kind: config.kind,
    bindings: config.bindings,
    normalizer: config.normalizer ?? null,
    analyzer: config.analyzer ?? null,
    exact: config.exact === true,
  };
  const hasher = createHash("sha256");
  hasher.update(JSON.stringify(fingerprint));
  return hasher.digest("hex");
}
|
|
49
|
+
|
|
50
|
+
/**
 * Canonicalizes a raw value for a secondary index by delegating to
 * `canonicalizeExactValue` from the search schema module.
 *
 * @param config Search configuration of the field being indexed.
 * @param value  Raw value taken from a record.
 * @returns The canonical string, or `null` when the delegate returns `null`.
 */
export function canonicalizeSecondaryIndexValue(config: SearchFieldConfig, value: unknown): string | null {
  const canonical = canonicalizeExactValue(config, value);
  return canonical;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Extracts the exact-match terms of a record and reshapes them into
 * `SecondaryIndexTerm` entries.
 *
 * @param registry Schema registry describing the stream's search fields.
 * @param offset   Record offset, forwarded to the search-schema extractor.
 * @param value    Decoded record value.
 * @returns The extractor's error unchanged, or the reshaped term list.
 */
export function extractSecondaryIndexTermsResult(
  registry: SchemaRegistry,
  offset: bigint,
  value: unknown
): Result<SecondaryIndexTerm[], { message: string }> {
  const extracted = extractSearchExactTermsResult(registry, offset, value);
  if (Result.isError(extracted)) return extracted;

  const terms: SecondaryIndexTerm[] = [];
  for (const { field, config, canonical, bytes } of extracted.value) {
    terms.push({ index: { name: field, config }, canonical, bytes });
  }
  return Result.ok(terms);
}
|
|
69
|
+
|
|
70
|
+
/**
 * Extracts exact-match values for a record by delegating to
 * `extractSearchExactValuesResult`; the result is a map of string keys to
 * string arrays, passed through unchanged.
 *
 * @param registry Schema registry describing the stream's search fields.
 * @param offset   Record offset, forwarded to the delegate.
 * @param value    Decoded record value.
 */
export function extractSecondaryIndexValuesResult(
  registry: SchemaRegistry,
  offset: bigint,
  value: unknown
): Result<Map<string, string[]>, { message: string }> {
  const valuesByField = extractSearchExactValuesResult(registry, offset, value);
  return valuesByField;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Extracts the distinct canonical values of a single secondary-index field
 * from one record.
 *
 * Steps: reject non-object records, look up the field's binding for the
 * schema version at `offset`, resolve the binding's JSON pointer, flatten
 * array values, canonicalize each leaf, and drop nulls and duplicates while
 * keeping first-seen order.
 *
 * @param registry Schema registry used to determine the schema version.
 * @param offset   Record offset.
 * @param value    Decoded record; must be a non-array object.
 * @param index    Field whose values are wanted.
 * @returns An error for non-object records or pointer-resolution failures;
 *          otherwise the canonical values (empty when the field has no
 *          binding for this version or the pointer target does not exist).
 */
export function extractSecondaryIndexValuesForFieldResult(
  registry: SchemaRegistry,
  offset: bigint,
  value: unknown,
  index: SecondaryIndexField
): Result<string[], { message: string }> {
  if (value === null || typeof value !== "object" || Array.isArray(value)) {
    return Result.err({ message: "search fields require JSON object records" });
  }

  const binding = getSearchFieldBinding(index.config, schemaVersionForOffset(registry, offset));
  if (!binding) return Result.ok([]);

  const resolved = resolvePointerResult(value, binding.jsonPointer);
  if (Result.isError(resolved)) return Result.err({ message: resolved.error.message });
  if (!resolved.value.exists) return Result.ok([]);

  const candidates: unknown[] = [];
  addRawValues(candidates, resolved.value.value);

  // A Set iterates in insertion order, so this both de-duplicates and keeps first-seen order.
  const unique = new Set<string>();
  for (const candidate of candidates) {
    const canonical = canonicalizeExactValue(index.config, candidate);
    if (canonical != null) unique.add(canonical);
  }
  return Result.ok([...unique]);
}
|
package/src/ingest.ts
CHANGED
|
@@ -3,7 +3,6 @@ import type { SqliteDurableStore } from "./db/db";
|
|
|
3
3
|
import { STREAM_FLAG_DELETED } from "./db/db";
|
|
4
4
|
import type { StatsCollector } from "./stats";
|
|
5
5
|
import type { BackpressureGate } from "./backpressure";
|
|
6
|
-
import type { MemoryGuard } from "./memory";
|
|
7
6
|
import type { Metrics } from "./metrics";
|
|
8
7
|
import { Result } from "better-result";
|
|
9
8
|
|
|
@@ -55,7 +54,6 @@ export class IngestQueue {
|
|
|
55
54
|
private readonly db: SqliteDurableStore;
|
|
56
55
|
private readonly stats?: StatsCollector;
|
|
57
56
|
private readonly gate?: BackpressureGate;
|
|
58
|
-
private readonly memory?: MemoryGuard;
|
|
59
57
|
private readonly metrics?: Metrics;
|
|
60
58
|
private readonly q: AppendTask[] = [];
|
|
61
59
|
private timer: any | null = null;
|
|
@@ -73,12 +71,11 @@ export class IngestQueue {
|
|
|
73
71
|
upsertProducerState: any;
|
|
74
72
|
};
|
|
75
73
|
|
|
76
|
-
constructor(cfg: Config, db: SqliteDurableStore, stats?: StatsCollector, gate?: BackpressureGate,
|
|
74
|
+
constructor(cfg: Config, db: SqliteDurableStore, stats?: StatsCollector, gate?: BackpressureGate, metrics?: Metrics) {
|
|
77
75
|
this.cfg = cfg;
|
|
78
76
|
this.db = db;
|
|
79
77
|
this.stats = stats;
|
|
80
78
|
this.gate = gate;
|
|
81
|
-
this.memory = memory;
|
|
82
79
|
this.metrics = metrics;
|
|
83
80
|
|
|
84
81
|
this.stmts = {
|
|
@@ -95,6 +92,7 @@ export class IngestQueue {
|
|
|
95
92
|
`UPDATE streams
|
|
96
93
|
SET next_offset=?, updated_at_ms=?, last_append_ms=?,
|
|
97
94
|
pending_rows=pending_rows+?, pending_bytes=pending_bytes+?,
|
|
95
|
+
logical_size_bytes=logical_size_bytes+?,
|
|
98
96
|
wal_rows=wal_rows+?, wal_bytes=wal_bytes+?,
|
|
99
97
|
stream_seq=?,
|
|
100
98
|
closed=CASE WHEN ? THEN 1 ELSE closed END,
|
|
@@ -149,14 +147,6 @@ export class IngestQueue {
|
|
|
149
147
|
close?: boolean;
|
|
150
148
|
}, opts?: { bypassBackpressure?: boolean; priority?: "high" | "normal" }): Promise<AppendResult> {
|
|
151
149
|
const bytes = args.rows.reduce((acc, r) => acc + r.payload.byteLength, 0);
|
|
152
|
-
if (this.memory && !this.memory.shouldAllow()) {
|
|
153
|
-
this.memory.maybeGc("memory limit");
|
|
154
|
-
if (!opts?.bypassBackpressure) {
|
|
155
|
-
this.memory.maybeHeapSnapshot("memory limit");
|
|
156
|
-
if (this.metrics) this.metrics.record("tieredstore.backpressure.over_limit", 1, "count", { reason: "memory" });
|
|
157
|
-
return Promise.resolve(Result.err({ kind: "overloaded" }));
|
|
158
|
-
}
|
|
159
|
-
}
|
|
160
150
|
if (!opts?.bypassBackpressure) {
|
|
161
151
|
if (this.q.length >= this.cfg.ingestMaxQueueRequests || this.queuedBytes + bytes > this.cfg.ingestMaxQueueBytes) {
|
|
162
152
|
if (this.metrics) this.metrics.record("tieredstore.backpressure.over_limit", 1, "count", { reason: "queue" });
|
|
@@ -208,6 +198,13 @@ export class IngestQueue {
|
|
|
208
198
|
return { requests: this.q.length, bytes: this.queuedBytes };
|
|
209
199
|
}
|
|
210
200
|
|
|
201
|
+
getMemoryStats(): { queuedPayloadBytes: number; queuedRequests: number } {
|
|
202
|
+
return {
|
|
203
|
+
queuedPayloadBytes: this.queuedBytes,
|
|
204
|
+
queuedRequests: this.q.length,
|
|
205
|
+
};
|
|
206
|
+
}
|
|
207
|
+
|
|
211
208
|
isQueueFull(): boolean {
|
|
212
209
|
return this.q.length >= this.cfg.ingestMaxQueueRequests || this.queuedBytes >= this.cfg.ingestMaxQueueBytes;
|
|
213
210
|
}
|
|
@@ -560,6 +557,7 @@ export class IngestQueue {
|
|
|
560
557
|
st.lastAppendMs,
|
|
561
558
|
BigInt(task.rows.length),
|
|
562
559
|
totalBytes,
|
|
560
|
+
totalBytes,
|
|
563
561
|
BigInt(task.rows.length),
|
|
564
562
|
totalBytes,
|
|
565
563
|
st.streamSeq,
|
package/src/manifest.ts
CHANGED
|
@@ -1,6 +1,17 @@
|
|
|
1
1
|
import { zstdCompressSync } from "node:zlib";
|
|
2
2
|
import { Result } from "better-result";
|
|
3
|
-
import type {
|
|
3
|
+
import type {
|
|
4
|
+
IndexRunRow,
|
|
5
|
+
IndexStateRow,
|
|
6
|
+
LexiconIndexRunRow,
|
|
7
|
+
LexiconIndexStateRow,
|
|
8
|
+
SearchCompanionPlanRow,
|
|
9
|
+
SearchSegmentCompanionRow,
|
|
10
|
+
SecondaryIndexRunRow,
|
|
11
|
+
SecondaryIndexStateRow,
|
|
12
|
+
SegmentMetaRow,
|
|
13
|
+
StreamRow,
|
|
14
|
+
} from "./db/db";
|
|
4
15
|
import { encodeOffsetResult } from "./offset";
|
|
5
16
|
import { dsError } from "./util/ds_error.ts";
|
|
6
17
|
|
|
@@ -12,6 +23,15 @@ function compressB64(bytes: Uint8Array): string {
|
|
|
12
23
|
return b64(new Uint8Array(zstdCompressSync(bytes)));
|
|
13
24
|
}
|
|
14
25
|
|
|
26
|
+
/**
 * Parses a JSON-encoded list of section names leniently.
 *
 * @param raw JSON text expected to contain an array.
 * @returns The string elements of the parsed array, in order; an empty array
 *          when `raw` is not valid JSON or does not decode to an array.
 */
function parseSectionsJson(raw: string): string[] {
  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    return [];
  }
  if (!Array.isArray(decoded)) return [];

  const names: string[] = [];
  for (const entry of decoded) {
    if (typeof entry === "string") names.push(entry);
  }
  return names;
}
|
|
34
|
+
|
|
15
35
|
export type ManifestJson = Record<string, any>;
|
|
16
36
|
export type ManifestBuildError = { kind: "invalid_manifest"; message: string };
|
|
17
37
|
|
|
@@ -22,16 +42,37 @@ function invalidManifest<T = never>(message: string): Result<T, ManifestBuildErr
|
|
|
22
42
|
type BuildManifestArgs = {
|
|
23
43
|
streamName: string;
|
|
24
44
|
streamRow: StreamRow;
|
|
45
|
+
publishedLogicalSizeBytes: bigint;
|
|
46
|
+
profileJson?: Record<string, any> | null;
|
|
25
47
|
segmentMeta: SegmentMetaRow;
|
|
26
48
|
uploadedPrefixCount: number;
|
|
27
49
|
generation: number;
|
|
28
50
|
indexState?: IndexStateRow | null;
|
|
29
51
|
indexRuns?: IndexRunRow[];
|
|
30
52
|
retiredRuns?: IndexRunRow[];
|
|
53
|
+
secondaryIndexStates?: SecondaryIndexStateRow[];
|
|
54
|
+
secondaryIndexRuns?: SecondaryIndexRunRow[];
|
|
55
|
+
retiredSecondaryIndexRuns?: SecondaryIndexRunRow[];
|
|
56
|
+
lexiconIndexStates?: LexiconIndexStateRow[];
|
|
57
|
+
lexiconIndexRuns?: LexiconIndexRunRow[];
|
|
58
|
+
retiredLexiconIndexRuns?: LexiconIndexRunRow[];
|
|
59
|
+
searchCompanionPlan?: SearchCompanionPlanRow | null;
|
|
60
|
+
searchSegmentCompanions?: SearchSegmentCompanionRow[];
|
|
31
61
|
};
|
|
32
62
|
|
|
33
63
|
export function buildManifestResult(args: BuildManifestArgs): Result<ManifestJson, ManifestBuildError> {
|
|
34
|
-
const {
|
|
64
|
+
const {
|
|
65
|
+
streamName,
|
|
66
|
+
streamRow,
|
|
67
|
+
publishedLogicalSizeBytes,
|
|
68
|
+
profileJson,
|
|
69
|
+
segmentMeta,
|
|
70
|
+
uploadedPrefixCount,
|
|
71
|
+
generation,
|
|
72
|
+
indexState,
|
|
73
|
+
indexRuns,
|
|
74
|
+
retiredRuns,
|
|
75
|
+
} = args;
|
|
35
76
|
|
|
36
77
|
const createdAt = new Date(Number(streamRow.created_at_ms)).toISOString();
|
|
37
78
|
const expiresAt = streamRow.expires_at_ms == null ? null : new Date(Number(streamRow.expires_at_ms)).toISOString();
|
|
@@ -59,28 +100,111 @@ export function buildManifestResult(args: BuildManifestArgs): Result<ManifestJso
|
|
|
59
100
|
start_segment: r.start_segment,
|
|
60
101
|
end_segment: r.end_segment,
|
|
61
102
|
object_key: r.object_key,
|
|
103
|
+
size_bytes: r.size_bytes,
|
|
62
104
|
filter_len: r.filter_len,
|
|
63
105
|
record_count: r.record_count,
|
|
64
106
|
})) ?? [];
|
|
65
107
|
const retired = retiredRuns?.map((r) => ({
|
|
66
108
|
run_id: r.run_id,
|
|
67
109
|
level: r.level,
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
110
|
+
start_segment: r.start_segment,
|
|
111
|
+
end_segment: r.end_segment,
|
|
112
|
+
object_key: r.object_key,
|
|
113
|
+
size_bytes: r.size_bytes,
|
|
114
|
+
filter_len: r.filter_len,
|
|
115
|
+
record_count: r.record_count,
|
|
116
|
+
retired_gen: r.retired_gen ?? undefined,
|
|
74
117
|
retired_at_unix: r.retired_at_ms != null ? Number(r.retired_at_ms / 1000n) : undefined,
|
|
75
118
|
})) ?? [];
|
|
76
119
|
const indexSecret = indexState?.index_secret ? b64(indexState.index_secret) : "";
|
|
77
120
|
const indexedThrough = indexState?.indexed_through ?? 0;
|
|
121
|
+
const secondaryIndexes: Record<string, any> = {};
|
|
122
|
+
const secondaryStates = args.secondaryIndexStates ?? [];
|
|
123
|
+
const secondaryRuns = args.secondaryIndexRuns ?? [];
|
|
124
|
+
const retiredSecondaryRuns = args.retiredSecondaryIndexRuns ?? [];
|
|
125
|
+
for (const state of secondaryStates) {
|
|
126
|
+
secondaryIndexes[state.index_name] = {
|
|
127
|
+
index_secret: b64(state.index_secret),
|
|
128
|
+
config_hash: state.config_hash,
|
|
129
|
+
indexed_through: state.indexed_through,
|
|
130
|
+
active_runs: secondaryRuns
|
|
131
|
+
.filter((run) => run.index_name === state.index_name)
|
|
132
|
+
.map((run) => ({
|
|
133
|
+
run_id: run.run_id,
|
|
134
|
+
level: run.level,
|
|
135
|
+
start_segment: run.start_segment,
|
|
136
|
+
end_segment: run.end_segment,
|
|
137
|
+
object_key: run.object_key,
|
|
138
|
+
size_bytes: run.size_bytes,
|
|
139
|
+
filter_len: run.filter_len,
|
|
140
|
+
record_count: run.record_count,
|
|
141
|
+
})),
|
|
142
|
+
retired_runs: retiredSecondaryRuns
|
|
143
|
+
.filter((run) => run.index_name === state.index_name)
|
|
144
|
+
.map((run) => ({
|
|
145
|
+
run_id: run.run_id,
|
|
146
|
+
level: run.level,
|
|
147
|
+
start_segment: run.start_segment,
|
|
148
|
+
end_segment: run.end_segment,
|
|
149
|
+
object_key: run.object_key,
|
|
150
|
+
size_bytes: run.size_bytes,
|
|
151
|
+
filter_len: run.filter_len,
|
|
152
|
+
record_count: run.record_count,
|
|
153
|
+
retired_gen: run.retired_gen ?? undefined,
|
|
154
|
+
retired_at_unix: run.retired_at_ms != null ? Number(run.retired_at_ms / 1000n) : undefined,
|
|
155
|
+
})),
|
|
156
|
+
};
|
|
157
|
+
}
|
|
158
|
+
const lexiconIndexes = (args.lexiconIndexStates ?? []).map((state) => ({
|
|
159
|
+
source_kind: state.source_kind,
|
|
160
|
+
source_name: state.source_name,
|
|
161
|
+
indexed_through: state.indexed_through,
|
|
162
|
+
active_runs: (args.lexiconIndexRuns ?? [])
|
|
163
|
+
.filter((run) => run.source_kind === state.source_kind && run.source_name === state.source_name)
|
|
164
|
+
.map((run) => ({
|
|
165
|
+
run_id: run.run_id,
|
|
166
|
+
level: run.level,
|
|
167
|
+
start_segment: run.start_segment,
|
|
168
|
+
end_segment: run.end_segment,
|
|
169
|
+
object_key: run.object_key,
|
|
170
|
+
size_bytes: run.size_bytes,
|
|
171
|
+
record_count: run.record_count,
|
|
172
|
+
})),
|
|
173
|
+
retired_runs: (args.retiredLexiconIndexRuns ?? [])
|
|
174
|
+
.filter((run) => run.source_kind === state.source_kind && run.source_name === state.source_name)
|
|
175
|
+
.map((run) => ({
|
|
176
|
+
run_id: run.run_id,
|
|
177
|
+
level: run.level,
|
|
178
|
+
start_segment: run.start_segment,
|
|
179
|
+
end_segment: run.end_segment,
|
|
180
|
+
object_key: run.object_key,
|
|
181
|
+
size_bytes: run.size_bytes,
|
|
182
|
+
record_count: run.record_count,
|
|
183
|
+
retired_gen: run.retired_gen ?? undefined,
|
|
184
|
+
retired_at_unix: run.retired_at_ms != null ? Number(run.retired_at_ms / 1000n) : undefined,
|
|
185
|
+
})),
|
|
186
|
+
}));
|
|
187
|
+
const searchCompanionPlan = args.searchCompanionPlan ?? null;
|
|
188
|
+
const searchCompanionSegments = (args.searchSegmentCompanions ?? [])
|
|
189
|
+
.filter((segment) => segment.segment_index < prefix)
|
|
190
|
+
.map((segment) => ({
|
|
191
|
+
segment_index: segment.segment_index,
|
|
192
|
+
object_key: segment.object_key,
|
|
193
|
+
size_bytes: segment.size_bytes,
|
|
194
|
+
plan_generation: segment.plan_generation,
|
|
195
|
+
primary_timestamp_min_ms: segment.primary_timestamp_min_ms?.toString() ?? undefined,
|
|
196
|
+
primary_timestamp_max_ms: segment.primary_timestamp_max_ms?.toString() ?? undefined,
|
|
197
|
+
sections: parseSectionsJson(segment.sections_json),
|
|
198
|
+
section_sizes: JSON.parse(segment.section_sizes_json || "{}"),
|
|
199
|
+
}));
|
|
78
200
|
|
|
79
201
|
return Result.ok({
|
|
80
202
|
name: streamName,
|
|
81
203
|
created_at: createdAt,
|
|
82
204
|
expires_at: expiresAt,
|
|
83
205
|
content_type: streamRow.content_type,
|
|
206
|
+
profile: streamRow.profile ?? "generic",
|
|
207
|
+
profile_json: profileJson ?? null,
|
|
84
208
|
stream_seq: streamRow.stream_seq ?? null,
|
|
85
209
|
closed: streamRow.closed,
|
|
86
210
|
closed_producer_id: streamRow.closed_producer_id ?? null,
|
|
@@ -92,6 +216,7 @@ export function buildManifestResult(args: BuildManifestArgs): Result<ManifestJso
|
|
|
92
216
|
epoch: streamRow.epoch,
|
|
93
217
|
next_offset: nextOffsetNum,
|
|
94
218
|
next_offset_encoded: nextOffsetEncoded,
|
|
219
|
+
logical_size_bytes: publishedLogicalSizeBytes.toString(),
|
|
95
220
|
segment_count: prefix,
|
|
96
221
|
uploaded_through: prefix,
|
|
97
222
|
active_file_offset: nextOffsetNum,
|
|
@@ -104,6 +229,16 @@ export function buildManifestResult(args: BuildManifestArgs): Result<ManifestJso
|
|
|
104
229
|
index_secret: indexSecret,
|
|
105
230
|
active_runs: activeRuns,
|
|
106
231
|
retired_runs: retired,
|
|
232
|
+
secondary_indexes: secondaryIndexes,
|
|
233
|
+
lexicon_indexes: lexiconIndexes,
|
|
234
|
+
search_companions: searchCompanionPlan
|
|
235
|
+
? {
|
|
236
|
+
generation: searchCompanionPlan.generation,
|
|
237
|
+
plan_hash: searchCompanionPlan.plan_hash,
|
|
238
|
+
plan_json: JSON.parse(searchCompanionPlan.plan_json),
|
|
239
|
+
segments: searchCompanionSegments,
|
|
240
|
+
}
|
|
241
|
+
: null,
|
|
107
242
|
});
|
|
108
243
|
}
|
|
109
244
|
|
package/src/memory.ts
CHANGED
|
@@ -1,6 +1,36 @@
|
|
|
1
|
-
|
|
1
|
+
import { execFileSync } from "node:child_process";
|
|
2
|
+
import { readFileSync } from "node:fs";
|
|
3
|
+
import os from "node:os";
|
|
4
|
+
|
|
5
|
+
const HOST_MEMORY_GUARD_FRACTION = 0.7;
|
|
6
|
+
const HOST_MEMORY_HEADROOM_FRACTION = 0.15;
|
|
7
|
+
const HOST_MEMORY_HEADROOM_MIN_BYTES = 512 * 1024 * 1024;
|
|
8
|
+
const HOST_MEMORY_HEADROOM_MAX_BYTES = 2 * 1024 * 1024 * 1024;
|
|
9
|
+
|
|
10
|
+
/**
 * Clamps a requested memory limit to a fraction of host memory.
 *
 * @param requestedLimitBytes Desired limit; floored to an integer. Zero,
 *        negative and NaN values all mean "no limit" and yield 0.
 * @param hostTotalBytes Total host memory; defaults to `os.totalmem()`.
 *        When it is not a positive finite number the request is returned
 *        unclamped.
 * @returns The smaller of the request and
 *          `max(256 MiB, hostTotalBytes * HOST_MEMORY_GUARD_FRACTION)`.
 */
export function deriveMemoryPressureLimitBytes(requestedLimitBytes: number, hostTotalBytes = os.totalmem()): number {
  // NaN survives Math.max/Math.floor and is not `<= 0`, so without this guard
  // it would be returned as the limit and poison every later comparison.
  if (Number.isNaN(requestedLimitBytes)) return 0;
  const requested = Math.max(0, Math.floor(requestedLimitBytes));
  if (requested <= 0) return 0;
  if (!Number.isFinite(hostTotalBytes) || hostTotalBytes <= 0) return requested;
  const safeHostCap = Math.max(256 * 1024 * 1024, Math.floor(hostTotalBytes * HOST_MEMORY_GUARD_FRACTION));
  return Math.min(requested, safeHostCap);
}
|
|
17
|
+
|
|
18
|
+
/**
 * Derives how much host memory should stay available before the monitor
 * treats the host as low on memory.
 *
 * @param limitBytes Effective memory limit; floored to an integer. Zero,
 *        negative and NaN values yield 0 (no headroom requirement).
 * @param hostTotalBytes Total host memory; defaults to `os.totalmem()`.
 * @returns `hostTotalBytes * HOST_MEMORY_HEADROOM_FRACTION`, clamped to the
 *          range [HOST_MEMORY_HEADROOM_MIN_BYTES, HOST_MEMORY_HEADROOM_MAX_BYTES]
 *          and never more than the limit itself. When host memory is unknown
 *          the minimum headroom (capped by the limit) is used.
 */
export function deriveMemoryPressureHeadroomBytes(limitBytes: number, hostTotalBytes = os.totalmem()): number {
  // NaN survives Math.max/Math.floor and is not `<= 0`; treat it as "no limit".
  if (Number.isNaN(limitBytes)) return 0;
  const limit = Math.max(0, Math.floor(limitBytes));
  if (limit <= 0) return 0;
  if (!Number.isFinite(hostTotalBytes) || hostTotalBytes <= 0) {
    return Math.min(limit, HOST_MEMORY_HEADROOM_MIN_BYTES);
  }
  const headroomFromHost = Math.floor(hostTotalBytes * HOST_MEMORY_HEADROOM_FRACTION);
  const headroom = Math.max(HOST_MEMORY_HEADROOM_MIN_BYTES, headroomFromHost);
  return Math.min(limit, Math.min(HOST_MEMORY_HEADROOM_MAX_BYTES, headroom));
}
|
|
28
|
+
|
|
29
|
+
export class MemoryPressureMonitor {
|
|
2
30
|
private readonly limitBytes: number;
|
|
3
31
|
private readonly resumeBytes: number;
|
|
32
|
+
private readonly hostHeadroomBytes: number;
|
|
33
|
+
private readonly hostResumeHeadroomBytes: number;
|
|
4
34
|
private readonly intervalMs: number;
|
|
5
35
|
private readonly onSample?: (rssBytes: number, overLimit: boolean, limitBytes: number) => void;
|
|
6
36
|
private readonly heapSnapshotPath?: string;
|
|
@@ -10,7 +40,16 @@ export class MemoryGuard {
|
|
|
10
40
|
private maxRssBytes = 0;
|
|
11
41
|
private lastRssBytes = 0;
|
|
12
42
|
private lastGcMs = 0;
|
|
43
|
+
private forcedGcCount = 0;
|
|
44
|
+
private forcedGcReclaimedBytesTotal = 0;
|
|
45
|
+
private lastForcedGcAtMs = 0;
|
|
46
|
+
private lastForcedGcBeforeBytes = 0;
|
|
47
|
+
private lastForcedGcAfterBytes = 0;
|
|
48
|
+
private lastForcedGcReclaimedBytes = 0;
|
|
13
49
|
private lastSnapshotMs = 0;
|
|
50
|
+
private heapSnapshotsWritten = 0;
|
|
51
|
+
private lastDarwinPhysicalBytes = 0;
|
|
52
|
+
private lastDarwinPhysicalAtMs = 0;
|
|
14
53
|
|
|
15
54
|
constructor(
|
|
16
55
|
limitBytes: number,
|
|
@@ -22,11 +61,20 @@ export class MemoryGuard {
|
|
|
22
61
|
heapSnapshotMinIntervalMs?: number;
|
|
23
62
|
} = {}
|
|
24
63
|
) {
|
|
25
|
-
|
|
64
|
+
const requestedLimitBytes = Math.max(0, Math.floor(limitBytes));
|
|
65
|
+
this.limitBytes = deriveMemoryPressureLimitBytes(requestedLimitBytes);
|
|
66
|
+
if (requestedLimitBytes > 0 && this.limitBytes < requestedLimitBytes) {
|
|
67
|
+
// eslint-disable-next-line no-console
|
|
68
|
+
console.warn(
|
|
69
|
+
`[memory] clamped limit from ${formatBytes(requestedLimitBytes)} to ${formatBytes(this.limitBytes)} based on host memory`
|
|
70
|
+
);
|
|
71
|
+
}
|
|
26
72
|
// Resume as soon as RSS drops back below the limit by default (no hysteresis),
|
|
27
73
|
// so the server doesn't "deadlock" itself under a stable high-water mark.
|
|
28
74
|
const resumeFraction = Math.min(1.0, Math.max(0.5, opts.resumeFraction ?? 1.0));
|
|
29
75
|
this.resumeBytes = Math.floor(this.limitBytes * resumeFraction);
|
|
76
|
+
this.hostHeadroomBytes = deriveMemoryPressureHeadroomBytes(this.limitBytes);
|
|
77
|
+
this.hostResumeHeadroomBytes = Math.floor(this.hostHeadroomBytes * 1.25);
|
|
30
78
|
this.intervalMs = Math.max(50, opts.intervalMs ?? 1000);
|
|
31
79
|
this.onSample = opts.onSample;
|
|
32
80
|
this.heapSnapshotPath = opts.heapSnapshotPath;
|
|
@@ -46,10 +94,13 @@ export class MemoryGuard {
|
|
|
46
94
|
|
|
47
95
|
private sample(): void {
|
|
48
96
|
const rss = process.memoryUsage().rss;
|
|
97
|
+
const effectiveBytes = this.effectiveBytesForGuard(rss);
|
|
98
|
+
const hostAvailableBytes = readHostAvailableMemoryBytes();
|
|
49
99
|
this.lastRssBytes = rss;
|
|
50
100
|
if (rss > this.maxRssBytes) this.maxRssBytes = rss;
|
|
101
|
+
const hostLowMemory = this.hostHeadroomBytes > 0 && hostAvailableBytes <= this.hostHeadroomBytes;
|
|
102
|
+
const overLimit = this.limitBytes > 0 && (effectiveBytes > this.limitBytes || hostLowMemory);
|
|
51
103
|
if (this.onSample) {
|
|
52
|
-
const overLimit = this.limitBytes > 0 && rss > this.limitBytes;
|
|
53
104
|
try {
|
|
54
105
|
this.onSample(rss, overLimit, this.limitBytes);
|
|
55
106
|
} catch {
|
|
@@ -57,16 +108,35 @@ export class MemoryGuard {
|
|
|
57
108
|
}
|
|
58
109
|
}
|
|
59
110
|
if (this.limitBytes <= 0) return;
|
|
111
|
+
if (overLimit) {
|
|
112
|
+
this.maybeGc(hostLowMemory ? "host memory headroom" : "memory sample");
|
|
113
|
+
this.maybeHeapSnapshot(hostLowMemory ? "host memory headroom" : "memory sample");
|
|
114
|
+
}
|
|
60
115
|
if (this.overLimit) {
|
|
61
|
-
if (
|
|
62
|
-
} else if (
|
|
116
|
+
if (effectiveBytes <= this.resumeBytes && hostAvailableBytes > this.hostResumeHeadroomBytes) this.overLimit = false;
|
|
117
|
+
} else if (effectiveBytes > this.limitBytes) {
|
|
118
|
+
this.overLimit = true;
|
|
119
|
+
} else if (hostLowMemory) {
|
|
63
120
|
this.overLimit = true;
|
|
64
121
|
}
|
|
65
122
|
}
|
|
66
123
|
|
|
67
|
-
|
|
68
|
-
if (this.limitBytes <= 0) return
|
|
69
|
-
return
|
|
124
|
+
private effectiveBytesForGuard(rssBytes: number): number {
|
|
125
|
+
if (this.limitBytes <= 0 || rssBytes <= this.limitBytes) return rssBytes;
|
|
126
|
+
if (process.platform !== "darwin") return rssBytes;
|
|
127
|
+
const now = Date.now();
|
|
128
|
+
if (this.lastDarwinPhysicalAtMs !== 0 && now - this.lastDarwinPhysicalAtMs < 5_000) {
|
|
129
|
+
return this.lastDarwinPhysicalBytes > 0 ? this.lastDarwinPhysicalBytes : this.limitBytes;
|
|
130
|
+
}
|
|
131
|
+
this.lastDarwinPhysicalAtMs = now;
|
|
132
|
+
const physicalBytes = readDarwinTopMemBytes(process.pid);
|
|
133
|
+
if (physicalBytes != null) {
|
|
134
|
+
this.lastDarwinPhysicalBytes = physicalBytes;
|
|
135
|
+
return physicalBytes;
|
|
136
|
+
}
|
|
137
|
+
if (this.lastDarwinPhysicalBytes > 0) return this.lastDarwinPhysicalBytes;
|
|
138
|
+
this.lastDarwinPhysicalBytes = this.limitBytes;
|
|
139
|
+
return this.lastDarwinPhysicalBytes;
|
|
70
140
|
}
|
|
71
141
|
|
|
72
142
|
isOverLimit(): boolean {
|
|
@@ -91,6 +161,28 @@ export class MemoryGuard {
|
|
|
91
161
|
return this.limitBytes;
|
|
92
162
|
}
|
|
93
163
|
|
|
164
|
+
getGcStats(): {
|
|
165
|
+
forced_gc_count: number;
|
|
166
|
+
forced_gc_reclaimed_bytes_total: number;
|
|
167
|
+
last_forced_gc_at_ms: number | null;
|
|
168
|
+
last_forced_gc_before_bytes: number | null;
|
|
169
|
+
last_forced_gc_after_bytes: number | null;
|
|
170
|
+
last_forced_gc_reclaimed_bytes: number | null;
|
|
171
|
+
heap_snapshots_written: number;
|
|
172
|
+
last_heap_snapshot_at_ms: number | null;
|
|
173
|
+
} {
|
|
174
|
+
return {
|
|
175
|
+
forced_gc_count: this.forcedGcCount,
|
|
176
|
+
forced_gc_reclaimed_bytes_total: this.forcedGcReclaimedBytesTotal,
|
|
177
|
+
last_forced_gc_at_ms: this.lastForcedGcAtMs > 0 ? this.lastForcedGcAtMs : null,
|
|
178
|
+
last_forced_gc_before_bytes: this.lastForcedGcAtMs > 0 ? this.lastForcedGcBeforeBytes : null,
|
|
179
|
+
last_forced_gc_after_bytes: this.lastForcedGcAtMs > 0 ? this.lastForcedGcAfterBytes : null,
|
|
180
|
+
last_forced_gc_reclaimed_bytes: this.lastForcedGcAtMs > 0 ? this.lastForcedGcReclaimedBytes : null,
|
|
181
|
+
heap_snapshots_written: this.heapSnapshotsWritten,
|
|
182
|
+
last_heap_snapshot_at_ms: this.lastSnapshotMs > 0 ? this.lastSnapshotMs : null,
|
|
183
|
+
};
|
|
184
|
+
}
|
|
185
|
+
|
|
94
186
|
maybeGc(reason: string): void {
|
|
95
187
|
const gcFn = (globalThis as any)?.Bun?.gc;
|
|
96
188
|
if (typeof gcFn !== "function") return;
|
|
@@ -108,6 +200,13 @@ export class MemoryGuard {
|
|
|
108
200
|
}
|
|
109
201
|
}
|
|
110
202
|
const after = process.memoryUsage().rss;
|
|
203
|
+
const reclaimed = Math.max(0, before - after);
|
|
204
|
+
this.forcedGcCount += 1;
|
|
205
|
+
this.forcedGcReclaimedBytesTotal += reclaimed;
|
|
206
|
+
this.lastForcedGcAtMs = now;
|
|
207
|
+
this.lastForcedGcBeforeBytes = before;
|
|
208
|
+
this.lastForcedGcAfterBytes = after;
|
|
209
|
+
this.lastForcedGcReclaimedBytes = reclaimed;
|
|
111
210
|
// eslint-disable-next-line no-console
|
|
112
211
|
console.warn(`[gc] forced GC (${reason}) rss ${formatBytes(before)} -> ${formatBytes(after)}`);
|
|
113
212
|
}
|
|
@@ -133,6 +232,7 @@ export class MemoryGuard {
|
|
|
133
232
|
const before = process.memoryUsage().rss;
|
|
134
233
|
v8.writeHeapSnapshot(this.heapSnapshotPath);
|
|
135
234
|
const after = process.memoryUsage().rss;
|
|
235
|
+
this.heapSnapshotsWritten += 1;
|
|
136
236
|
// eslint-disable-next-line no-console
|
|
137
237
|
console.warn(`[heap] snapshot (${reason}) rss ${formatBytes(before)} -> ${formatBytes(after)} path=${this.heapSnapshotPath}`);
|
|
138
238
|
} catch (err) {
|
|
@@ -142,6 +242,81 @@ export class MemoryGuard {
|
|
|
142
242
|
}
|
|
143
243
|
}
|
|
144
244
|
|
|
245
|
+
/**
 * Extracts the memory figure for `pid` from `top` output that was produced
 * with the `pid,mem` columns (see `darwinTopMemArgs`).
 *
 * The first line that starts with the pid followed by whitespace is taken as
 * the process row. Its second column must be a decimal number followed
 * directly by a one-letter unit (B, K, M, G, T or P, any case) and an
 * optional trailing `+`.
 *
 * @param output Raw stdout of `top`.
 * @param pid    Process id to look for.
 * @returns The value in bytes (units are powers of 1024, rounded to the
 *          nearest integer), or `null` when no row for the pid is found or
 *          its memory column cannot be parsed.
 */
export function parseDarwinTopMemBytes(output: string, pid: number): number | null {
  // Both patterns depend only on `pid`: build them once, not once per output line.
  const rowPrefix = new RegExp(`^${pid}\\s+`);
  const memColumn = new RegExp(`^${pid}\\s+([0-9]+(?:\\.[0-9]+)?)([BKMGTP])\\+?\\b`, "i");

  const row = output
    .split(/\r?\n/)
    .map((entry) => entry.trim())
    .find((entry) => rowPrefix.test(entry));
  if (!row) return null;

  const match = memColumn.exec(row);
  if (!match) return null;

  const value = Number(match[1]);
  if (!Number.isFinite(value)) return null;

  // The suffix's position in "BKMGTP" is its power of 1024.
  const unit = match[2]?.toUpperCase();
  const power = unit === undefined ? -1 : "BKMGTP".indexOf(unit);
  if (power < 0) return null;
  return Math.round(value * 1024 ** power);
}
|
|
273
|
+
|
|
274
|
+
/**
 * Builds the argument list passed to `top` to get a single sample containing
 * only the pid and mem columns for one process.
 *
 * @param pid Process id to sample.
 */
export function darwinTopMemArgs(pid: number): string[] {
  const pidArg = `${pid}`;
  return ["-l", "1", "-pid", pidArg, "-stats", "pid,mem"];
}
|
|
277
|
+
|
|
278
|
+
/**
 * Reads the `MemAvailable` entry out of `/proc/meminfo`-style text.
 *
 * Only the first line of the exact form `MemAvailable: <digits> kB`
 * (case-insensitive) is considered.
 *
 * @param meminfo Contents of the meminfo file.
 * @returns The value converted from kB to bytes (×1024), or `null` when no
 *          such line exists or its number is not finite.
 */
export function parseLinuxMemAvailableBytes(meminfo: string): number | null {
  const pattern = /^MemAvailable:\s+([0-9]+)\s+kB$/i;
  const firstHit = meminfo
    .split(/\r?\n/)
    .map((line) => pattern.exec(line))
    .find((hit) => hit !== null);
  if (!firstHit) return null;

  const kilobytes = Number(firstHit[1]);
  return Number.isFinite(kilobytes) && kilobytes >= 0 ? kilobytes * 1024 : null;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Reads `/proc/meminfo` synchronously and returns its `MemAvailable` value
 * in bytes.
 *
 * @returns The parsed value, or `null` when the file cannot be read or the
 *          entry cannot be parsed. Failures are swallowed on purpose so that
 *          callers can fall back to another source.
 */
function readLinuxMemAvailableBytes(): number | null {
  try {
    return parseLinuxMemAvailableBytes(readFileSync("/proc/meminfo", "utf8"));
  } catch {
    return null;
  }
}
|
|
297
|
+
|
|
298
|
+
/**
 * Returns the host's currently available memory in bytes.
 *
 * On Linux the `MemAvailable` figure from `/proc/meminfo` is used when it can
 * be read; on every other platform, or when that read fails, the result of
 * `os.freemem()` is returned instead.
 */
function readHostAvailableMemoryBytes(): number {
  const fromProc = process.platform === "linux" ? readLinuxMemAvailableBytes() : null;
  return fromProc ?? os.freemem();
}
|
|
305
|
+
|
|
306
|
+
/**
 * Runs `/usr/bin/top` synchronously for one process and parses its memory
 * column.
 *
 * The child gets no stdin, its stderr is discarded, and it is bounded by a
 * 5 second timeout and a 256 KiB output buffer. The call blocks the calling
 * thread until `top` exits or the timeout fires.
 *
 * @param pid Process id to sample.
 * @returns Memory in bytes, or `null` when `top` fails, times out, or its
 *          output cannot be parsed.
 */
function readDarwinTopMemBytes(pid: number): number | null {
  try {
    const args = darwinTopMemArgs(pid);
    const stdout = execFileSync("/usr/bin/top", args, {
      encoding: "utf8",
      stdio: ["ignore", "pipe", "ignore"],
      timeout: 5_000,
      maxBuffer: 256 * 1024,
    });
    return parseDarwinTopMemBytes(stdout, pid);
  } catch {
    return null;
  }
}
|
|
319
|
+
|
|
145
320
|
export function formatBytes(bytes: number): string {
|
|
146
321
|
const units = ["b", "kb", "mb", "gb"];
|
|
147
322
|
let value = bytes;
|