@tungthedev/streams-server 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CODE_OF_CONDUCT.md +45 -0
- package/CONTRIBUTING.md +76 -0
- package/LICENSE +201 -0
- package/README.md +58 -0
- package/SECURITY.md +42 -0
- package/bin/prisma-streams-server +2 -0
- package/package.json +46 -0
- package/src/app.ts +583 -0
- package/src/app_core.ts +3144 -0
- package/src/app_local.ts +206 -0
- package/src/auth.ts +124 -0
- package/src/auto_tune.ts +69 -0
- package/src/backpressure.ts +66 -0
- package/src/bootstrap.ts +613 -0
- package/src/compute/demo_entry.ts +415 -0
- package/src/compute/demo_site.ts +1242 -0
- package/src/compute/entry.ts +19 -0
- package/src/compute/package_entry.ts +4 -0
- package/src/compute/virtual-modules.d.ts +15 -0
- package/src/compute/worker_module_url.ts +9 -0
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +402 -0
- package/src/db/bootstrap_store.ts +9 -0
- package/src/db/db.ts +2424 -0
- package/src/db/schema.ts +925 -0
- package/src/db/sqlite_manifest_snapshot.ts +81 -0
- package/src/db/sqlite_touch_store.ts +491 -0
- package/src/db/sqlite_wal_store.ts +472 -0
- package/src/details/full_mode_details.ts +568 -0
- package/src/expiry_sweeper.ts +47 -0
- package/src/foreground_activity.ts +55 -0
- package/src/hist.ts +169 -0
- package/src/index/binary_fuse.ts +379 -0
- package/src/index/indexer.ts +947 -0
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +863 -0
- package/src/index/run_cache.ts +84 -0
- package/src/index/run_format.ts +213 -0
- package/src/index/schedule.ts +28 -0
- package/src/index/secondary_indexer.ts +901 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +309 -0
- package/src/lens/lens.ts +501 -0
- package/src/manifest.ts +249 -0
- package/src/memory.ts +334 -0
- package/src/metrics.ts +147 -0
- package/src/metrics_emitter.ts +83 -0
- package/src/notifier.ts +180 -0
- package/src/objectstore/accounting.ts +151 -0
- package/src/objectstore/interface.ts +13 -0
- package/src/objectstore/mock_r2.ts +269 -0
- package/src/objectstore/null.ts +32 -0
- package/src/objectstore/r2.ts +318 -0
- package/src/observe/pairing.ts +61 -0
- package/src/observe/request.ts +772 -0
- package/src/offset.ts +70 -0
- package/src/postgres/bootstrap.ts +269 -0
- package/src/postgres/companions.ts +197 -0
- package/src/postgres/control_restore.ts +109 -0
- package/src/postgres/details.ts +189 -0
- package/src/postgres/lexicon_index.ts +260 -0
- package/src/postgres/routing_index.ts +189 -0
- package/src/postgres/rows.ts +132 -0
- package/src/postgres/schema.ts +355 -0
- package/src/postgres/secondary_index.ts +238 -0
- package/src/postgres/segments.ts +900 -0
- package/src/postgres/stats.ts +103 -0
- package/src/postgres/store.ts +947 -0
- package/src/postgres/touch.ts +591 -0
- package/src/postgres/types.ts +32 -0
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +473 -0
- package/src/profiles/generic.ts +51 -0
- package/src/profiles/index.ts +237 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +83 -0
- package/src/profiles/otelTraces/normalize.ts +955 -0
- package/src/profiles/otelTraces/otlp.ts +1002 -0
- package/src/profiles/otelTraces/schema.ts +408 -0
- package/src/profiles/otelTraces.ts +390 -0
- package/src/profiles/profile.ts +284 -0
- package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
- package/src/profiles/stateProtocol/changes.ts +24 -0
- package/src/profiles/stateProtocol/ingest.ts +115 -0
- package/src/profiles/stateProtocol/routes.ts +511 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +107 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2986 -0
- package/src/runtime/hash.ts +156 -0
- package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
- package/src/runtime/hash_vendor/NOTICE.md +8 -0
- package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
- package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +237 -0
- package/src/schema/lens_schema.ts +290 -0
- package/src/schema/proof.ts +547 -0
- package/src/schema/read_json.ts +51 -0
- package/src/schema/registry.ts +966 -0
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +409 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +327 -0
- package/src/search/companion_manager.ts +1305 -0
- package/src/search/companion_plan.ts +229 -0
- package/src/search/exact_format.ts +281 -0
- package/src/search/exact_runtime.ts +55 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +270 -0
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +403 -0
- package/src/segment/segmenter.ts +412 -0
- package/src/segment/segmenter_worker.ts +72 -0
- package/src/segment/segmenter_workers.ts +130 -0
- package/src/server.ts +264 -0
- package/src/server_auto_tune.ts +158 -0
- package/src/sqlite/adapter.ts +335 -0
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +205 -0
- package/src/store/append.ts +50 -0
- package/src/store/bootstrap_restore_store.ts +71 -0
- package/src/store/capabilities.ts +86 -0
- package/src/store/full_mode_details_store.ts +71 -0
- package/src/store/index_store.ts +104 -0
- package/src/store/profile_touch_store.ts +1 -0
- package/src/store/rows.ts +144 -0
- package/src/store/schema_profile_store.ts +73 -0
- package/src/store/schema_publication.ts +6 -0
- package/src/store/segment_manifest_store.ts +129 -0
- package/src/store/segment_read_store.ts +22 -0
- package/src/store/stats_accounting_store.ts +83 -0
- package/src/store/touch_store.ts +98 -0
- package/src/store/wal_store.ts +21 -0
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_keys.ts +158 -0
- package/src/touch/live_metrics.ts +841 -0
- package/src/touch/live_templates.ts +449 -0
- package/src/touch/manager.ts +1292 -0
- package/src/touch/process_batch.ts +576 -0
- package/src/touch/processor_worker.ts +85 -0
- package/src/touch/spec.ts +459 -0
- package/src/touch/touch_journal.ts +771 -0
- package/src/touch/touch_key_id.ts +20 -0
- package/src/touch/worker_pool.ts +191 -0
- package/src/touch/worker_protocol.ts +57 -0
- package/src/types/proper-lockfile.d.ts +1 -0
- package/src/uploader.ts +358 -0
- package/src/util/base32_crockford.ts +81 -0
- package/src/util/bloom256.ts +67 -0
- package/src/util/byte_lru.ts +73 -0
- package/src/util/cleanup.ts +22 -0
- package/src/util/crc32c.ts +29 -0
- package/src/util/ds_error.ts +15 -0
- package/src/util/duration.ts +17 -0
- package/src/util/endian.ts +53 -0
- package/src/util/json_pointer.ts +148 -0
- package/src/util/log.ts +25 -0
- package/src/util/lru.ts +53 -0
- package/src/util/retry.ts +35 -0
- package/src/util/siphash.ts +71 -0
- package/src/util/stream_paths.ts +50 -0
- package/src/util/time.ts +14 -0
- package/src/util/yield.ts +3 -0
- package/src/util/zstd.ts +24 -0
|
@@ -0,0 +1,900 @@
|
|
|
1
|
+
import { createHash, randomUUID } from "node:crypto";
|
|
2
|
+
import type { Pool, PoolClient } from "pg";
|
|
3
|
+
import type { SegmentReadStore, SegmentReadRow, StreamReadRow, SearchCompanionPlanReadRow, SearchSegmentCompanionReadRow } from "../store/segment_read_store";
|
|
4
|
+
import type {
|
|
5
|
+
SegmentClaim,
|
|
6
|
+
ManifestPublicationSnapshot,
|
|
7
|
+
ManifestRow,
|
|
8
|
+
SegmentCandidateRow,
|
|
9
|
+
SegmentMetaRow,
|
|
10
|
+
SegmentRow,
|
|
11
|
+
SegmentStore,
|
|
12
|
+
SealedSegmentCommit,
|
|
13
|
+
ManifestStore,
|
|
14
|
+
} from "../store/segment_manifest_store";
|
|
15
|
+
import type { WalReadRow } from "../store/wal_store";
|
|
16
|
+
import { STREAM_FLAG_DELETED, STREAM_FLAG_TOUCH } from "../store/rows";
|
|
17
|
+
import { readU64LE } from "../util/endian";
|
|
18
|
+
import { dsError } from "../util/ds_error";
|
|
19
|
+
import type { PgExecutor, PgStreamRow } from "./types";
|
|
20
|
+
import { loadPostgresRoutingIndexManifest } from "./routing_index";
|
|
21
|
+
import { loadPostgresSecondaryIndexManifest } from "./secondary_index";
|
|
22
|
+
import { loadPostgresLexiconIndexManifest } from "./lexicon_index";
|
|
23
|
+
import {
|
|
24
|
+
getPostgresSearchCompanionPlan,
|
|
25
|
+
getPostgresSearchSegmentCompanion,
|
|
26
|
+
listPostgresSearchSegmentCompanions,
|
|
27
|
+
loadPostgresSearchCompanionManifest,
|
|
28
|
+
} from "./companions";
|
|
29
|
+
|
|
30
|
+
/**
 * Offset-chunk size constant for WAL garbage collection.
 * NOTE(review): not referenced in the visible part of this file; presumably consumed by the WAL
 * deletion helper further down. Confirm before changing.
 */
const WAL_GC_CHUNK_OFFSETS = 100_000n;

/**
 * Lifetime of a segment claim in milliseconds (five minutes). Claims whose
 * `segment_claimed_at_ms` is older than this are treated as stale and may be taken over.
 */
const SEGMENT_CLAIM_LEASE_MS = 5 * 60 * 1_000;
|
|
32
|
+
|
|
33
|
+
/** Bookkeeping for one manifest-publication advisory lock that is currently held. */
interface ManifestLease {
  /** Pooled connection the lock was taken on; the unlock must be issued on the same connection. */
  client: PoolClient;
  /** Stream the publication lease was acquired for. */
  stream: string;
  /** Key handed to the Postgres advisory-lock functions. */
  lockKey: bigint;
}
|
|
38
|
+
|
|
39
|
+
export class PostgresSegmentManifestStore implements SegmentReadStore, SegmentStore, ManifestStore {
|
|
40
|
+
private readonly manifestLeases = new Map<string, ManifestLease>();
|
|
41
|
+
|
|
42
|
+
/**
 * @param pool Postgres connection pool used for every query issued by this store.
 * @param currentTimeMs Clock callback returning the current time in milliseconds.
 * @param readWal Callback producing WAL rows for an offset range; `readWalRange` delegates to it.
 */
constructor(
  private readonly pool: Pool,
  private readonly currentTimeMs: () => bigint,
  private readonly readWal: (
    stream: string,
    startOffset: bigint,
    endOffset: bigint,
    routingKey?: Uint8Array
  ) => AsyncIterable<WalReadRow>
) {}
|
|
47
|
+
|
|
48
|
+
/** Returns the current time in milliseconds as reported by the injected clock. */
nowMs(): bigint {
  const now = this.currentTimeMs();
  return now;
}
|
|
51
|
+
|
|
52
|
+
/** Promise-returning variant of {@link nowMs}: resolves with the injected clock's current value. */
nowMsForRead(): Promise<bigint> {
  const now = this.currentTimeMs();
  return Promise.resolve(now);
}
|
|
55
|
+
|
|
56
|
+
/** Loads the stream row for `stream`, or `null` when no such stream exists. Delegates to `getStream`. */
async getSegmentStreamState(stream: string): Promise<StreamReadRow | null> {
  const state = await this.getStream(stream);
  return state;
}
|
|
59
|
+
|
|
60
|
+
/** Reports whether the deleted bit (`STREAM_FLAG_DELETED`) is set in the row's `stream_flags`. */
isDeleted(row: StreamReadRow): boolean {
  const deletedBit = row.stream_flags & STREAM_FLAG_DELETED;
  return deletedBit !== 0;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Returns the WAL rows for the given offset range by forwarding all arguments, unchanged,
 * to the `readWal` callback supplied at construction.
 */
readWalRange(stream: string, startOffset: bigint, endOffset: bigint, routingKey?: Uint8Array): AsyncIterable<WalReadRow> {
  const rows = this.readWal(stream, startOffset, endOffset, routingKey);
  return rows;
}
|
|
67
|
+
|
|
68
|
+
/** Read-path alias of `getStream`: resolves with the stream row or `null` if it does not exist. */
async getStreamForRead(stream: string): Promise<StreamReadRow | null> {
  const row = await this.getStream(stream);
  return row;
}
|
|
71
|
+
|
|
72
|
+
/** Loads the stream row through the shared pool; resolves with `null` when the stream is unknown. */
async getStream(stream: string): Promise<StreamReadRow | null> {
  const row = await this.getStreamWithExecutor(this.pool, stream);
  return row;
}
|
|
75
|
+
|
|
76
|
+
/** Returns every segment row of `stream`, ordered by ascending `segment_index`. */
async listSegmentsForRead(stream: string): Promise<SegmentReadRow[]> {
  const sql = segmentSelectSql(`WHERE stream = $1 ORDER BY segment_index ASC`);
  const { rows } = await this.pool.query(sql, [stream]);
  return rows.map(coerceSegmentRow);
}
|
|
80
|
+
|
|
81
|
+
/** Read-path alias of `getSegmentByIndex`: resolves with the segment at `segmentIndex` or `null`. */
async getSegmentByIndexForRead(stream: string, segmentIndex: number): Promise<SegmentReadRow | null> {
  const segment = await this.getSegmentByIndex(stream, segmentIndex);
  return segment;
}
|
|
84
|
+
|
|
85
|
+
/**
 * Finds the segment of `stream` whose `[start_offset, end_offset]` range contains `offset`.
 * If several match, the one with the highest `segment_index` wins. Resolves with `null` when
 * no segment covers the offset.
 */
async findSegmentForOffsetForRead(stream: string, offset: bigint): Promise<SegmentReadRow | null> {
  const sql = segmentSelectSql(`WHERE stream = $1 AND start_offset <= $2 AND end_offset >= $2 ORDER BY segment_index DESC LIMIT 1`);
  const { rows } = await this.pool.query(sql, [stream, pgInt(offset)]);
  const match = rows[0];
  if (!match) return null;
  return coerceSegmentRow(match);
}
|
|
92
|
+
|
|
93
|
+
/** Counts the segment rows recorded for `stream`, using the shared pool. */
async countSegmentsForRead(stream: string): Promise<number> {
  const total = await this.countSegmentsWithExecutor(this.pool, stream);
  return total;
}
|
|
96
|
+
|
|
97
|
+
/** Fetches the search companion plan for the stream via `getPostgresSearchCompanionPlan` on the shared pool. */
getSearchCompanionPlanForRead(_stream: string): Promise<SearchCompanionPlanReadRow | null> {
  const plan = getPostgresSearchCompanionPlan(this.pool, _stream);
  return plan;
}
|
|
100
|
+
|
|
101
|
+
/** Lists the per-segment search companions of the stream via `listPostgresSearchSegmentCompanions`. */
listSearchSegmentCompanionsForRead(_stream: string): Promise<SearchSegmentCompanionReadRow[]> {
  const companions = listPostgresSearchSegmentCompanions(this.pool, _stream);
  return companions;
}
|
|
104
|
+
|
|
105
|
+
/** Fetches the search companion of one segment via `getPostgresSearchSegmentCompanion`; `null` if absent. */
getSearchSegmentCompanionForRead(_stream: string, _segmentIndex: number): Promise<SearchSegmentCompanionReadRow | null> {
  const companion = getPostgresSearchSegmentCompanion(this.pool, _stream, _segmentIndex);
  return companion;
}
|
|
108
|
+
|
|
109
|
+
/**
 * Lists streams that currently qualify for a segment cut.
 *
 * A stream qualifies when it carries neither the deleted nor the touch flag, has no live
 * segment claim (not in progress, no claim timestamp, or a claim older than
 * `SEGMENT_CLAIM_LEASE_MS`), and meets at least one threshold: pending bytes, pending rows,
 * or, only when `maxIntervalMs > 0`, time elapsed since the last segment cut.
 *
 * @param minPendingBytes Pending-bytes threshold.
 * @param minPendingRows Pending-rows threshold.
 * @param maxIntervalMs Maximum age of the last cut in ms; values `<= 0` disable the time trigger.
 * @param limit Maximum number of rows returned.
 * @returns Candidates ordered by descending pending bytes.
 */
async candidates(minPendingBytes: bigint, minPendingRows: bigint, maxIntervalMs: bigint, limit: number): Promise<SegmentCandidateRow[]> {
  const nowMs = this.currentTimeMs();
  const skipFlags = STREAM_FLAG_DELETED | STREAM_FLAG_TOUCH;
  const claimStaleBefore = nowMs - BigInt(SEGMENT_CLAIM_LEASE_MS);

  let sql: string;
  let params: unknown[];
  if (maxIntervalMs > 0n) {
    // Variant with the time-based trigger: $4 is "now", $5 the maximum interval.
    sql = `SELECT stream, pending_bytes, pending_rows, last_segment_cut_ms, sealed_through, next_offset, epoch
           FROM streams
           WHERE (stream_flags & $1) = 0
             AND (segment_in_progress = 0 OR segment_claimed_at_ms IS NULL OR segment_claimed_at_ms < $6)
             AND (pending_bytes >= $2 OR pending_rows >= $3 OR ($4 - last_segment_cut_ms) >= $5)
           ORDER BY pending_bytes DESC
           LIMIT $7;`;
    params = [skipFlags, pgInt(minPendingBytes), pgInt(minPendingRows), pgInt(nowMs), pgInt(maxIntervalMs), pgInt(claimStaleBefore), limit];
  } else {
    // Size-only variant: no interval parameters, so the placeholders are renumbered.
    sql = `SELECT stream, pending_bytes, pending_rows, last_segment_cut_ms, sealed_through, next_offset, epoch
           FROM streams
           WHERE (stream_flags & $1) = 0
             AND (segment_in_progress = 0 OR segment_claimed_at_ms IS NULL OR segment_claimed_at_ms < $4)
             AND (pending_bytes >= $2 OR pending_rows >= $3)
           ORDER BY pending_bytes DESC
           LIMIT $5;`;
    params = [skipFlags, pgInt(minPendingBytes), pgInt(minPendingRows), pgInt(claimStaleBefore), limit];
  }

  const { rows } = await this.pool.query(sql, params);
  return rows.map((r) => {
    return {
      stream: String(r.stream),
      pending_bytes: toBigInt(r.pending_bytes),
      pending_rows: toBigInt(r.pending_rows),
      last_segment_cut_ms: toBigInt(r.last_segment_cut_ms),
      sealed_through: toBigInt(r.sealed_through),
      next_offset: toBigInt(r.next_offset),
      epoch: Number(r.epoch),
    };
  });
}
|
|
142
|
+
|
|
143
|
+
/**
 * Computes `sum(size_bytes) / sum(payload_bytes)` over the most recent segments of `stream`
 * that have a positive `payload_bytes`.
 *
 * @param stream Stream name.
 * @param limit Number of most recent segments to consider (at least 1 is always used).
 * @returns The ratio, or `null` when no segment qualifies or either total is not positive.
 */
async recentSegmentCompressionRatio(stream: string, limit = 8): Promise<number | null> {
  const sql = `SELECT
                 COALESCE(SUM(payload_bytes), 0) AS payload_total,
                 COALESCE(SUM(size_bytes), 0) AS size_total,
                 COUNT(*) AS count
               FROM (
                 SELECT payload_bytes, size_bytes
                 FROM segments
                 WHERE stream = $1 AND payload_bytes > 0
                 ORDER BY segment_index DESC
                 LIMIT $2
               ) recent;`;
  const { rows } = await this.pool.query<{ payload_total: string | null; size_total: string | null; count: string }>(
    sql,
    [stream, Math.max(1, limit)]
  );
  const totals = rows[0];

  const sampled = Number(totals?.count ?? 0);
  if (!Number.isFinite(sampled) || sampled <= 0) return null;

  const payloadTotal = toBigInt(totals?.payload_total ?? 0);
  const sizeTotal = toBigInt(totals?.size_total ?? 0);
  if (payloadTotal <= 0n || sizeTotal <= 0n) return null;

  return Number(sizeTotal) / Number(payloadTotal);
}
|
|
165
|
+
|
|
166
|
+
/**
 * Attempts to claim `stream` for segmenting by stamping a fresh random token on its row.
 *
 * The update only matches when the stream is not flagged deleted and is either not in
 * progress, has no claim timestamp, or its claim is older than `SEGMENT_CLAIM_LEASE_MS`.
 *
 * @returns The claim (carrying the new token) when a row was updated, otherwise `null`.
 */
async tryClaimSegment(stream: string): Promise<SegmentClaim | null> {
  const token = randomUUID();
  const nowMs = this.currentTimeMs();
  const leaseCutoff = nowMs - BigInt(SEGMENT_CLAIM_LEASE_MS);
  const outcome = await this.pool.query(
    `UPDATE streams
     SET segment_in_progress = 1,
         segment_claim_token = $1,
         segment_claimed_at_ms = $2,
         updated_at_ms = $2
     WHERE stream = $3
       AND (stream_flags & $4) = 0
       AND (segment_in_progress = 0 OR segment_claimed_at_ms IS NULL OR segment_claimed_at_ms < $5);`,
    [token, pgInt(nowMs), stream, STREAM_FLAG_DELETED, pgInt(leaseCutoff)]
  );
  const claimed = (outcome.rowCount ?? 0) > 0;
  if (!claimed) return null;
  return { token };
}
|
|
183
|
+
|
|
184
|
+
/**
 * Updates the segment-in-progress marker of `stream`.
 *
 * - Clearing (`inProgress === 0`) with a claim token: the row is only reset if it still
 *   carries that token, so a claim taken over by someone else is left untouched.
 * - Every other call: `segment_in_progress` is set unconditionally to `inProgress`.
 *
 * Both paths null out the claim token and claim timestamp and bump `updated_at_ms`.
 */
async setSegmentInProgress(stream: string, inProgress: number, claim?: SegmentClaim): Promise<void> {
  const ownToken = inProgress === 0 ? claim?.token : undefined;

  if (ownToken) {
    await this.pool.query(
      `UPDATE streams
       SET segment_in_progress = 0,
           segment_claim_token = NULL,
           segment_claimed_at_ms = NULL,
           updated_at_ms = $1
       WHERE stream = $2 AND segment_claim_token = $3;`,
      [pgInt(this.currentTimeMs()), stream, ownToken]
    );
  } else {
    await this.pool.query(
      `UPDATE streams
       SET segment_in_progress = $1,
           segment_claim_token = NULL,
           segment_claimed_at_ms = NULL,
           updated_at_ms = $2
       WHERE stream = $3;`,
      [inProgress, pgInt(this.currentTimeMs()), stream]
    );
  }
}
|
|
207
|
+
|
|
208
|
+
/** Returns one past the highest `segment_index` stored for `stream`, or 0 when it has no segments. */
async nextSegmentIndexForStream(stream: string): Promise<number> {
  const sql = `SELECT COALESCE(MAX(segment_index) + 1, 0) AS next_idx FROM segments WHERE stream = $1;`;
  const { rows } = await this.pool.query<{ next_idx: string | null }>(sql, [stream]);
  const nextIdx = rows[0]?.next_idx ?? 0;
  return Number(nextIdx);
}
|
|
212
|
+
|
|
213
|
+
/**
 * Records a freshly sealed segment and advances the stream's sealed state in one transaction.
 *
 * Steps, in order:
 * 1. Lock the stream row (`FOR UPDATE`) and require an active claim with `row.claimToken`.
 * 2. Require `sealed_through === row.startOffset - 1` (the segment continues the sealed prefix).
 * 3. Require `row.segmentIndex` to be the next free index.
 * 4. Insert the segment row (not yet uploaded) and append to the segment meta.
 * 5. Advance `sealed_through`, decrement pending counters (floored at 0) and clear the claim.
 *
 * @throws dsError `unsupported_capability` when `row.claimToken` is missing.
 * @throws dsError `conflict` when the claim is gone or the start offset / index is stale.
 */
async commitSealedSegment(row: SealedSegmentCommit): Promise<void> {
  if (!row.claimToken) throw dsError("postgres segment commit requires a claim token", { code: "unsupported_capability" });
  const tx = await this.pool.connect();
  try {
    await tx.query("BEGIN");

    // Row-lock the stream so nothing else can advance it while the checks below run.
    const claimRes = await tx.query<{ sealed_through: string | number | bigint }>(
      `SELECT sealed_through FROM streams
       WHERE stream = $1
         AND segment_in_progress = 1
         AND segment_claim_token = $2
       FOR UPDATE;`,
      [row.stream, row.claimToken]
    );
    const claimedRow = claimRes.rows[0];
    if (!claimedRow) throw dsError("postgres segment claim is no longer active", { code: "conflict" });

    const sealedThroughNow = toBigInt(claimedRow.sealed_through);
    if (sealedThroughNow !== row.startOffset - 1n) {
      throw dsError("postgres segment commit start offset is stale", { code: "conflict" });
    }

    const indexRes = await tx.query<{ next_idx: string | number | null }>(
      `SELECT COALESCE(MAX(segment_index) + 1, 0) AS next_idx FROM segments WHERE stream = $1;`,
      [row.stream]
    );
    const expectedIndex = Number(indexRes.rows[0]?.next_idx ?? 0);
    if (expectedIndex !== row.segmentIndex) {
      throw dsError("postgres segment commit index is stale", { code: "conflict" });
    }

    const insertParams = [
      row.segmentId,
      row.stream,
      row.segmentIndex,
      pgInt(row.startOffset),
      pgInt(row.endOffset),
      row.blockCount,
      pgInt(row.lastAppendMs),
      pgInt(row.payloadBytes),
      row.sizeBytes,
      row.localPath,
      pgInt(this.currentTimeMs()),
    ];
    await tx.query(
      `INSERT INTO segments(segment_id, stream, segment_index, start_offset, end_offset, block_count,
                            last_append_ms, payload_bytes, size_bytes, local_path, created_at_ms, uploaded_at_ms, r2_etag)
       VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NULL, NULL);`,
      insertParams
    );

    // The meta entry receives the exclusive end offset and the last-append time scaled by 1e6.
    await this.appendSegmentMeta(tx, row.stream, row.endOffset + 1n, row.blockCount, row.lastAppendMs * 1_000_000n);

    const streamParams = [
      pgInt(row.endOffset),
      pgInt(row.payloadBytes),
      pgInt(row.rowsSealed),
      pgInt(this.currentTimeMs()),
      row.stream,
      row.claimToken,
    ];
    await tx.query(
      `UPDATE streams
       SET sealed_through = $1,
           pending_bytes = GREATEST(pending_bytes - $2, 0),
           pending_rows = GREATEST(pending_rows - $3, 0),
           segment_in_progress = 0,
           segment_claim_token = NULL,
           segment_claimed_at_ms = NULL,
           last_segment_cut_ms = $4,
           updated_at_ms = $4
       WHERE stream = $5 AND segment_claim_token = $6;`,
      streamParams
    );

    await tx.query("COMMIT");
  } catch (error) {
    // Best-effort rollback; the original failure is what the caller needs to see.
    await tx.query("ROLLBACK").catch(() => {});
    throw error;
  } finally {
    tx.release();
  }
}
|
|
278
|
+
|
|
279
|
+
/** Counts segments, across all streams, whose `uploaded_at_ms` is still NULL. */
async countPendingSegments(): Promise<number> {
  const sql = `SELECT COUNT(*) AS count FROM segments WHERE uploaded_at_ms IS NULL;`;
  const { rows } = await this.pool.query<{ count: string }>(sql);
  const pending = rows[0]?.count ?? 0;
  return Number(pending);
}
|
|
283
|
+
|
|
284
|
+
/**
 * Returns, for each stream that has segments awaiting upload, its lowest-index segment that
 * is not yet uploaded: the "head" of that stream's upload queue.
 *
 * @param limit Maximum number of heads returned.
 * @returns Heads ordered by `created_at_ms`, then stream name.
 */
async pendingUploadHeads(limit: number): Promise<SegmentRow[]> {
  const headsSql = segmentSelectSql(
    `WHERE uploaded_at_ms IS NULL
       AND segment_index = (
         SELECT MIN(s2.segment_index)
         FROM segments s2
         WHERE s2.stream = segments.stream AND s2.uploaded_at_ms IS NULL
       )
     ORDER BY created_at_ms ASC, stream ASC
     LIMIT $1`
  );
  const { rows } = await this.pool.query(headsSql, [limit]);
  return rows.map(coerceSegmentRow);
}
|
|
300
|
+
|
|
301
|
+
/** Stores the object-store etag and upload timestamp on the segment identified by `segmentId`. */
async markSegmentUploaded(segmentId: string, etag: string, uploadedAtMs: bigint): Promise<void> {
  const sql = `UPDATE segments SET r2_etag = $1, uploaded_at_ms = $2 WHERE segment_id = $3;`;
  const params = [etag, pgInt(uploadedAtMs), segmentId];
  await this.pool.query(sql, params);
}
|
|
308
|
+
|
|
309
|
+
/**
 * Loads a manifest publication snapshot, retrying retryable Postgres failures.
 *
 * Up to 5 attempts are made when `opts.wait` is set, otherwise 3. Between attempts the
 * method sleeps with exponential backoff starting at 10 ms and capped at 100 ms. Errors that
 * `isRetryablePostgresPublicationError` rejects are rethrown immediately.
 *
 * @returns The snapshot, or `null` when a single attempt yields none.
 */
async loadManifestPublicationSnapshot(stream: string, opts: { wait?: boolean } = {}): Promise<ManifestPublicationSnapshot | null> {
  const maxAttempts = opts.wait ? 5 : 3;
  let attempt = 1;
  for (;;) {
    try {
      return await this.loadManifestPublicationSnapshotOnce(stream, opts);
    } catch (error) {
      const outOfAttempts = attempt >= maxAttempts;
      if (outOfAttempts || !isRetryablePostgresPublicationError(error)) throw error;
      const backoffMs = Math.min(100, 10 * 2 ** (attempt - 1));
      await sleep(backoffMs);
    }
    attempt += 1;
  }
}
|
|
320
|
+
|
|
321
|
+
/**
 * Single attempt at building a manifest publication snapshot.
 *
 * Acquires the manifest publication lease, then reads everything the manifest needs inside
 * one REPEATABLE READ transaction on the lease's connection. On success the lease is kept
 * (its token is returned in the snapshot); on every other exit the transaction is rolled back
 * and the lease released.
 *
 * @returns The snapshot, or `null` when the lease could not be acquired, the stream does not
 *   exist, or no segment meta could be produced.
 */
private async loadManifestPublicationSnapshotOnce(stream: string, opts: { wait?: boolean } = {}): Promise<ManifestPublicationSnapshot | null> {
  const lease = await this.acquireManifestPublication(stream, { wait: opts.wait });
  if (!lease) return null;
  const { client, token } = lease;
  let leaseHandedToCaller = false;
  try {
    await client.query("BEGIN ISOLATION LEVEL REPEATABLE READ");
    const streamRow = await this.getStreamWithExecutor(client, stream);
    if (!streamRow) return null;

    const prevUploadedSegmentCount = streamRow.uploaded_segment_count ?? 0;
    let uploadedPrefixCount = await this.advanceUploadedSegmentCount(client, stream, prevUploadedSegmentCount);
    const segmentCount = await this.countSegmentsWithExecutor(client, stream);
    let segmentMeta = await this.getSegmentMeta(client, stream);

    // The stored meta is usable only if its count and all three packed arrays agree with
    // the actual number of segments (8, 4 and 8 bytes per segment respectively).
    const metaIsCurrent =
      !!segmentMeta &&
      segmentMeta.segment_count === segmentCount &&
      segmentMeta.segment_offsets.byteLength === segmentCount * 8 &&
      segmentMeta.segment_blocks.byteLength === segmentCount * 4 &&
      segmentMeta.segment_last_ts.byteLength === segmentCount * 8;
    if (!metaIsCurrent) segmentMeta = await this.rebuildSegmentMeta(client, stream);
    if (!segmentMeta) return null;

    // Never report more uploaded segments than the meta describes; persist the clamp.
    if (uploadedPrefixCount > segmentMeta.segment_count) {
      uploadedPrefixCount = segmentMeta.segment_count;
      await this.setUploadedSegmentCount(client, stream, uploadedPrefixCount);
    }

    let uploadedThrough: bigint;
    if (uploadedPrefixCount === 0) {
      uploadedThrough = -1n;
    } else {
      // segment_offsets holds one u64 per segment; the entry of the last uploaded segment
      // minus one is the last uploaded offset.
      uploadedThrough = readU64LE(segmentMeta.segment_offsets, (uploadedPrefixCount - 1) * 8) - 1n;
    }

    const unpublishedWalBytes = await this.getWalBytesAfterOffset(client, stream, uploadedThrough);
    const publishedLogicalSizeBytes =
      streamRow.logical_size_bytes > unpublishedWalBytes ? streamRow.logical_size_bytes - unpublishedWalBytes : 0n;

    const manifestRow = await this.getManifestRow(client, stream);
    const profileJson = await this.getProfileJson(client, stream);
    const routingIndex = await loadPostgresRoutingIndexManifest(client, stream);
    const secondaryIndex = await loadPostgresSecondaryIndexManifest(client, stream);
    const lexiconIndex = await loadPostgresLexiconIndexManifest(client, stream);
    const searchCompanions = await loadPostgresSearchCompanionManifest(client, stream);
    await client.query("COMMIT");

    // From here on the caller owns the lease and releases it via commitManifest or
    // releaseManifestPublication.
    leaseHandedToCaller = true;
    const snapshot = {
      publicationToken: token,
      streamRow,
      prevUploadedSegmentCount,
      uploadedPrefixCount,
      uploadedThrough,
      publishedLogicalSizeBytes,
      generation: manifestRow.generation + 1,
      segmentMeta,
      profileJson,
      indexState: routingIndex.indexState,
      indexRuns: routingIndex.indexRuns,
      retiredRuns: routingIndex.retiredRuns,
      secondaryIndexStates: secondaryIndex.secondaryIndexStates,
      secondaryIndexRuns: secondaryIndex.secondaryIndexRuns,
      retiredSecondaryIndexRuns: secondaryIndex.retiredSecondaryIndexRuns,
      lexiconIndexStates: lexiconIndex.lexiconIndexStates,
      lexiconIndexRuns: lexiconIndex.lexiconIndexRuns,
      retiredLexiconIndexRuns: lexiconIndex.retiredLexiconIndexRuns,
      searchCompanionPlan: searchCompanions.searchCompanionPlan,
      searchSegmentCompanions: searchCompanions.searchSegmentCompanions,
    };
    return snapshot;
  } finally {
    if (!leaseHandedToCaller) {
      await client.query("ROLLBACK").catch(() => {});
      await this.releaseManifestPublication(token);
    }
  }
}
|
|
390
|
+
|
|
391
|
+
/**
 * Records a published manifest and garbage-collects the WAL it covers, using the connection
 * of the publication lease identified by `publicationToken`.
 *
 * Inside one transaction: lock the stream row, reject the publication if `generation` is not
 * strictly newer or `uploadedThrough` would move backwards, upsert the `manifests` row,
 * advance `streams.uploaded_through`, then delete WAL through the smaller of
 * `uploadedThrough` and the touch processor's `processed_through` (when such a row exists).
 *
 * The lease is released on every path. If the commit fails, the error that aborted it is the
 * one rethrown; a secondary failure while releasing the lease never replaces it.
 *
 * @throws dsError `unsupported_capability` when no publication token is given.
 * @throws dsError `conflict` when the token is not an active lease for `stream`, or the
 *   publication is stale.
 * @throws dsError `not_found` when the stream row does not exist.
 */
async commitManifest(
  stream: string,
  generation: number,
  etag: string,
  uploadedAtMs: bigint,
  uploadedThrough: bigint,
  sizeBytes: number,
  publicationToken?: string
): Promise<void> {
  if (!publicationToken) throw dsError("postgres manifest commit requires a publication token", { code: "unsupported_capability" });
  const lease = this.manifestLeases.get(publicationToken);
  if (!lease || lease.stream !== stream) throw dsError("postgres manifest publication token is not active", { code: "conflict" });
  const client = lease.client;
  try {
    await client.query("BEGIN");
    const current = await client.query<{ generation: number | string | null; uploaded_through: string | number | bigint | null }>(
      `SELECT m.generation, s.uploaded_through
       FROM streams s
       LEFT JOIN manifests m ON m.stream = s.stream
       WHERE s.stream = $1
       FOR UPDATE OF s;`,
      [stream]
    );
    const currentRow = current.rows[0];
    if (!currentRow) throw dsError(`stream not found: ${stream}`, { code: "not_found" });
    // No manifests row yet means generation 0; no uploaded_through means nothing uploaded (-1).
    const currentGeneration = currentRow.generation == null ? 0 : Number(currentRow.generation);
    const currentUploadedThrough = currentRow.uploaded_through == null ? -1n : toBigInt(currentRow.uploaded_through);
    if (generation <= currentGeneration || uploadedThrough < currentUploadedThrough) {
      throw dsError("postgres manifest publication is stale", { code: "conflict" });
    }
    await client.query(
      `INSERT INTO manifests(stream, generation, uploaded_generation, last_uploaded_at_ms, last_uploaded_etag, last_uploaded_size_bytes)
       VALUES($1, $2, $2, $3, $4, $5)
       ON CONFLICT(stream) DO UPDATE SET
         generation = excluded.generation,
         uploaded_generation = excluded.uploaded_generation,
         last_uploaded_at_ms = excluded.last_uploaded_at_ms,
         last_uploaded_etag = excluded.last_uploaded_etag,
         last_uploaded_size_bytes = excluded.last_uploaded_size_bytes;`,
      [stream, generation, pgInt(uploadedAtMs), etag, sizeBytes]
    );
    await client.query(`UPDATE streams SET uploaded_through = $1, updated_at_ms = $2 WHERE stream = $3;`, [
      pgInt(uploadedThrough),
      pgInt(this.currentTimeMs()),
      stream,
    ]);
    // WAL may only be dropped up to what is both uploaded and already processed by touch.
    let gcThrough = uploadedThrough;
    const touchState = await client.query<{ processed_through: string | number | bigint }>(
      `SELECT processed_through FROM stream_touch_state WHERE stream = $1;`,
      [stream]
    );
    const processedThrough = touchState.rows[0]?.processed_through;
    if (processedThrough != null) {
      const touchThrough = toBigInt(processedThrough);
      gcThrough = touchThrough < gcThrough ? touchThrough : gcThrough;
    }
    if (gcThrough >= 0n) await this.deleteWalThrough(client, stream, gcThrough);
    await client.query("COMMIT");
  } catch (error) {
    await client.query("ROLLBACK").catch(() => {});
    // Release best-effort: an error thrown while unlocking must not replace the error that
    // actually aborted the commit (callers inspect its code, e.g. "conflict").
    await this.releaseManifestPublication(publicationToken).catch(() => {});
    throw error;
  }
  // Success path: a release failure is the only error left, so it propagates as before.
  await this.releaseManifestPublication(publicationToken);
}
|
|
456
|
+
|
|
457
|
+
/**
 * Releases the manifest publication lease identified by `publicationToken`: forgets the
 * lease, unlocks the advisory lock on the lease's connection and returns the connection to
 * the pool. Unknown (or already released) tokens are a no-op.
 *
 * The advisory lock is session-level, so it stays held for as long as the connection lives.
 * If the unlock query fails, the connection is therefore destroyed instead of being
 * recycled; ending the session is what guarantees the lock is dropped and cannot be
 * inherited by the next borrower of the pooled connection.
 *
 * @throws Whatever the unlock query throws (after the connection has been destroyed).
 */
async releaseManifestPublication(publicationToken: string): Promise<void> {
  const lease = this.manifestLeases.get(publicationToken);
  if (!lease) return;
  // Remove first so a concurrent or repeated release of the same token cannot double-release.
  this.manifestLeases.delete(publicationToken);
  try {
    await lease.client.query(`SELECT pg_advisory_unlock($1::bigint);`, [pgInt(lease.lockKey)]);
  } catch (error) {
    // Passing a truthy value tells the pool to close this connection rather than reuse it.
    lease.client.release(true);
    throw error;
  }
  lease.client.release();
}
|
|
467
|
+
|
|
468
|
+
/** Looks up the segment at `segmentIndex` of `stream` for manifest cleanup; resolves with `null` if absent. */
getSegmentForManifestCleanup(stream: string, segmentIndex: number): Promise<SegmentRow | null> {
  const lookup = this.getSegmentByIndex(stream, segmentIndex);
  return lookup;
}
|
|
471
|
+
|
|
472
|
+
/**
 * Loads the stream row using the supplied executor (pool or transaction-bound client) and
 * converts it with `coerceStreamRow`. Resolves with `null` when no row matches.
 */
private async getStreamWithExecutor(executor: PgExecutor, stream: string): Promise<StreamReadRow | null> {
  const { rows } = await executor.query<PgStreamRow>(`SELECT * FROM streams WHERE stream = $1;`, [stream]);
  const raw = rows[0];
  if (!raw) return null;
  return coerceStreamRow(raw);
}
|
|
476
|
+
|
|
477
|
+
/**
 * Counts the rows in `segments` that belong to `stream`.
 *
 * The count column is typed as a string on the result row, so it is converted with
 * `Number`; a missing row yields 0.
 */
private async countSegmentsWithExecutor(executor: PgExecutor, stream: string): Promise<number> {
  const { rows } = await executor.query<{ count: string }>(`SELECT COUNT(*) AS count FROM segments WHERE stream = $1;`, [stream]);
  const rawCount = rows[0]?.count ?? 0;
  return Number(rawCount);
}
|
|
481
|
+
|
|
482
|
+
/**
 * Checks out a dedicated pool connection and takes the per-stream advisory lock on it.
 *
 * With `opts.wait` the blocking `pg_advisory_lock` is used; otherwise
 * `pg_try_advisory_lock` is used and a refused lock returns `null` after the
 * connection is handed back to the pool.
 *
 * On success the connection is recorded in `manifestLeases` under a fresh random
 * token and stays checked out; `releaseManifestPublication` is what returns it.
 *
 * @returns The lease token and its connection, or `null` when the lock was not obtained.
 * @throws Re-throws any query error after releasing the connection.
 */
private async acquireManifestPublication(stream: string, opts: { wait?: boolean } = {}): Promise<{ token: string; client: PoolClient } | null> {
  const client = await this.pool.connect();
  const token = randomUUID();
  const lockKey = manifestLockKey(stream);
  try {
    const lockFunction = opts.wait ? "pg_advisory_lock" : "pg_try_advisory_lock";
    const result = await client.query<{ locked: boolean | null }>(`SELECT ${lockFunction}($1::bigint) AS locked;`, [pgInt(lockKey)]);
    // Only the try-variant reports a verdict; the waiting variant returns once the lock is held.
    const refused = !opts.wait && !result.rows[0]?.locked;
    if (refused) {
      client.release();
      return null;
    }
    this.manifestLeases.set(token, { client, stream, lockKey });
    return { token, client };
  } catch (error) {
    client.release();
    throw error;
  }
}
|
|
500
|
+
|
|
501
|
+
/**
 * Records the uploaded size of a stream's schema and bumps `updated_at_ms` to the
 * store's current time. Runs against the pool via the module-level helper, which
 * issues the same `UPDATE schemas ...` statement.
 */
async setSchemaUploadedSizeBytes(stream: string, sizeBytes: number): Promise<void> {
  await setPostgresSchemaUploadedSizeBytes(this.pool, this.currentTimeMs(), stream, sizeBytes);
}
|
|
508
|
+
|
|
509
|
+
/**
 * Looks up one segment row by index using the connection pool as the executor.
 *
 * @returns The segment row, or `null` when no row matches.
 */
private async getSegmentByIndex(stream: string, segmentIndex: number): Promise<SegmentRow | null> {
  return this.getSegmentByIndexWithExecutor(this.pool, stream, segmentIndex);
}
|
|
512
|
+
|
|
513
|
+
/**
 * Looks up the segment row identified by `(stream, segmentIndex)` through the given executor.
 *
 * @returns The coerced segment row, or `null` when no row matches.
 */
private async getSegmentByIndexWithExecutor(executor: PgExecutor, stream: string, segmentIndex: number): Promise<SegmentRow | null> {
  const sql = segmentSelectSql(`WHERE stream = $1 AND segment_index = $2 LIMIT 1`);
  const { rows } = await executor.query(sql, [stream, segmentIndex]);
  const found = rows[0];
  if (!found) return null;
  return coerceSegmentRow(found);
}
|
|
517
|
+
|
|
518
|
+
/**
 * Appends one segment's entry to the packed per-stream arrays in `stream_segment_meta`.
 *
 * Two statements are issued: the first creates an empty row if the stream has none,
 * the second increments `segment_count` and concatenates the little-endian encodings
 * of the new values onto the three bytea columns.
 *
 * @param offsetPlusOne Value appended (as u64 LE) to `segment_offsets`.
 * @param blockCount Value appended (as u32 LE) to `segment_blocks`.
 * @param lastAppendNs Value appended (as u64 LE) to `segment_last_ts`.
 */
private async appendSegmentMeta(executor: PgExecutor, stream: string, offsetPlusOne: bigint, blockCount: number, lastAppendNs: bigint): Promise<void> {
  const ensureRowSql = `INSERT INTO stream_segment_meta(stream, segment_count, segment_offsets, segment_blocks, segment_last_ts)
    VALUES($1, 0, ''::bytea, ''::bytea, ''::bytea)
    ON CONFLICT(stream) DO NOTHING;`;
  await executor.query(ensureRowSql, [stream]);

  const appendSql = `UPDATE stream_segment_meta
    SET segment_count = segment_count + 1,
    segment_offsets = segment_offsets || $1::bytea,
    segment_blocks = segment_blocks || $2::bytea,
    segment_last_ts = segment_last_ts || $3::bytea
    WHERE stream = $4;`;
  const appendParams = [encodeU64Le(offsetPlusOne), encodeU32Le(blockCount), encodeU64Le(lastAppendNs), stream];
  await executor.query(appendSql, appendParams);
}
|
|
535
|
+
|
|
536
|
+
/**
 * Reads the packed segment metadata row for `stream`.
 *
 * @returns The coerced row, or `null` when the stream has no `stream_segment_meta` row.
 */
private async getSegmentMeta(executor: PgExecutor, stream: string): Promise<SegmentMetaRow | null> {
  const sql = `SELECT stream, segment_count, segment_offsets, segment_blocks, segment_last_ts
    FROM stream_segment_meta WHERE stream = $1 LIMIT 1;`;
  const { rows } = await executor.query(sql, [stream]);
  const found = rows[0];
  if (!found) return null;
  return coerceSegmentMetaRow(found);
}
|
|
545
|
+
|
|
546
|
+
/**
 * Recomputes the packed `stream_segment_meta` row for `stream` from the `segments` table.
 *
 * Segments are read in ascending `segment_index` order and packed little-endian:
 *   - `segment_offsets`: u64 per segment, `end_offset + 1`
 *   - `segment_blocks`:  u32 per segment, `block_count`
 *   - `segment_last_ts`: u64 per segment, `last_append_ms * 1_000_000`
 * The result is upserted (replacing any existing row) and also returned.
 */
private async rebuildSegmentMeta(executor: PgExecutor, stream: string): Promise<SegmentMetaRow> {
  const result = await executor.query<{ end_offset: string; block_count: number; last_append_ms: string }>(
    `SELECT end_offset, block_count, last_append_ms
      FROM segments WHERE stream = $1 ORDER BY segment_index ASC;`,
    [stream]
  );
  const segments = result.rows;
  const count = segments.length;

  const offsets = new Uint8Array(count * 8);
  const blocks = new Uint8Array(count * 4);
  const lastTs = new Uint8Array(count * 8);
  const offsetsView = new DataView(offsets.buffer, offsets.byteOffset, offsets.byteLength);
  const blocksView = new DataView(blocks.buffer, blocks.byteOffset, blocks.byteLength);
  const lastTsView = new DataView(lastTs.buffer, lastTs.byteOffset, lastTs.byteLength);

  segments.forEach((segment, index) => {
    offsetsView.setBigUint64(index * 8, toBigInt(segment.end_offset) + 1n, true);
    blocksView.setUint32(index * 4, Number(segment.block_count) >>> 0, true);
    lastTsView.setBigUint64(index * 8, toBigInt(segment.last_append_ms) * 1_000_000n, true);
  });

  // Same upsert statement and parameters as the module-level restore helper.
  await restorePostgresSegmentMeta(executor, stream, count, offsets, blocks, lastTs);

  return { stream, segment_count: count, segment_offsets: offsets, segment_blocks: blocks, segment_last_ts: lastTs };
}
|
|
577
|
+
|
|
578
|
+
/**
 * Stores `count` in `streams.uploaded_segment_count` and stamps `updated_at_ms`
 * with the store's current time.
 */
private async setUploadedSegmentCount(executor: PgExecutor, stream: string, count: number): Promise<void> {
  const sql = `UPDATE streams SET uploaded_segment_count = $1, updated_at_ms = $2 WHERE stream = $3;`;
  const params = [count, pgInt(this.currentTimeMs()), stream];
  await executor.query(sql, params);
}
|
|
585
|
+
|
|
586
|
+
/**
 * Advances the uploaded-segment counter past every consecutive segment that has an etag.
 *
 * Starting at index `currentCount`, segments are fetched one at a time; the walk stops
 * at the first index with no row or no `r2_etag`. The new count is persisted only
 * when it differs from `currentCount`.
 *
 * @returns The (possibly unchanged) uploaded segment count.
 */
private async advanceUploadedSegmentCount(executor: PgExecutor, stream: string, currentCount: number): Promise<number> {
  let nextIndex = currentCount;
  let candidate = await this.getSegmentByIndexWithExecutor(executor, stream, nextIndex);
  while (candidate && candidate.r2_etag) {
    nextIndex += 1;
    candidate = await this.getSegmentByIndexWithExecutor(executor, stream, nextIndex);
  }
  if (nextIndex !== currentCount) {
    await this.setUploadedSegmentCount(executor, stream, nextIndex);
  }
  return nextIndex;
}
|
|
596
|
+
|
|
597
|
+
/**
 * Sums `payload_len` over the stream's WAL rows whose offset is strictly greater than `offset`.
 *
 * @returns The total as a bigint; 0n when there are no such rows.
 */
private async getWalBytesAfterOffset(executor: PgExecutor, stream: string, offset: bigint): Promise<bigint> {
  const sql = `SELECT COALESCE(SUM(payload_len), 0) AS bytes
    FROM wal
    WHERE stream = $1 AND "offset" > $2;`;
  const { rows } = await executor.query<{ bytes: string | null }>(sql, [stream, pgInt(offset)]);
  const total = rows[0]?.bytes ?? 0;
  return toBigInt(total);
}
|
|
606
|
+
|
|
607
|
+
/**
 * Loads the `manifests` row for `stream`, creating a zeroed row when none exists.
 *
 * The insert uses `ON CONFLICT DO NOTHING`, so a concurrent writer may win the race
 * between the initial read and the insert. In that case the winner's row is re-read
 * rather than reporting zeroed defaults that do not match what is stored.
 *
 * @returns The stored row, or zeroed defaults when this call created it.
 */
private async getManifestRow(executor: PgExecutor, stream: string): Promise<ManifestRow> {
  const selectSql = `SELECT stream, generation, uploaded_generation, last_uploaded_at_ms, last_uploaded_etag, last_uploaded_size_bytes
    FROM manifests WHERE stream = $1 LIMIT 1;`;

  const existing = await executor.query(selectSql, [stream]);
  if (existing.rows[0]) return coerceManifestRow(existing.rows[0]);

  // RETURNING yields a row only when this statement actually inserted one.
  const inserted = await executor.query(
    `INSERT INTO manifests(stream, generation, uploaded_generation, last_uploaded_at_ms, last_uploaded_etag, last_uploaded_size_bytes)
      VALUES($1, 0, 0, NULL, NULL, NULL)
      ON CONFLICT(stream) DO NOTHING
      RETURNING stream;`,
    [stream]
  );
  if (inserted.rows.length === 0) {
    // Lost the race: another writer created the row after our first read.
    const winner = await executor.query(selectSql, [stream]);
    if (winner.rows[0]) return coerceManifestRow(winner.rows[0]);
  }

  return { stream, generation: 0, uploaded_generation: 0, last_uploaded_at_ms: null, last_uploaded_etag: null, last_uploaded_size_bytes: null };
}
|
|
622
|
+
|
|
623
|
+
/**
 * Fetches and parses the stored profile JSON for `stream`.
 *
 * @returns The parsed value, or `null` when there is no row or the column is null.
 * @throws The error built by `dsError` when the stored text is not valid JSON.
 */
private async getProfileJson(executor: PgExecutor, stream: string): Promise<Record<string, any> | null> {
  const { rows } = await executor.query<{ profile_json: string }>(`SELECT profile_json FROM stream_profiles WHERE stream = $1;`, [stream]);
  const serialized = rows[0]?.profile_json;
  if (serialized == null) {
    return null;
  }
  try {
    return JSON.parse(serialized);
  } catch {
    // The parser's own message is intentionally dropped in favour of one naming the stream.
    throw dsError(`invalid profile_json for ${stream}`);
  }
}
|
|
633
|
+
|
|
634
|
+
/**
 * Deletes the stream's WAL rows with offsets up to and including `uploadedThrough`,
 * in batches of at most `WAL_GC_CHUNK_OFFSETS` rows (lowest offsets first).
 *
 * After each non-empty batch the `wal_bytes` / `wal_rows` counters on `streams` are
 * reduced by what was deleted (clamped at 0) and `updated_at_ms` is refreshed.
 * The loop ends on an empty batch or on a batch smaller than the chunk size.
 */
private async deleteWalThrough(client: PoolClient, stream: string, uploadedThrough: bigint): Promise<void> {
  // The query uses a strict "<" bound, so shift by one to make `uploadedThrough` inclusive.
  const exclusiveUpper = uploadedThrough + 1n;
  const deleteBatchSql = `WITH doomed AS (
    SELECT stream, "offset", payload_len
    FROM wal
    WHERE stream = $1 AND "offset" < $2
    ORDER BY "offset" ASC
    LIMIT $3
    ),
    deleted AS (
    DELETE FROM wal
    USING doomed
    WHERE wal.stream = doomed.stream AND wal."offset" = doomed."offset"
    RETURNING doomed.payload_len
    )
    SELECT COUNT(*) AS rows_deleted, COALESCE(SUM(payload_len), 0) AS bytes_deleted FROM deleted;`;
  const adjustCountersSql = `UPDATE streams
    SET wal_bytes = GREATEST(wal_bytes - $1, 0),
    wal_rows = GREATEST(wal_rows - $2, 0),
    updated_at_ms = $3
    WHERE stream = $4;`;

  let batchRows: bigint;
  do {
    const batch = await client.query<{ rows_deleted: string | number | bigint | null; bytes_deleted: string | number | bigint | null }>(
      deleteBatchSql,
      [stream, pgInt(exclusiveUpper), pgInt(WAL_GC_CHUNK_OFFSETS)]
    );
    const summary = batch.rows[0];
    batchRows = toBigInt(summary?.rows_deleted ?? 0);
    const batchBytes = toBigInt(summary?.bytes_deleted ?? 0);
    if (batchRows <= 0n) break;
    await client.query(adjustCountersSql, [pgInt(batchBytes), pgInt(batchRows), pgInt(this.currentTimeMs()), stream]);
    // A short batch means nothing is left below the bound.
  } while (!(batchRows < WAL_GC_CHUNK_OFFSETS));
}
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
/**
 * Input shape for `restorePostgresSegmentRow`: one segment to upsert into `segments`.
 * Each field is written to the snake_case column of the same name
 * (e.g. `segmentIndex` -> `segment_index`).
 */
export type PostgresSegmentRestoreRow = {
  /** Stored in `segment_id`. */
  segmentId: string;
  /** Stored in `stream`; together with `segmentIndex` it is the upsert conflict key. */
  stream: string;
  /** Stored in `segment_index`. */
  segmentIndex: number;
  /** Stored in `start_offset`. */
  startOffset: bigint;
  /** Stored in `end_offset`. */
  endOffset: bigint;
  /** Stored in `block_count`. */
  blockCount: number;
  /** Stored in `last_append_ms`. */
  lastAppendMs: bigint;
  /** Stored in `payload_bytes`. */
  payloadBytes: bigint;
  /** Stored in `size_bytes`. */
  sizeBytes: number;
  /** Stored in `local_path`. */
  localPath: string;
};
|
|
682
|
+
|
|
683
|
+
/**
 * Upserts one row into `segments`, keyed on `(stream, segment_index)`.
 *
 * A fresh insert sets `created_at_ms` to `nowMs` and leaves `uploaded_at_ms` /
 * `r2_etag` null. On conflict only the descriptive columns listed in the
 * `DO UPDATE` clause are overwritten; `created_at_ms`, `uploaded_at_ms` and
 * `r2_etag` of the existing row are not touched.
 *
 * @param executor Anything able to run the query (pool, client or transaction).
 * @param nowMs Timestamp used for `created_at_ms` on insert.
 * @param row Segment values to store.
 */
export async function restorePostgresSegmentRow(
  executor: PgExecutor,
  nowMs: bigint,
  row: PostgresSegmentRestoreRow
): Promise<void> {
  const upsertSql = `INSERT INTO segments(segment_id, stream, segment_index, start_offset, end_offset, block_count,
    last_append_ms, payload_bytes, size_bytes, local_path, created_at_ms, uploaded_at_ms, r2_etag)
    VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, NULL, NULL)
    ON CONFLICT(stream, segment_index) DO UPDATE SET
    segment_id = excluded.segment_id,
    start_offset = excluded.start_offset,
    end_offset = excluded.end_offset,
    block_count = excluded.block_count,
    last_append_ms = excluded.last_append_ms,
    payload_bytes = excluded.payload_bytes,
    size_bytes = excluded.size_bytes,
    local_path = excluded.local_path;`;
  // Positional parameters $1..$11, in column order.
  const params = [
    row.segmentId,
    row.stream,
    row.segmentIndex,
    pgInt(row.startOffset),
    pgInt(row.endOffset),
    row.blockCount,
    pgInt(row.lastAppendMs),
    pgInt(row.payloadBytes),
    row.sizeBytes,
    row.localPath,
    pgInt(nowMs),
  ];
  await executor.query(upsertSql, params);
}
|
|
716
|
+
|
|
717
|
+
/**
 * Upserts the packed segment metadata row for `stream`, replacing every column of
 * an existing row.
 *
 * @param count Value stored in `segment_count`.
 * @param offsets Bytes stored in `segment_offsets`.
 * @param blocks Bytes stored in `segment_blocks`.
 * @param lastTs Bytes stored in `segment_last_ts`.
 */
export async function restorePostgresSegmentMeta(
  executor: PgExecutor,
  stream: string,
  count: number,
  offsets: Uint8Array,
  blocks: Uint8Array,
  lastTs: Uint8Array
): Promise<void> {
  const upsertSql = `INSERT INTO stream_segment_meta(stream, segment_count, segment_offsets, segment_blocks, segment_last_ts)
    VALUES($1, $2, $3, $4, $5)
    ON CONFLICT(stream) DO UPDATE SET
    segment_count = excluded.segment_count,
    segment_offsets = excluded.segment_offsets,
    segment_blocks = excluded.segment_blocks,
    segment_last_ts = excluded.segment_last_ts;`;
  // The byte arrays are copied into Buffers before being passed as parameters.
  const params = [stream, count, Buffer.from(offsets), Buffer.from(blocks), Buffer.from(lastTs)];
  await executor.query(upsertSql, params);
}
|
|
736
|
+
|
|
737
|
+
/**
 * Upserts the `manifests` row for `stream`, replacing every column of an existing row.
 *
 * @param generation Value stored in `generation`.
 * @param uploadedGeneration Value stored in `uploaded_generation`.
 * @param uploadedAtMs Value stored in `last_uploaded_at_ms`; `null` is stored as SQL NULL.
 * @param etag Value stored in `last_uploaded_etag`.
 * @param sizeBytes Value stored in `last_uploaded_size_bytes`.
 */
export async function restorePostgresManifestRow(
  executor: PgExecutor,
  stream: string,
  generation: number,
  uploadedGeneration: number,
  uploadedAtMs: bigint | null,
  etag: string | null,
  sizeBytes: number | null
): Promise<void> {
  const upsertSql = `INSERT INTO manifests(stream, generation, uploaded_generation, last_uploaded_at_ms, last_uploaded_etag, last_uploaded_size_bytes)
    VALUES($1, $2, $3, $4, $5, $6)
    ON CONFLICT(stream) DO UPDATE SET
    generation = excluded.generation,
    uploaded_generation = excluded.uploaded_generation,
    last_uploaded_at_ms = excluded.last_uploaded_at_ms,
    last_uploaded_etag = excluded.last_uploaded_etag,
    last_uploaded_size_bytes = excluded.last_uploaded_size_bytes;`;
  let uploadedAtParam: string | null = null;
  if (uploadedAtMs != null) {
    uploadedAtParam = pgInt(uploadedAtMs);
  }
  await executor.query(upsertSql, [stream, generation, uploadedGeneration, uploadedAtParam, etag, sizeBytes]);
}
|
|
758
|
+
|
|
759
|
+
/**
 * Stamps a segment as uploaded by setting its `r2_etag` and `uploaded_at_ms`.
 * The row is addressed by `segment_id`; nothing is reported if no row matches.
 */
export async function markPostgresSegmentUploaded(
  executor: PgExecutor,
  segmentId: string,
  etag: string,
  uploadedAtMs: bigint
): Promise<void> {
  const sql = `UPDATE segments SET r2_etag = $1, uploaded_at_ms = $2 WHERE segment_id = $3;`;
  const params = [etag, pgInt(uploadedAtMs), segmentId];
  await executor.query(sql, params);
}
|
|
771
|
+
|
|
772
|
+
/**
 * Sets `uploaded_size_bytes` on the stream's `schemas` row and stamps `updated_at_ms`
 * with `nowMs`. Nothing is reported if the stream has no schema row.
 */
export async function setPostgresSchemaUploadedSizeBytes(
  executor: PgExecutor,
  nowMs: bigint,
  stream: string,
  sizeBytes: number
): Promise<void> {
  const sql = `UPDATE schemas SET uploaded_size_bytes = $1, updated_at_ms = $2 WHERE stream = $3;`;
  const params = [sizeBytes, pgInt(nowMs), stream];
  await executor.query(sql, params);
}
|
|
784
|
+
|
|
785
|
+
/**
 * Builds the canonical `SELECT ... FROM segments` statement with the caller's
 * trailing clause appended verbatim before the terminating semicolon.
 *
 * `whereSql` is spliced in as-is, so it must be trusted SQL text; values belong in
 * bind parameters.
 */
function segmentSelectSql(whereSql: string): string {
  const head = `SELECT segment_id, stream, segment_index, start_offset, end_offset, block_count, last_append_ms,
    payload_bytes, size_bytes, local_path, created_at_ms, uploaded_at_ms, r2_etag
    FROM segments `;
  return head + whereSql + ";";
}
|
|
790
|
+
|
|
791
|
+
/** Renders a bigint as its base-10 string so it can be sent as a query parameter. */
function pgInt(value: bigint): string {
  return `${value}`;
}
|
|
794
|
+
|
|
795
|
+
/**
 * Converts a value to bigint. Bigints pass through untouched; everything else is
 * handed to the `BigInt` constructor, which throws for inputs it cannot convert
 * (for example non-integral numbers, malformed strings, `null`).
 */
function toBigInt(value: unknown): bigint {
  if (typeof value === "bigint") {
    return value;
  }
  return BigInt(value as string | number | boolean);
}
|
|
798
|
+
|
|
799
|
+
/**
 * Normalises a binary value to a plain `Uint8Array`.
 *
 * A `Uint8Array` input (including subclasses) is re-wrapped as a view over the same
 * memory, with no copy. Any other input is passed to the `Uint8Array` constructor
 * as if it were an `ArrayBuffer`.
 */
function toBytes(value: unknown): Uint8Array {
  if (!(value instanceof Uint8Array)) {
    return new Uint8Array(value as ArrayBuffer);
  }
  const { buffer, byteOffset, byteLength } = value;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
|
|
803
|
+
|
|
804
|
+
/** Encodes `value` as an 8-byte unsigned little-endian integer. */
function encodeU64Le(value: bigint): Buffer {
  const encoded = Buffer.alloc(8);
  encoded.writeBigUInt64LE(value, 0);
  return encoded;
}
|
|
809
|
+
|
|
810
|
+
/**
 * Encodes `value` as a 4-byte unsigned little-endian integer.
 * The input is first coerced with `>>> 0`, so out-of-range values wrap modulo 2^32
 * instead of throwing.
 */
function encodeU32Le(value: number): Buffer {
  const encoded = Buffer.alloc(4);
  const unsigned = value >>> 0;
  encoded.writeUInt32LE(unsigned, 0);
  return encoded;
}
|
|
815
|
+
|
|
816
|
+
/**
 * Derives the advisory-lock key for a stream: the first 8 bytes of
 * SHA-256(`manifest:<stream>`), read as a signed big-endian 64-bit integer.
 */
function manifestLockKey(stream: string): bigint {
  const hasher = createHash("sha256");
  hasher.update(`manifest:${stream}`);
  const digest = hasher.digest();
  return digest.readBigInt64BE(0);
}
|
|
819
|
+
|
|
820
|
+
/**
 * Reports whether an error carries one of the SQLSTATE codes treated as retryable.
 * The `code` property is stringified before comparison; a missing error or missing
 * code is never retryable.
 */
function isRetryablePostgresPublicationError(error: unknown): boolean {
  const sqlState = String((error as { code?: unknown })?.code ?? "");
  switch (sqlState) {
    case "40001": // serialization_failure
    case "40P01": // deadlock_detected
    case "55P03": // lock_not_available
      return true;
    default:
      return false;
  }
}
|
|
824
|
+
|
|
825
|
+
/** Returns a promise that resolves (with no value) once a `setTimeout` of `ms` milliseconds fires. */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
828
|
+
|
|
829
|
+
/**
 * Converts a raw `streams` row into a `StreamReadRow` with normalised JS types
 * (bigint for offsets, byte counts and timestamps; number for small integers;
 * string for text).
 *
 * Fallbacks applied to null/absent columns:
 *   - `sealed_through`, `uploaded_through` -> -1n
 *   - `uploaded_segment_count`, `segment_in_progress` -> 0
 *   - `pending_rows` / `pending_bytes` -> `wal_rows` / `wal_bytes`
 *   - `last_segment_cut_ms` -> `last_append_ms`
 *   - other nullable columns stay `null`
 */
function coerceStreamRow(row: PgStreamRow): StreamReadRow {
  const walRows = toBigInt(row.wal_rows);
  const walBytes = toBigInt(row.wal_bytes);
  const lastAppendMs = toBigInt(row.last_append_ms);

  // Small null-aware converters; `== null` matches both null and undefined.
  const textOrNull = (v: unknown): string | null => (v == null ? null : String(v));
  const numberOrNull = (v: unknown): number | null => (v == null ? null : Number(v));
  const numberOr = (v: unknown, fallback: number): number => (v == null ? fallback : Number(v));
  const bigintOr = (v: unknown, fallback: bigint): bigint => (v == null ? fallback : toBigInt(v));

  return {
    stream: String(row.stream),
    created_at_ms: toBigInt(row.created_at_ms),
    updated_at_ms: toBigInt(row.updated_at_ms),
    content_type: String(row.content_type),
    profile: textOrNull(row.profile),
    stream_seq: textOrNull(row.stream_seq),
    closed: Number(row.closed),
    closed_producer_id: textOrNull(row.closed_producer_id),
    closed_producer_epoch: numberOrNull(row.closed_producer_epoch),
    closed_producer_seq: numberOrNull(row.closed_producer_seq),
    ttl_seconds: numberOrNull(row.ttl_seconds),
    epoch: Number(row.epoch),
    next_offset: toBigInt(row.next_offset),
    sealed_through: bigintOr(row.sealed_through, -1n),
    uploaded_through: bigintOr(row.uploaded_through, -1n),
    uploaded_segment_count: numberOr(row.uploaded_segment_count, 0),
    pending_rows: bigintOr(row.pending_rows, walRows),
    pending_bytes: bigintOr(row.pending_bytes, walBytes),
    logical_size_bytes: toBigInt(row.logical_size_bytes),
    wal_rows: walRows,
    wal_bytes: walBytes,
    last_append_ms: lastAppendMs,
    last_segment_cut_ms: bigintOr(row.last_segment_cut_ms, lastAppendMs),
    segment_in_progress: numberOr(row.segment_in_progress, 0),
    expires_at_ms: row.expires_at_ms == null ? null : toBigInt(row.expires_at_ms),
    stream_flags: Number(row.stream_flags),
  };
}
|
|
862
|
+
|
|
863
|
+
/**
 * Converts a raw `segments` row into a `SegmentRow` with normalised JS types.
 * `uploaded_at_ms` and `r2_etag` stay `null` when the column is null; every other
 * column is converted unconditionally.
 */
function coerceSegmentRow(row: any): SegmentRow {
  const asText = (v: unknown): string => String(v);
  const asNumber = (v: unknown): number => Number(v);
  return {
    segment_id: asText(row.segment_id),
    stream: asText(row.stream),
    segment_index: asNumber(row.segment_index),
    start_offset: toBigInt(row.start_offset),
    end_offset: toBigInt(row.end_offset),
    block_count: asNumber(row.block_count),
    last_append_ms: toBigInt(row.last_append_ms),
    payload_bytes: toBigInt(row.payload_bytes),
    size_bytes: asNumber(row.size_bytes),
    local_path: asText(row.local_path),
    created_at_ms: toBigInt(row.created_at_ms),
    uploaded_at_ms: row.uploaded_at_ms != null ? toBigInt(row.uploaded_at_ms) : null,
    r2_etag: row.r2_etag != null ? asText(row.r2_etag) : null,
  };
}
|
|
880
|
+
|
|
881
|
+
/**
 * Converts a raw `stream_segment_meta` row into a `SegmentMetaRow`: the three packed
 * columns become plain `Uint8Array`s, the count a number, the stream a string.
 */
function coerceSegmentMetaRow(row: any): SegmentMetaRow {
  const stream = String(row.stream);
  const segmentCount = Number(row.segment_count);
  const packedOffsets = toBytes(row.segment_offsets);
  const packedBlocks = toBytes(row.segment_blocks);
  const packedLastTs = toBytes(row.segment_last_ts);
  return {
    stream,
    segment_count: segmentCount,
    segment_offsets: packedOffsets,
    segment_blocks: packedBlocks,
    segment_last_ts: packedLastTs,
  };
}
|
|
890
|
+
|
|
891
|
+
/**
 * Converts a raw `manifests` row into a `ManifestRow` with normalised JS types.
 * The three `last_uploaded_*` columns stay `null` when the column is null.
 */
function coerceManifestRow(row: any): ManifestRow {
  const bigintOrNull = (v: unknown): bigint | null => (v == null ? null : toBigInt(v));
  return {
    stream: String(row.stream),
    generation: Number(row.generation),
    uploaded_generation: Number(row.uploaded_generation),
    last_uploaded_at_ms: bigintOrNull(row.last_uploaded_at_ms),
    last_uploaded_etag: row.last_uploaded_etag != null ? String(row.last_uploaded_etag) : null,
    last_uploaded_size_bytes: bigintOrNull(row.last_uploaded_size_bytes),
  };
}
|