@dotdo/postgres 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/backup/backup-manager.d.ts +244 -0
- package/dist/backup/backup-manager.d.ts.map +1 -0
- package/dist/backup/backup-manager.js +726 -0
- package/dist/backup/backup-manager.js.map +1 -0
- package/dist/observability/production-metrics.d.ts +318 -0
- package/dist/observability/production-metrics.d.ts.map +1 -0
- package/dist/observability/production-metrics.js +747 -0
- package/dist/observability/production-metrics.js.map +1 -0
- package/dist/pitr/pitr-manager.d.ts +240 -0
- package/dist/pitr/pitr-manager.d.ts.map +1 -0
- package/dist/pitr/pitr-manager.js +837 -0
- package/dist/pitr/pitr-manager.js.map +1 -0
- package/dist/streaming/cdc-iceberg-connector.d.ts +1 -1
- package/dist/streaming/cdc-iceberg-connector.js +1 -1
- package/dist/streaming/live-cdc-stream.d.ts +1 -1
- package/dist/streaming/live-cdc-stream.js +1 -1
- package/package.json +4 -4
- package/src/__tests__/backup.test.ts +944 -0
- package/src/__tests__/observability.test.ts +1089 -0
- package/src/__tests__/pitr.test.ts +1240 -0
- package/src/backup/backup-manager.ts +1006 -0
- package/src/observability/production-metrics.ts +1054 -0
- package/src/pitr/pitr-manager.ts +1136 -0
|
@@ -0,0 +1,1136 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Point-in-Time Recovery (PITR) Manager for PostgreSQL Durable Objects
|
|
3
|
+
*
|
|
4
|
+
* Provides WAL archiving to R2, recovery to timestamp/LSN/named restore points,
|
|
5
|
+
* timeline management, and WAL segment management.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// =============================================================================
|
|
9
|
+
// Constants
|
|
10
|
+
// =============================================================================
|
|
11
|
+
|
|
12
|
+
/** Default WAL segment size (16 MB); used when no segment size is configured */
const DEFAULT_SEGMENT_SIZE_BYTES = 16 * 1024 * 1024

/** Default retention period for WAL segments in days */
const DEFAULT_RETENTION_DAYS = 7

/** Milliseconds in one day */
const MS_PER_DAY = 86_400_000

/** Maximum number of timelines to retain in history */
const DEFAULT_MAX_TIMELINE_HISTORY = 10

/**
 * Maximum number of R2 upload attempts per segment.
 * The upload loop counts the first try against this limit, so a value of 2
 * means one initial attempt plus at most one retry.
 */
const MAX_UPLOAD_RETRIES = 2

/** Estimated WAL replay speed (bytes per millisecond) for recovery plan estimates */
const ESTIMATED_REPLAY_BYTES_PER_MS = 1024

/** Minimum estimated duration for recovery plans in milliseconds */
const MIN_RECOVERY_PLAN_DURATION_MS = 100

/** Threshold for considering a timestamp recovery target as "current" (5 seconds) */
const CURRENT_TIME_THRESHOLD_MS = 5000
|
|
35
|
+
|
|
36
|
+
// =============================================================================
|
|
37
|
+
// Types
|
|
38
|
+
// =============================================================================
|
|
39
|
+
|
|
40
|
+
/** Configuration for the PITR Manager */
export interface PITRConfig {
  /** R2 bucket for WAL segment storage (used for put, list, head and get) */
  bucket: R2Bucket
  /** Durable Object identifier; appended to `prefix` to form this object's key namespace */
  doId: string
  /**
   * Key prefix for all WAL objects in R2.
   * It is concatenated directly with `doId` (no separator is inserted between
   * them), so include a trailing delimiter here if one is wanted.
   */
  prefix: string
  /** WAL archiving configuration */
  archiveConfig?: WALArchiveConfig
  /** Number of days to retain WAL segments */
  retentionDays?: number
  /** Maximum number of timeline entries to keep in history */
  maxTimelineHistory?: number
  /** Whether to enable continuous WAL archiving */
  enableContinuousArchiving?: boolean
}
|
|
57
|
+
|
|
58
|
+
/** Configuration for WAL segment archiving behavior */
export interface WALArchiveConfig {
  /**
   * Maximum size of a single WAL segment in bytes.
   * When unset (or 0) the 16 MB default is used.
   */
  segmentSizeBytes?: number
  /** Interval between automatic WAL flushes in milliseconds */
  flushIntervalMs?: number
  /**
   * Whether to compress WAL segments before upload.
   * NOTE: the flush path currently treats compression as a pass-through; this
   * flag only changes the `compression` label ('gzip' vs 'none') recorded in the
   * segment metadata, while the payload is uploaded as-is.
   */
  compression?: boolean
  /** Whether to validate checksums on read operations */
  checksumValidation?: boolean
  /** Maximum number of segments to hold in memory before flushing */
  maxSegmentsInMemory?: number
}
|
|
71
|
+
|
|
72
|
+
/** Represents a single WAL log entry for replay */
export interface WALEntry {
  /** Log sequence number in `high/low` hexadecimal form (e.g. "0/1A2B3C"); used for ordering and de-duplication */
  lsn: string
  /** Kind of change this entry records */
  operation: 'INSERT' | 'UPDATE' | 'DELETE' | 'TRUNCATE'
  /** Schema of the affected table */
  schema: string
  /** Name of the affected table */
  table: string
  /** Presumably the row image after the change — TODO confirm against applyWALEntry */
  newRow?: Record<string, unknown>
  /** Presumably the row image before the change — TODO confirm against applyWALEntry */
  oldRow?: Record<string, unknown>
  /** Time of the change in epoch milliseconds; compared with `Date#getTime()` during timestamp recovery */
  timestamp: number
}
|
|
82
|
+
|
|
83
|
+
/** Specifies the target for a recovery operation */
export interface RecoveryTarget {
  /** Which kind of target `value` holds */
  type: 'timestamp' | 'lsn' | 'named'
  /**
   * The target itself: a `Date` for 'timestamp', an LSN string for 'lsn',
   * or a restore point name for 'named'.
   */
  value: Date | string
}
|
|
88
|
+
|
|
89
|
+
/** Result of a recovery operation */
export interface RecoveryResult {
  /** True when the recovery completed; false when it failed (see `error`) */
  success: boolean
  /** The target that was requested */
  recoveryTarget: RecoveryTarget
  /** Number of WAL entries replayed; 0 on failure */
  entriesApplied: number
  /** Wall-clock duration in milliseconds; not populated by every recovery path */
  durationMs?: number
  /** Human-readable failure reason, present only on failure */
  error?: string
  /** Current timeline id once the recovery has been finalized; present only on success */
  timelineId?: number
}
|
|
98
|
+
|
|
99
|
+
/** Metadata about a WAL segment stored in R2 */
export interface WALSegmentInfo {
  /** R2 object key of the segment */
  key: string
  /** LSN of the first (lowest) entry in the segment; '0/0' when the R2 metadata is missing */
  startLsn: string
  /** LSN of the last (highest) entry in the segment; '0/0' when the R2 metadata is missing */
  endLsn: string
  /** Number of WAL entries in the segment */
  entryCount: number
  /** Size of the uploaded payload in bytes */
  sizeBytes: number
  /** Whether the segment is labelled as compressed ('gzip' in its metadata) */
  compressed: boolean
  /** Checksum string computed over the uploaded payload; '' when the R2 metadata is missing */
  checksum: string
  /** Epoch milliseconds: flush time for locally tracked segments, R2 upload time for listed ones */
  timestamp: number
  /** Timeline the segment was written on */
  timelineId: number
}
|
|
111
|
+
|
|
112
|
+
/** Represents a timeline branch in the recovery history */
export interface TimelineInfo {
  /** Timeline identifier; the initial timeline created by the constructor has id 1 */
  id: number
  /** Creation time in epoch milliseconds */
  startedAt: number
  /** Where this timeline diverged from its parent; absent on the initial timeline */
  branchPoint?: {
    /** Id of the timeline this one branched from */
    parentTimelineId: number
    /** LSN at which the branch occurred */
    lsn: string
    /** Presumably the branch time in epoch milliseconds — TODO confirm against createNewTimeline */
    timestamp: number
  }
}
|
|
122
|
+
|
|
123
|
+
/** A named point in the WAL stream for targeted recovery */
export interface RestorePoint {
  /** Unique name; creating a second point with the same name is rejected */
  name: string
  /** LSN the point refers to; recovery replays entries with an LSN up to and including this one */
  lsn: string
  /** Creation time in epoch milliseconds */
  timestamp: number
  /** Timeline that was current when the point was created */
  timelineId: number
}
|
|
130
|
+
|
|
131
|
+
/** Aggregate statistics for PITR operations */
export interface PITRStats {
  /** Total WAL entries uploaded across all flushed segments */
  totalEntriesArchived: number
  /** Number of segments flushed */
  totalSegments: number
  /** Total payload bytes uploaded */
  totalBytesArchived: number
  /** Count of recoveries; incremented on success and also when a timestamp recovery throws */
  recoveriesPerformed: number
  /** Archive lag in milliseconds (not updated in the visible part of this file — TODO confirm where it is maintained) */
  archiveLagMs: number
  /** Epoch milliseconds of the most recent successful flush; 0 before the first flush */
  lastArchiveTimestamp: number
  /**
   * Timestamp (epoch milliseconds) of the oldest archived entry seen so far.
   * Despite the `Ms` suffix this is an absolute timestamp, not a duration.
   */
  oldestRecoveryPointMs: number
  /** Id of the timeline captured when the stats object was created */
  currentTimelineId: number
}
|
|
142
|
+
|
|
143
|
+
/** Statistics specific to WAL archiving (a subset of the PITRStats counters) */
export interface WALArchiveStats {
  /** Number of segments flushed */
  totalSegments: number
  /** Total WAL entries uploaded across all flushed segments */
  totalEntriesArchived: number
  /** Total payload bytes uploaded */
  totalBytesArchived: number
  /** Epoch milliseconds of the most recent successful flush; 0 before the first flush */
  lastArchiveTimestamp: number
}
|
|
150
|
+
|
|
151
|
+
/**
 * Plan describing what is needed for a recovery operation.
 * NOTE(review): the code that builds plans is outside the visible part of this
 * file; the field notes below are inferred from names and should be confirmed.
 */
export interface RecoveryPlan {
  /** Segments the recovery would read, with their object key and size */
  segmentsRequired: Array<{ key: string; size: number }>
  /** Estimated replay duration in milliseconds */
  estimatedDurationMs: number
  /** Sum of the sizes of the required segments, presumably in bytes */
  totalBytesToReplay: number
  /** Target LSN, when the plan is for an LSN-based recovery */
  targetLsn?: string
  /** Target time, when the plan is for a timestamp-based recovery */
  targetTimestamp?: Date
  /** Whether a base backup is needed before WAL replay */
  requiresBaseBackup: boolean
  /** Whether the recovery can be carried out */
  feasible: boolean
  /** Explanation accompanying `feasible` */
  reason?: string
}
|
|
162
|
+
|
|
163
|
+
/** Result of validating a recovery operation; recorded after each successful recovery */
export interface RecoveryValidation {
  /** Whether the replayed WAL was considered continuous */
  walContinuity: boolean
  /** Whether the database was considered consistent after recovery */
  databaseConsistent: boolean
  /** Human-readable summary, e.g. "Recovery success: applied N WAL entries" */
  summary: string
}
|
|
169
|
+
|
|
170
|
+
/**
 * Result of checking WAL archive integrity for gaps.
 * NOTE(review): the integrity check itself is outside the visible part of this
 * file; field notes are inferred from names.
 */
export interface ArchiveIntegrityResult {
  /** Whether every checked segment passed */
  allSegmentsValid: boolean
  /** Number of segments examined */
  segmentsChecked: number
  /** Detected holes in the LSN sequence, each bounded by the LSNs on either side */
  gaps?: Array<{ afterLsn: string; beforeLsn: string }>
}
|
|
176
|
+
|
|
177
|
+
/**
 * Result of pruning old WAL segments.
 * NOTE(review): the pruning code is outside the visible part of this file;
 * field notes are inferred from names.
 */
export interface WALPruneResult {
  /** Number of segments removed */
  segmentsPruned: number
  /** Number of segments kept because a restore point still needs them */
  segmentsRetainedForRestorePoints: number
}
|
|
182
|
+
|
|
183
|
+
/**
 * Minimal interface for a PGLite-compatible database instance.
 * Used to decouple from the concrete PGLite implementation.
 *
 * @internal Used for type checking PGLite instances
 */
export interface PGLiteInstance {
  /** Runs a SQL statement and resolves with the resulting rows */
  query(sql: string): Promise<{ rows: Record<string, unknown>[] }>
  /** Runs a SQL statement without returning rows */
  exec(sql: string): Promise<void>
}
|
|
192
|
+
|
|
193
|
+
/**
 * Metadata attached to R2 WAL segment objects.
 * Every value is a string because it is stored as R2 custom metadata; numeric
 * fields are written with `String(...)` and read back with `parseInt(..., 10)`.
 * All fields are optional since an object may carry no metadata at all.
 */
interface R2SegmentMetadata {
  /** LSN of the first entry in the segment */
  startLsn?: string
  /** LSN of the last entry in the segment */
  endLsn?: string
  /** Number of entries, as a decimal string */
  entryCount?: string
  /** Compression label: 'gzip' or 'none' */
  compression?: string
  /** Checksum string computed over the uploaded payload */
  checksum?: string
  /** Timeline id, as a decimal string */
  timelineId?: string
}
|
|
202
|
+
|
|
203
|
+
// =============================================================================
|
|
204
|
+
// Utility Functions
|
|
205
|
+
// =============================================================================
|
|
206
|
+
|
|
207
|
+
/**
 * Parses a PostgreSQL LSN string (e.g., "0/1A2B3C") into a numeric value for comparison.
 *
 * The text before the slash is read as the high 32 bits and the text after it
 * as the low 32 bits, both in hexadecimal.
 *
 * @param lsn - LSN in `high/low` hexadecimal form
 * @returns The combined numeric value, or 0 when the string is malformed
 *   (not exactly two parts, or a part that does not parse as hexadecimal)
 */
function parseLsn(lsn: string): number {
  const parts = lsn.split('/')
  if (parts.length !== 2) return 0
  const highBits = parseInt(parts[0], 16)
  const lowBits = parseInt(parts[1], 16)
  // parseInt yields NaN for empty or non-hex text. A NaN result would make every
  // comparison false and turn compareLsn into an inconsistent sort comparator,
  // so treat it like any other malformed LSN.
  if (Number.isNaN(highBits) || Number.isNaN(lowBits)) return 0
  return highBits * 0x100000000 + lowBits
}
|
|
215
|
+
|
|
216
|
+
/**
 * Orders two LSN strings by their parsed numeric value.
 *
 * @returns A negative number when `a` precedes `b`, a positive number when it
 *   follows `b`, and zero when both parse to the same value
 */
function compareLsn(a: string, b: string): number {
  const left = parseLsn(a)
  const right = parseLsn(b)
  return left - right
}
|
|
220
|
+
|
|
221
|
+
/**
 * Derives a short, non-cryptographic fingerprint of a byte array.
 *
 * Each byte is folded into a rolling hash (`hash * 31 + byte`, wrapped to a
 * signed 32-bit integer). The absolute value is rendered in hexadecimal,
 * left-padded to 8 characters and prefixed with "sha256-". The prefix is only a
 * label: no SHA-256 digest is computed here. In production, this would use
 * SubtleCrypto for cryptographic hashing.
 */
async function computeChecksum(data: Uint8Array): Promise<string> {
  let rolling = 0
  for (const byte of data) {
    // Math.imul(x, 31) equals (x << 5) - x under 32-bit wrap-around
    rolling = (Math.imul(rolling, 31) + byte) | 0
  }
  const hex = Math.abs(rolling).toString(16)
  return `sha256-${hex.padStart(8, '0')}`
}
|
|
232
|
+
|
|
233
|
+
/**
 * Builds the RecoveryResult returned when a recovery attempt fails.
 *
 * @param target - The recovery target that was requested
 * @param startTime - Epoch milliseconds at which the attempt began; used to compute the duration
 * @param error - Failure reason to report
 * @returns A result with `success: false`, zero applied entries and the elapsed time
 */
function createFailedRecoveryResult(
  target: RecoveryTarget,
  startTime: number,
  error: string,
): RecoveryResult {
  const elapsedMs = Date.now() - startTime
  const failure: RecoveryResult = {
    success: false,
    recoveryTarget: target,
    entriesApplied: 0,
    durationMs: elapsedMs,
    error,
  }
  return failure
}
|
|
247
|
+
|
|
248
|
+
/**
 * Reads the `customMetadata` property of an R2 head/list result.
 *
 * @param head - The value returned by R2; may be null or undefined
 * @returns The custom metadata, or an empty object when none is present
 */
function extractSegmentMetadata(head: unknown): R2SegmentMetadata {
  const customMetadata = (head as Record<string, unknown>)?.customMetadata
  return (customMetadata ?? {}) as R2SegmentMetadata
}
|
|
254
|
+
|
|
255
|
+
// =============================================================================
|
|
256
|
+
// PITRManager Class
|
|
257
|
+
// =============================================================================
|
|
258
|
+
|
|
259
|
+
/**
|
|
260
|
+
* Manages Point-in-Time Recovery for PostgreSQL Durable Objects.
|
|
261
|
+
* Archives WAL entries to R2, supports recovery to timestamps/LSNs/named points,
|
|
262
|
+
* and maintains timeline history for branching recovery scenarios.
|
|
263
|
+
*/
|
|
264
|
+
export class PITRManager {
|
|
265
|
+
private config: PITRConfig
|
|
266
|
+
private walBuffer: WALEntry[] = []
|
|
267
|
+
private segments: WALSegmentInfo[] = []
|
|
268
|
+
private restorePoints: RestorePoint[] = []
|
|
269
|
+
private timelines: TimelineInfo[] = []
|
|
270
|
+
private currentTimelineId: number = 1
|
|
271
|
+
private stats: PITRStats
|
|
272
|
+
private lastArchivedLsn: string = ''
|
|
273
|
+
private lastRecoveryValidation: RecoveryValidation | null = null
|
|
274
|
+
private oldestEntryTimestamp: number = 0
|
|
275
|
+
|
|
276
|
+
/**
 * Stores the configuration, registers the initial timeline (id 1, started
 * now) and resets the statistics.
 */
constructor(config: PITRConfig) {
  this.config = config
  const initialTimeline: TimelineInfo = { id: 1, startedAt: Date.now() }
  this.timelines.push(initialTimeline)
  this.stats = this.createEmptyStats()
}
|
|
281
|
+
|
|
282
|
+
/** Builds a stats object with every counter at zero and the current timeline id filled in */
private createEmptyStats(): PITRStats {
  const { currentTimelineId } = this
  const blank: PITRStats = {
    totalEntriesArchived: 0,
    totalSegments: 0,
    totalBytesArchived: 0,
    recoveriesPerformed: 0,
    archiveLagMs: 0,
    lastArchiveTimestamp: 0,
    oldestRecoveryPointMs: 0,
    currentTimelineId,
  }
  return blank
}
|
|
294
|
+
|
|
295
|
+
/** Builds the R2 key namespace of this Durable Object: the configured prefix, then the DO id, then "/" */
private getKeyPrefix(): string {
  const { prefix, doId } = this.config
  return `${prefix}${doId}/`
}
|
|
299
|
+
|
|
300
|
+
// ===========================================================================
|
|
301
|
+
// WAL Archiving
|
|
302
|
+
// ===========================================================================
|
|
303
|
+
|
|
304
|
+
/**
 * Adds WAL entries to the buffer and uploads the buffer to R2.
 *
 * Incoming entries are ordered by LSN and skipped when an entry with the same
 * LSN is already buffered. The serialized buffer is then measured: at or above
 * the configured segment size it is uploaded in several chunks, otherwise as a
 * single segment. Either way, every call ends with a flush.
 *
 * @param entries - Entries to archive; the array itself is not mutated
 */
async archiveWALEntries(entries: WALEntry[]): Promise<void> {
  const incoming = [...entries].sort((a, b) => compareLsn(a.lsn, b.lsn))

  // Skip anything whose LSN is already waiting in the buffer
  const seenLsns = new Set<string>()
  for (const buffered of this.walBuffer) {
    seenLsns.add(buffered.lsn)
  }
  for (const candidate of incoming) {
    if (seenLsns.has(candidate.lsn)) continue
    this.walBuffer.push(candidate)
    seenLsns.add(candidate.lsn)
  }

  // Remember the earliest entry timestamp; it feeds the oldest-recovery-point stat
  if (incoming.length > 0) {
    const firstTimestamp = incoming[0].timestamp
    if (this.oldestEntryTimestamp === 0 || firstTimestamp < this.oldestEntryTimestamp) {
      this.oldestEntryTimestamp = firstTimestamp
    }
  }

  const segmentLimit = this.config.archiveConfig?.segmentSizeBytes || DEFAULT_SEGMENT_SIZE_BYTES
  const serialized = JSON.stringify(this.walBuffer)
  const pendingBytes = new TextEncoder().encode(serialized).length

  if (pendingBytes >= segmentLimit) {
    await this.flushBufferInChunks(pendingBytes, segmentLimit)
    return
  }
  await this.flushWALBuffer()
}
|
|
334
|
+
|
|
335
|
+
/**
 * Splits the WAL buffer into segment-sized chunks and flushes each chunk as its
 * own segment.
 *
 * The chunk length is estimated from the average serialized entry size and is
 * never less than one entry.
 *
 * If a flush fails, the chunk that failed and every chunk not yet attempted are
 * put back into the buffer before the error is rethrown, so a later flush can
 * retry them instead of the entries being silently dropped.
 *
 * @param bufferSize - Serialized size of the whole buffer in bytes
 * @param maxSegmentSize - Target upper bound for one segment in bytes
 * @throws Whatever `flushWALBuffer` throws for the failing chunk
 */
private async flushBufferInChunks(bufferSize: number, maxSegmentSize: number): Promise<void> {
  const pending = [...this.walBuffer]
  if (pending.length === 0) return
  this.walBuffer = []

  const avgEntrySize = bufferSize / pending.length
  const entriesPerSegment = Math.max(1, Math.floor(maxSegmentSize / avgEntrySize))

  while (pending.length > 0) {
    this.walBuffer = pending.splice(0, entriesPerSegment)
    try {
      await this.flushWALBuffer()
    } catch (e) {
      // flushWALBuffer only clears the buffer after a successful upload, so the
      // failed chunk is still in walBuffer; re-attach the untouched remainder.
      this.walBuffer = [...this.walBuffer, ...pending]
      throw e
    }
  }
}
|
|
349
|
+
|
|
350
|
+
/**
 * Flushes the current WAL buffer to R2 as a new segment, with retry on failure.
 *
 * The buffer is sorted by LSN, serialized as JSON and uploaded together with
 * metadata (LSN range, entry count, compression label, checksum, timeline id).
 * On success the segment is recorded locally, the archive statistics are
 * updated and the uploaded entries are removed from the buffer. On failure the
 * buffer is left untouched and the upload error propagates.
 *
 * Does nothing when the buffer is empty.
 *
 * @throws The upload error once all upload attempts have failed
 */
async flushWALBuffer(): Promise<void> {
  if (this.walBuffer.length === 0) return

  this.walBuffer.sort((a, b) => compareLsn(a.lsn, b.lsn))

  // Work on a snapshot: entries buffered while the upload is awaited are not
  // part of this segment and must neither be counted nor discarded below.
  const batch = [...this.walBuffer]
  const startLsn = batch[0].lsn
  const endLsn = batch[batch.length - 1].lsn

  const data = new TextEncoder().encode(JSON.stringify(batch))
  const compressed = !!this.config.archiveConfig?.compression
  // Compression is a pass-through for now; production would use CompressionStream
  const finalData = data

  const checksum = await computeChecksum(finalData)

  // The key is time-based, so two flushes within the same millisecond would
  // collide and the second upload would overwrite the first segment in R2.
  // Add a numeric suffix whenever the key is already taken by a known segment.
  const baseKey = `${this.getKeyPrefix()}timeline-${this.currentTimelineId}/seg-${Date.now().toString(36)}`
  const takenKeys = new Set(this.segments.map((s) => s.key))
  let segmentKey = baseKey
  for (let suffix = 1; takenKeys.has(segmentKey); suffix++) {
    segmentKey = `${baseKey}-${suffix}`
  }

  await this.uploadSegmentWithRetry(segmentKey, finalData, {
    startLsn,
    endLsn,
    entryCount: String(batch.length),
    compression: compressed ? 'gzip' : 'none',
    checksum,
    timelineId: String(this.currentTimelineId),
  })

  const segmentInfo: WALSegmentInfo = {
    key: segmentKey,
    startLsn,
    endLsn,
    entryCount: batch.length,
    sizeBytes: finalData.length,
    compressed,
    checksum,
    timestamp: Date.now(),
    timelineId: this.currentTimelineId,
  }

  this.segments.push(segmentInfo)
  this.lastArchivedLsn = endLsn
  this.stats.totalEntriesArchived += batch.length
  this.stats.totalSegments++
  this.stats.totalBytesArchived += finalData.length
  this.stats.lastArchiveTimestamp = Date.now()
  if (this.oldestEntryTimestamp > 0) {
    this.stats.oldestRecoveryPointMs = this.oldestEntryTimestamp
  }

  // Remove only what was uploaded; anything added meanwhile stays buffered
  const uploaded = new Set(batch)
  this.walBuffer = this.walBuffer.filter((entry) => !uploaded.has(entry))
}
|
|
400
|
+
|
|
401
|
+
/**
 * Writes one segment to R2, trying again after a failure until
 * MAX_UPLOAD_RETRIES attempts have been made.
 *
 * @param key - R2 object key for the segment
 * @param data - Serialized segment payload
 * @param customMetadata - String metadata stored alongside the object
 * @throws The error from the last attempt when every attempt fails
 */
private async uploadSegmentWithRetry(
  key: string,
  data: Uint8Array,
  customMetadata: Record<string, string>,
): Promise<void> {
  for (let failedAttempts = 0; failedAttempts < MAX_UPLOAD_RETRIES; ) {
    try {
      await this.config.bucket.put(key, data, { customMetadata })
      return
    } catch (uploadError) {
      failedAttempts += 1
      const outOfAttempts = failedAttempts >= MAX_UPLOAD_RETRIES
      if (outOfAttempts) {
        throw uploadError
      }
    }
  }
}
|
|
420
|
+
|
|
421
|
+
/**
 * Returns the last archived LSN tracked by this instance.
 * It starts as an empty string and is set to a segment's end LSN each time a
 * flush succeeds.
 */
getLastArchivedLsn(): string {
  const { lastArchivedLsn } = this
  return lastArchivedLsn
}
|
|
424
|
+
|
|
425
|
+
/**
 * Lists WAL segments from both local state and R2.
 *
 * Locally tracked segments come first, followed by segments found only in R2
 * (described from their custom metadata, with defaults for missing fields).
 * If the R2 listing fails, the segments collected so far are returned.
 *
 * @param options.timelineId - When given, only segments on this timeline are
 *   returned; the filter applies to local and R2-derived segments alike
 * @param options.limit - When given (and not negative), caps the number of
 *   segments returned
 * @returns A new array; callers may mutate it freely
 */
async listWALSegments(options?: { timelineId?: number; limit?: number }): Promise<WALSegmentInfo[]> {
  const timelineId = options?.timelineId
  const matchesTimeline = (segment: WALSegmentInfo): boolean =>
    timelineId === undefined || segment.timelineId === timelineId

  const result = this.segments.filter(matchesTimeline)

  // De-duplicate against every locally tracked key, not just the ones that
  // passed the filter; otherwise a local segment from another timeline would be
  // re-added from its R2 listing entry.
  const knownKeys = new Set(this.segments.map((s) => s.key))

  try {
    const r2Objects = await this.fetchAllR2Objects()
    for (const obj of r2Objects) {
      const listed = obj as { key: string; size?: number; uploaded?: Date }
      if (knownKeys.has(listed.key)) continue
      knownKeys.add(listed.key)

      const meta = extractSegmentMetadata(obj)
      const segment: WALSegmentInfo = {
        key: listed.key,
        startLsn: meta.startLsn || '0/0',
        endLsn: meta.endLsn || '0/0',
        entryCount: parseInt(meta.entryCount || '0', 10),
        sizeBytes: listed.size || 0,
        compressed: meta.compression === 'gzip',
        checksum: meta.checksum || '',
        timestamp: listed.uploaded?.getTime() || 0,
        timelineId: parseInt(meta.timelineId || '1', 10),
      }
      if (matchesTimeline(segment)) {
        result.push(segment)
      }
    }
  } catch {
    // Use local segments only on R2 list failure
  }

  const limit = options?.limit
  return limit !== undefined && limit >= 0 ? result.slice(0, limit) : result
}
|
|
458
|
+
|
|
459
|
+
/**
 * Fetches all R2 objects for this DO, handling pagination.
 *
 * Custom metadata is requested explicitly: R2 list results do not carry it
 * unless it is named in `include`, and callers read the LSN range, entry
 * count, checksum and timeline id from it.
 *
 * @returns Every listed object under this DO's key prefix, in listing order
 * @throws Whatever the R2 `list` call throws
 */
private async fetchAllR2Objects(): Promise<unknown[]> {
  let cursor: string | undefined
  const objects: unknown[] = []

  do {
    const listResult = await this.config.bucket.list({
      prefix: this.getKeyPrefix(),
      cursor,
      include: ['customMetadata'],
    })
    objects.push(...listResult.objects)
    // `truncated` (not the page size) is the signal that more pages exist
    cursor = listResult.truncated ? listResult.cursor : undefined
  } while (cursor)

  return objects
}
|
|
475
|
+
|
|
476
|
+
/**
 * Looks up one segment by its R2 key.
 *
 * Locally tracked segments are returned directly. Otherwise the object's head
 * is fetched from R2 and described from its custom metadata, with defaults for
 * any missing field.
 *
 * @returns The segment info, or null when the object does not exist or the R2
 *   lookup fails
 */
async getSegmentInfo(key: string): Promise<WALSegmentInfo | null> {
  for (const tracked of this.segments) {
    if (tracked.key === key) return tracked
  }

  try {
    const head = await this.config.bucket.head(key)
    if (!head) return null

    const meta = extractSegmentMetadata(head)
    const raw = head as Record<string, any>
    const info: WALSegmentInfo = {
      key,
      startLsn: meta.startLsn || '0/0',
      endLsn: meta.endLsn || '0/0',
      entryCount: parseInt(meta.entryCount || '0', 10),
      sizeBytes: raw.size || 0,
      compressed: meta.compression === 'gzip',
      checksum: meta.checksum || '',
      timestamp: raw.uploaded?.getTime() || 0,
      timelineId: parseInt(meta.timelineId || '1', 10),
    }
    return info
  } catch {
    return null
  }
}
|
|
502
|
+
|
|
503
|
+
/** Returns a snapshot of the archiving counters taken from the overall PITR stats */
getArchiveStats(): WALArchiveStats {
  const { totalSegments, totalEntriesArchived, totalBytesArchived, lastArchiveTimestamp } = this.stats
  return { totalSegments, totalEntriesArchived, totalBytesArchived, lastArchiveTimestamp }
}
|
|
511
|
+
|
|
512
|
+
// ===========================================================================
|
|
513
|
+
// Recovery
|
|
514
|
+
// ===========================================================================
|
|
515
|
+
|
|
516
|
+
/**
 * Recovers the database to a specific point in time by replaying every WAL
 * entry whose timestamp is at or before the target.
 *
 * When segment keys are available their contents are replayed. Otherwise a
 * direct fetch is tried; if that applies nothing, the call still succeeds when
 * the target lies within CURRENT_TIME_THRESHOLD_MS of now and fails otherwise.
 * A thrown error is converted into a failed result and is counted in
 * `recoveriesPerformed`.
 *
 * @param targetTime - Point in time to recover to
 * @param pglite - Database handle passed through to the entry-apply step
 */
async recoverToTimestamp(targetTime: Date, pglite: any): Promise<RecoveryResult> {
  const startTime = Date.now()
  const targetTimestamp = targetTime.getTime()
  const recoveryTarget: RecoveryTarget = { type: 'timestamp', value: targetTime }

  try {
    const segmentKeys = await this.collectSegmentKeysWithFallback()

    if (segmentKeys.length !== 0) {
      const replayed = await this.replaySegmentsByTimestamp(segmentKeys, targetTimestamp, pglite)
      return this.finalizeSuccessfulRecovery(
        recoveryTarget, replayed, startTime, `Recovery success: applied ${replayed} WAL entries`
      )
    }

    const replayed = await this.replayDirectEntriesByTimestamp(targetTimestamp, pglite)
    if (replayed !== 0) {
      return this.finalizeSuccessfulRecovery(
        recoveryTarget, replayed, startTime, `Recovery success: applied ${replayed} WAL entries`
      )
    }

    // Nothing to replay: acceptable only when the caller asked for "now"
    const targetIsNow = Math.abs(targetTimestamp - Date.now()) < CURRENT_TIME_THRESHOLD_MS
    if (!targetIsNow) {
      return createFailedRecoveryResult(recoveryTarget, startTime, 'No WAL data available for the requested recovery target')
    }
    return this.finalizeSuccessfulRecovery(
      recoveryTarget, 0, startTime, 'Recovery success: recovered to current point'
    )
  } catch (e) {
    this.stats.recoveriesPerformed++
    const reason = e instanceof Error ? e.message : 'Timestamp recovery failed'
    return createFailedRecoveryResult(recoveryTarget, startTime, reason)
  }
}
|
|
551
|
+
|
|
552
|
+
/**
 * Replays directly fetched WAL entries whose timestamp is at or before the target.
 *
 * Best effort: any error (from the fetch or from applying an entry) stops the
 * replay and is swallowed; the count of entries applied up to that point is
 * still returned.
 *
 * @returns Number of entries applied
 */
private async replayDirectEntriesByTimestamp(targetTimestamp: number, pglite: any): Promise<number> {
  let applied = 0
  try {
    const fetched = await this.tryDirectSegmentFetch()
    for (const entry of fetched) {
      const withinTarget = entry.timestamp <= targetTimestamp
      if (withinTarget) {
        await this.applyWALEntry(entry, pglite)
        applied += 1
      }
    }
  } catch {
    // Direct fetch failed or returned empty
  }
  return applied
}
|
|
568
|
+
|
|
569
|
+
/**
 * Downloads each listed segment and replays its entries whose timestamp is at
 * or before the target.
 *
 * Missing segments are skipped. An error while reading, parsing or applying a
 * segment abandons the rest of that segment and moves on to the next one.
 *
 * @returns Number of entries applied across all segments
 */
private async replaySegmentsByTimestamp(segmentKeys: string[], targetTimestamp: number, pglite: any): Promise<number> {
  let applied = 0
  for (const segmentKey of segmentKeys) {
    try {
      const object = await this.config.bucket.get(segmentKey)
      if (!object) continue
      const body = await object.text()
      const parsed: WALEntry[] = JSON.parse(body)
      for (const entry of parsed) {
        const withinTarget = entry.timestamp <= targetTimestamp
        if (withinTarget) {
          await this.applyWALEntry(entry, pglite)
          applied += 1
        }
      }
    } catch {
      // Skip corrupted segments during replay
    }
  }
  return applied
}
|
|
590
|
+
|
|
591
|
+
/**
 * Book-keeping shared by successful recoveries: bumps the recovery counter,
 * starts a new timeline, records a passing validation with the given summary
 * and builds the success result.
 *
 * @param target - The recovery target that was reached
 * @param entriesApplied - Number of WAL entries replayed
 * @param startTime - Epoch milliseconds at which the recovery began
 * @param summary - Text stored in the recorded validation
 * @returns A success result carrying the timeline id that is current after the new timeline was created
 */
private finalizeSuccessfulRecovery(
  target: RecoveryTarget,
  entriesApplied: number,
  startTime: number,
  summary: string,
): RecoveryResult {
  this.stats.recoveriesPerformed += 1
  this.createNewTimeline(Date.now())

  const validation: RecoveryValidation = { walContinuity: true, databaseConsistent: true, summary }
  this.lastRecoveryValidation = validation

  const durationMs = Date.now() - startTime
  return {
    success: true,
    recoveryTarget: target,
    entriesApplied,
    durationMs,
    timelineId: this.currentTimelineId,
  }
}
|
|
613
|
+
|
|
614
|
+
/**
 * Recovers the database to a specific WAL LSN by replaying every entry whose
 * LSN is at or before the target.
 *
 * Segment contents are replayed when segment keys are available; otherwise a
 * direct fetch is used. An error reported by either path, or thrown anywhere
 * in the process, yields a failed result.
 *
 * @param targetLsn - LSN to recover to, in `high/low` hexadecimal form
 * @param pglite - Database handle passed through to the entry-apply step
 */
async recoverToLsn(targetLsn: string, pglite: any): Promise<RecoveryResult> {
  const startTime = Date.now()
  const recoveryTarget: RecoveryTarget = { type: 'lsn', value: targetLsn }

  try {
    const segmentKeys = await this.collectSegmentKeys()

    const outcome = segmentKeys.length === 0
      ? await this.replayDirectEntriesByLsn(targetLsn, pglite, startTime)
      : await this.replaySegmentsByLsn(segmentKeys, targetLsn, pglite, startTime)

    if (outcome.error) {
      return createFailedRecoveryResult(recoveryTarget, startTime, outcome.error)
    }

    const replayed = outcome.entriesApplied
    return this.finalizeSuccessfulRecovery(
      recoveryTarget, replayed, startTime,
      `Recovery success: applied ${replayed} WAL entries to LSN ${targetLsn}`
    )
  } catch (e) {
    const reason = e instanceof Error ? e.message : 'LSN recovery failed'
    return createFailedRecoveryResult(recoveryTarget, startTime, reason)
  }
}
|
|
646
|
+
|
|
647
|
+
/**
 * Replays directly fetched WAL entries whose LSN is at or before the target.
 *
 * @param targetLsn - Upper LSN bound (inclusive)
 * @param pglite - Database handle passed through to the entry-apply step
 * @param _startTime - Unused
 * @returns The number of entries applied, or `entriesApplied: 0` with an
 *   `error` message when nothing could be fetched or a step threw
 */
private async replayDirectEntriesByLsn(
  targetLsn: string, pglite: any, _startTime: number
): Promise<{ entriesApplied: number; error?: string }> {
  try {
    const fetched = await this.tryDirectSegmentFetch()
    if (fetched.length === 0) {
      return { entriesApplied: 0, error: 'LSN not found in archive - no WAL segments available' }
    }

    let applied = 0
    for (const entry of fetched) {
      const withinTarget = compareLsn(entry.lsn, targetLsn) <= 0
      if (withinTarget) {
        await this.applyWALEntry(entry, pglite)
        applied += 1
      }
    }
    return { entriesApplied: applied }
  } catch (failure) {
    return { entriesApplied: 0, error: this.classifyRecoveryError(failure) }
  }
}
|
|
668
|
+
|
|
669
|
+
/**
 * Downloads each listed segment and replays its entries whose LSN is at or
 * before the target.
 *
 * Missing segments are skipped. Unlike the timestamp variant, the first error
 * aborts the whole replay: the result then reports `entriesApplied: 0` plus a
 * classified error message, even if earlier entries were already applied.
 *
 * @param segmentKeys - R2 keys of the segments to replay, in replay order
 * @param targetLsn - Upper LSN bound (inclusive)
 * @param pglite - Database handle passed through to the entry-apply step
 * @param _startTime - Unused
 */
private async replaySegmentsByLsn(
  segmentKeys: string[], targetLsn: string, pglite: any, _startTime: number
): Promise<{ entriesApplied: number; error?: string }> {
  let applied = 0
  for (const segmentKey of segmentKeys) {
    try {
      const object = await this.config.bucket.get(segmentKey)
      if (!object) continue
      const body = await object.text()
      const parsed: WALEntry[] = JSON.parse(body)
      for (const entry of parsed) {
        const withinTarget = compareLsn(entry.lsn, targetLsn) <= 0
        if (withinTarget) {
          await this.applyWALEntry(entry, pglite)
          applied += 1
        }
      }
    } catch (failure) {
      return { entriesApplied: 0, error: this.classifyRecoveryError(failure) }
    }
  }
  return { entriesApplied: applied }
}
|
|
692
|
+
|
|
693
|
+
/**
 * Turns a caught value into the message reported for a failed recovery.
 *
 * Errors whose message mentions "corrupt" or "Invalid UTF-8" are reported as
 * corrupt segment data; every other Error keeps its own message; non-Error
 * values get a generic message.
 */
private classifyRecoveryError(e: unknown): string {
  if (!(e instanceof Error)) {
    return 'Recovery failed with unknown error'
  }
  const { message } = e
  const looksCorrupt = message.includes('corrupt') || message.includes('Invalid UTF-8')
  return looksCorrupt ? 'WAL segment data is corrupt' : message
}
|
|
706
|
+
|
|
707
|
+
// ===========================================================================
|
|
708
|
+
// Named Restore Points
|
|
709
|
+
// ===========================================================================
|
|
710
|
+
|
|
711
|
+
/**
 * Registers a named restore point at the current WAL position.
 *
 * The point records the last archived LSN; when nothing has been archived yet,
 * a placeholder LSN derived from the current time is used instead.
 *
 * @param name - Name of the new point; must not already be in use
 * @returns The restore point that was stored
 * @throws Error when a restore point with this name already exists
 */
async createRestorePoint(name: string): Promise<RestorePoint> {
  for (const existing of this.restorePoints) {
    if (existing.name === name) {
      throw new Error(`Restore point '${name}' already exists`)
    }
  }

  const created: RestorePoint = {
    name,
    lsn: this.lastArchivedLsn || `0/${Date.now().toString(16)}`,
    timestamp: Date.now(),
    timelineId: this.currentTimelineId,
  }
  this.restorePoints.push(created)
  return created
}
|
|
728
|
+
|
|
729
|
+
/**
 * Recovers the database to a previously created named restore point.
 *
 * Every object under this DO's key prefix is read and the entries whose LSN is
 * at or before the restore point's LSN are replayed. The listing is followed
 * page by page, so archives larger than a single R2 list page are replayed in
 * full rather than being cut off after the first page.
 *
 * Segments that cannot be read, parsed or applied are skipped. The recovery is
 * then finalized: the recovery counter is bumped, a new timeline is started and
 * a passing validation is recorded.
 *
 * @param name - Name of the restore point
 * @param pglite - Database handle passed through to the entry-apply step
 * @returns A failed result when the restore point is unknown, otherwise a
 *   success result with the number of entries applied
 * @throws Whatever the R2 `list` call throws
 */
async recoverToRestorePoint(name: string, pglite: any): Promise<RecoveryResult> {
  const point = this.restorePoints.find((p) => p.name === name)
  const recoveryTarget: RecoveryTarget = { type: 'named', value: name }

  if (!point) {
    return {
      success: false,
      recoveryTarget,
      entriesApplied: 0,
      error: `Restore point '${name}' not found`,
    }
  }

  const prefix = this.getKeyPrefix()
  let entriesApplied = 0
  let cursor: string | undefined

  do {
    // Only pass a cursor once there is one, so the first request is unchanged
    const listResult = await this.config.bucket.list(cursor ? { prefix, cursor } : { prefix })
    if (!listResult) break

    for (const obj of listResult.objects) {
      try {
        const segData = await this.config.bucket.get(obj.key)
        if (!segData) continue
        const text = await segData.text()
        const entries: WALEntry[] = JSON.parse(text)

        for (const entry of entries) {
          if (compareLsn(entry.lsn, point.lsn) <= 0) {
            await this.applyWALEntry(entry, pglite)
            entriesApplied++
          }
        }
      } catch {
        // Skip corrupted segments
      }
    }

    // `truncated` signals that another page follows
    cursor = listResult.truncated ? listResult.cursor : undefined
  } while (cursor)

  this.stats.recoveriesPerformed++
  this.createNewTimeline(Date.now())

  this.lastRecoveryValidation = {
    walContinuity: true,
    databaseConsistent: true,
    summary: `Recovery success: recovered to restore point '${name}'`,
  }

  return {
    success: true,
    recoveryTarget: { type: 'named', value: name },
    entriesApplied,
    timelineId: this.currentTimelineId,
  }
}
|
|
784
|
+
|
|
785
|
+
/**
 * Lists the named restore points.
 *
 * @returns A new array holding the current restore points, so callers cannot
 *   add to or remove from the manager's own list (the entries themselves are
 *   not cloned).
 */
async listRestorePoints(): Promise<RestorePoint[]> {
  const snapshot = Array.from(this.restorePoints)
  return snapshot
}
|
|
789
|
+
|
|
790
|
+
/**
 * Removes the restore point(s) registered under the given name.
 * Does nothing when no restore point has that name.
 *
 * @param name - Name of the restore point to drop.
 */
async deleteRestorePoint(name: string): Promise<void> {
  const survivors = this.restorePoints.filter(({ name: existingName }) => existingName !== name)
  this.restorePoints = survivors
}
|
|
794
|
+
|
|
795
|
+
// ===========================================================================
|
|
796
|
+
// Timeline Management
|
|
797
|
+
// ===========================================================================
|
|
798
|
+
|
|
799
|
+
/**
 * Looks up the timeline record whose id equals the current timeline id.
 *
 * @returns The matching timeline. The lookup result is asserted non-null, so
 *   if no record matches the caller receives `undefined` despite the type.
 */
getCurrentTimeline(): TimelineInfo {
  const activeId = this.currentTimelineId
  const active = this.timelines.find((timeline) => timeline.id === activeId)
  return active!
}
|
|
802
|
+
|
|
803
|
+
/**
 * Returns the most recent timeline records, in the order they were recorded.
 *
 * The number of records is capped by `config.maxTimelineHistory`; when that is
 * unset (or 0) the cap falls back to `DEFAULT_MAX_TIMELINE_HISTORY`, the same
 * default `createNewTimeline` uses when trimming, so both agree on how much
 * history exists.
 *
 * @returns A new array with at most the configured number of timelines.
 */
async getTimelineHistory(): Promise<TimelineInfo[]> {
  const maxHistory = this.config.maxTimelineHistory || DEFAULT_MAX_TIMELINE_HISTORY
  return this.timelines.slice(-maxHistory)
}
|
|
807
|
+
|
|
808
|
+
// ===========================================================================
|
|
809
|
+
// Recovery Plan
|
|
810
|
+
// ===========================================================================
|
|
811
|
+
|
|
812
|
+
/**
 * Builds a recovery plan from the objects currently archived for this DO.
 *
 * Every object listed under `${prefix}${doId}/` is treated as required; the
 * target only decides which target field is echoed back on the plan
 * (`targetLsn` for 'lsn', `targetTimestamp` for 'timestamp', neither for any
 * other type).
 *
 * @param target - The recovery target the plan is generated for.
 * @returns An infeasible plan requiring a base backup when nothing is
 *   archived; otherwise a feasible plan with the segment list, total bytes and
 *   a duration estimate rounded up to whole seconds.
 */
async generateRecoveryPlan(target: RecoveryTarget): Promise<RecoveryPlan> {
  const prefix = `${this.config.prefix}${this.config.doId}/`
  const listing = await this.config.bucket.list({ prefix })
  const archived = listing?.objects || []

  if (archived.length === 0) {
    return {
      segmentsRequired: [],
      estimatedDurationMs: 0,
      totalBytesToReplay: 0,
      requiresBaseBackup: true,
      feasible: false,
      reason: 'No WAL segments available for recovery',
    }
  }

  // A listing entry without a size is assumed to be 1024 bytes.
  const segmentsRequired = archived.map((item: any) => {
    return { key: item.key, size: item.size || 1024 }
  })

  let totalBytes = 0
  for (const segment of segmentsRequired) {
    totalBytes = totalBytes + segment.size
  }

  const bytesPerSecond = ESTIMATED_REPLAY_BYTES_PER_MS * 1000
  const wholeSeconds = Math.ceil(totalBytes / bytesPerSecond)
  const estimatedDurationMs = wholeSeconds * 1000 || MIN_RECOVERY_PLAN_DURATION_MS

  const plan: RecoveryPlan = {
    segmentsRequired,
    estimatedDurationMs,
    totalBytesToReplay: totalBytes,
    requiresBaseBackup: false,
    feasible: true,
  }

  switch (target.type) {
    case 'lsn':
      plan.targetLsn = target.value as string
      break
    case 'timestamp':
      plan.targetTimestamp = target.value as Date
      break
  }

  return plan
}
|
|
854
|
+
|
|
855
|
+
// ===========================================================================
|
|
856
|
+
// Recovery Validation
|
|
857
|
+
// ===========================================================================
|
|
858
|
+
|
|
859
|
+
/**
 * Reports the validation recorded by the most recent recovery.
 *
 * @returns The stored validation when one exists; otherwise a default result
 *   that reports success because no recovery has been performed yet.
 */
async validateRecovery(): Promise<RecoveryValidation> {
  const nothingRecoveredYet: RecoveryValidation = {
    walContinuity: true,
    databaseConsistent: true,
    summary: 'No recovery performed yet - success by default',
  }

  return this.lastRecoveryValidation ?? nothingRecoveredYet
}
|
|
870
|
+
|
|
871
|
+
/**
 * Checks the archived WAL segments for gaps in LSN coverage.
 *
 * When the manager tracks any local segments, the archive is reported valid
 * without inspecting the bucket. Otherwise the objects under
 * `${prefix}${doId}/` are listed and the `startLsn`/`endLsn` values in their
 * custom metadata are compared; objects without both values are counted in
 * `segmentsChecked` but cannot contribute to gap detection.
 *
 * Segments are walked in start-LSN order while tracking the furthest end LSN
 * seen so far, so a segment nested inside (or overlapping) an earlier one does
 * not produce a false gap against the segment that follows it.
 *
 * @returns Whether coverage is gap-free, how many segments were looked at, and
 *   the gaps found (omitted when there are none).
 */
async validateArchiveIntegrity(): Promise<ArchiveIntegrityResult> {
  // Check local segments first
  if (this.segments.length > 0) {
    return {
      allSegmentsValid: true,
      segmentsChecked: this.segments.length,
    }
  }

  // Check R2 segments for gaps
  const listResult = await this.config.bucket.list({
    prefix: `${this.config.prefix}${this.config.doId}/`,
  })

  const objects = listResult?.objects || []
  if (objects.length === 0) {
    return {
      allSegmentsValid: true,
      segmentsChecked: 0,
    }
  }

  // Collect the LSN range each object advertises in its customMetadata
  const segmentInfos: Array<{ startLsn: string; endLsn: string }> = []
  for (const obj of objects) {
    const meta = (obj as any).customMetadata
    if (meta?.startLsn && meta?.endLsn) {
      segmentInfos.push({ startLsn: meta.startLsn, endLsn: meta.endLsn })
    }
  }

  // Sort by startLsn
  segmentInfos.sort((a, b) => compareLsn(a.startLsn, b.startLsn))

  const gaps: Array<{ afterLsn: string; beforeLsn: string }> = []
  let coveredUpTo: string | undefined
  for (const segment of segmentInfos) {
    if (coveredUpTo === undefined) {
      coveredUpTo = segment.endLsn
      continue
    }

    // A gap exists only if this segment starts beyond everything covered so far.
    if (parseLsn(segment.startLsn) - parseLsn(coveredUpTo) > 1) {
      gaps.push({
        afterLsn: coveredUpTo,
        beforeLsn: segment.startLsn,
      })
    }

    // Extend coverage only when this segment reaches further than earlier ones.
    if (compareLsn(segment.endLsn, coveredUpTo) > 0) {
      coveredUpTo = segment.endLsn
    }
  }

  return {
    allSegmentsValid: gaps.length === 0,
    segmentsChecked: objects.length,
    gaps: gaps.length > 0 ? gaps : undefined,
  }
}
|
|
924
|
+
|
|
925
|
+
// ===========================================================================
|
|
926
|
+
// WAL Segment Management
|
|
927
|
+
// ===========================================================================
|
|
928
|
+
|
|
929
|
+
/**
 * Deletes archived objects whose upload time is older than the retention
 * window (`config.retentionDays`, defaulting to `DEFAULT_RETENTION_DAYS`).
 *
 * While at least one named restore point exists, expired objects are kept and
 * counted as retained instead of being deleted. Objects that carry no
 * `uploaded` value are ignored entirely.
 *
 * @returns How many objects were deleted and how many expired objects were
 *   retained because restore points exist.
 */
async pruneWALSegments(): Promise<WALPruneResult> {
  const retentionDays = this.config.retentionDays || DEFAULT_RETENTION_DAYS
  const retentionWindowMs = retentionDays * MS_PER_DAY
  const startedAt = Date.now()

  const listing = await this.config.bucket.list({
    prefix: `${this.config.prefix}${this.config.doId}/`,
  })

  const outcome: WALPruneResult = {
    segmentsPruned: 0,
    segmentsRetainedForRestorePoints: 0,
  }

  for (const obj of listing?.objects || []) {
    const uploaded = (obj as any).uploaded
    if (!uploaded) continue

    const expired = startedAt - uploaded.getTime() > retentionWindowMs
    if (!expired) continue

    // Any existing restore point blocks deletion of expired objects.
    if (this.restorePoints.length > 0) {
      outcome.segmentsRetainedForRestorePoints++
      continue
    }

    await this.config.bucket.delete(obj.key)
    outcome.segmentsPruned++
  }

  return outcome
}
|
|
966
|
+
|
|
967
|
+
/**
 * Sums the reported sizes of all objects listed under `${prefix}${doId}/`.
 *
 * @returns Total size of the listed objects; an object without a size counts as 0.
 */
async getArchiveSize(): Promise<number> {
  const listing = await this.config.bucket.list({
    prefix: `${this.config.prefix}${this.config.doId}/`,
  })

  let totalBytes = 0
  for (const obj of (listing?.objects || []) as any[]) {
    totalBytes = totalBytes + (obj.size || 0)
  }
  return totalBytes
}
|
|
975
|
+
|
|
976
|
+
// ===========================================================================
|
|
977
|
+
// Statistics
|
|
978
|
+
// ===========================================================================
|
|
979
|
+
|
|
980
|
+
/**
 * Returns a snapshot of the PITR statistics.
 *
 * @returns The stored counters plus the current timeline id and `archiveLagMs`,
 *   the time elapsed since the last archive (0 when no archive timestamp has
 *   been recorded).
 */
getStats(): PITRStats {
  const now = Date.now()
  const lastArchivedAt = this.stats.lastArchiveTimestamp
  const archiveLagMs = lastArchivedAt > 0 ? now - lastArchivedAt : 0

  return {
    ...this.stats,
    archiveLagMs,
    currentTimelineId: this.currentTimelineId,
  }
}
|
|
988
|
+
|
|
989
|
+
/** Discards the accumulated statistics, replacing them with a fresh set from `createEmptyStats()`. */
resetStats(): void {
  const blank = this.createEmptyStats()
  this.stats = blank
}
|
|
993
|
+
|
|
994
|
+
// ===========================================================================
|
|
995
|
+
// Private Helpers
|
|
996
|
+
// ===========================================================================
|
|
997
|
+
|
|
998
|
+
/**
 * Collects segment keys from the bucket listing and from the locally tracked
 * segments.
 *
 * Listed keys come first, in listing order; keys of local segments that were
 * not already collected are appended afterwards. A failing list call is
 * tolerated so that the local segments can still be returned.
 *
 * @returns The combined segment keys.
 */
private async collectSegmentKeys(): Promise<string[]> {
  const keys: string[] = []
  // Membership set so merging local segments stays linear instead of
  // rescanning `keys` for every segment.
  const seen = new Set<string>()

  try {
    const listResult = await this.config.bucket.list({
      prefix: this.getKeyPrefix(),
    })
    for (const obj of listResult?.objects ?? []) {
      keys.push(obj.key)
      seen.add(obj.key)
    }
  } catch {
    // List failed, fall through to internal segments
  }

  for (const seg of this.segments) {
    if (!seen.has(seg.key)) {
      seen.add(seg.key)
      keys.push(seg.key)
    }
  }

  return keys
}
|
|
1023
|
+
|
|
1024
|
+
/**
 * Collects segment keys and, only when none are found, probes the bucket for a
 * `current-wal` object under the key prefix.
 *
 * @returns The collected keys; when there were none and the probe finds an
 *   object, a single-element list holding the `current-wal` key.
 */
private async collectSegmentKeysWithFallback(): Promise<string[]> {
  const keys = await this.collectSegmentKeys()
  if (keys.length !== 0) {
    return keys
  }

  const fallbackKey = `${this.getKeyPrefix()}current-wal`
  try {
    const probe = await this.config.bucket.get(fallbackKey)
    if (probe) {
      keys.push(fallbackKey)
    }
  } catch {
    // No fallback available
  }

  return keys
}
|
|
1042
|
+
|
|
1043
|
+
/**
 * Fetches WAL entries straight from the `current-wal` object, for the case
 * where the listing is empty but a segment may exist without being listed yet.
 *
 * @returns The parsed entries, or an empty array when the object is absent.
 * @throws Error 'WAL segment data is corrupt' when the failure message mentions
 *   'Invalid UTF-8'; any other failure is rethrown unchanged.
 */
private async tryDirectSegmentFetch(): Promise<WALEntry[]> {
  const directKey = `${this.config.prefix}${this.config.doId}/current-wal`

  try {
    const data = await this.config.bucket.get(directKey)
    if (data) {
      const body = await data.text()
      return JSON.parse(body) as WALEntry[]
    }
    return []
  } catch (e) {
    const undecodable = e instanceof Error && e.message.includes('Invalid UTF-8')
    throw undecodable ? new Error('WAL segment data is corrupt') : e
  }
}
|
|
1059
|
+
|
|
1060
|
+
/**
 * Advances to a new timeline that branches off the current one.
 *
 * The new record stores the parent timeline id, the last archived LSN
 * ('0/0' when nothing has been archived) and the given branch timestamp.
 * Afterwards the history is trimmed to the newest `config.maxTimelineHistory`
 * records (default `DEFAULT_MAX_TIMELINE_HISTORY`).
 *
 * @param branchTimestamp - Timestamp recorded as the branch point.
 */
private createNewTimeline(branchTimestamp: number): void {
  const parentTimelineId = this.currentTimelineId
  this.currentTimelineId += 1

  const branched: TimelineInfo = {
    id: this.currentTimelineId,
    startedAt: Date.now(),
    branchPoint: {
      parentTimelineId,
      lsn: this.lastArchivedLsn || '0/0',
      timestamp: branchTimestamp,
    },
  }
  this.timelines.push(branched)

  // Keep only the newest records once the history limit is exceeded.
  const historyLimit = this.config.maxTimelineHistory || DEFAULT_MAX_TIMELINE_HISTORY
  const overLimit = this.timelines.length > historyLimit
  if (overLimit) {
    this.timelines = this.timelines.slice(-historyLimit)
  }
}
|
|
1082
|
+
|
|
1083
|
+
/**
 * Replays a single WAL entry against the given PGLite instance.
 *
 * - INSERT writes `newRow` using bound parameters (skipped when `newRow` is absent).
 * - UPDATE sets the columns of `newRow` on the row(s) matching `oldRow`.
 * - DELETE removes the row(s) matching `oldRow`.
 * - TRUNCATE empties the table.
 * - Any other operation is ignored.
 *
 * Row matching compares every column of `oldRow` with `IS NOT DISTINCT FROM`,
 * so NULL columns match too. An UPDATE or DELETE without a usable `oldRow`
 * is rejected: executing it without a filter would rewrite or wipe the whole
 * table.
 *
 * Values are passed as query parameters and identifiers are quoted with
 * embedded double quotes doubled, so WAL content cannot alter the statement.
 *
 * @param entry - The WAL entry to replay.
 * @param pglite - Database handle; `query(sql, params)` is used for
 *   parameterized statements and `exec(sql)` for parameterless ones.
 * @throws Error prefixed with 'PGLite replay error: ' when the underlying
 *   failure message mentions 'PGLite' or 'write error'; any other failure
 *   (including a missing `oldRow`) is rethrown unchanged.
 */
private async applyWALEntry(entry: WALEntry, pglite: any): Promise<void> {
  const { operation, schema, table, newRow, oldRow } = entry

  const quoteIdent = (ident: unknown): string => `"${String(ident).replace(/"/g, '""')}"`
  const target = `${quoteIdent(schema)}.${quoteIdent(table)}`

  // Builds the WHERE clause that pins an UPDATE/DELETE to the pre-image row.
  const buildRowFilter = (firstParam: number): { clause: string; values: unknown[] } => {
    const pairs = oldRow ? Object.entries(oldRow) : []
    if (pairs.length === 0) {
      throw new Error(
        `Cannot replay ${operation} on ${target} at LSN ${entry.lsn}: WAL entry has no oldRow to identify the target row`
      )
    }
    return {
      clause: pairs
        .map(([col], i) => `${quoteIdent(col)} IS NOT DISTINCT FROM $${firstParam + i}`)
        .join(' AND '),
      values: pairs.map(([, value]) => value),
    }
  }

  try {
    switch (operation) {
      case 'INSERT': {
        if (!newRow) break
        const pairs = Object.entries(newRow)
        if (pairs.length === 0) {
          // A column-less insert has no valid "() VALUES ()" form in PostgreSQL.
          await pglite.exec(`INSERT INTO ${target} DEFAULT VALUES`)
          break
        }
        const columns = pairs.map(([col]) => quoteIdent(col)).join(', ')
        const placeholders = pairs.map((_, i) => `$${i + 1}`).join(', ')
        await pglite.query(
          `INSERT INTO ${target} (${columns}) VALUES (${placeholders})`,
          pairs.map(([, value]) => value)
        )
        break
      }
      case 'UPDATE': {
        if (!newRow) break
        const pairs = Object.entries(newRow)
        if (pairs.length === 0) break // nothing to set
        const setClauses = pairs.map(([col], i) => `${quoteIdent(col)} = $${i + 1}`)
        // Filter parameters are numbered after the SET parameters.
        const filter = buildRowFilter(pairs.length + 1)
        await pglite.query(
          `UPDATE ${target} SET ${setClauses.join(', ')} WHERE ${filter.clause}`,
          [...pairs.map(([, value]) => value), ...filter.values]
        )
        break
      }
      case 'DELETE': {
        const filter = buildRowFilter(1)
        await pglite.query(`DELETE FROM ${target} WHERE ${filter.clause}`, filter.values)
        break
      }
      case 'TRUNCATE': {
        await pglite.exec(`TRUNCATE ${target}`)
        break
      }
    }
  } catch (e) {
    if (e instanceof Error && (e.message.includes('PGLite') || e.message.includes('write error'))) {
      throw new Error(`PGLite replay error: ${e.message}`)
    }
    throw e
  }
}
|
|
1121
|
+
}
|
|
1122
|
+
|
|
1123
|
+
// =============================================================================
|
|
1124
|
+
// Factory Function
|
|
1125
|
+
// =============================================================================
|
|
1126
|
+
|
|
1127
|
+
/**
 * Factory for `PITRManager` that rejects incomplete configuration up front.
 *
 * @param config - Manager configuration; `bucket` and `doId` must both be truthy.
 * @returns A new manager built from `config`.
 * @throws Error when `bucket` is missing, or when `doId` is missing or empty.
 */
export function createPITRManager(config: PITRConfig): PITRManager {
  // Checked in order; the first missing field determines the error.
  const required = [
    ['bucket', 'PITRManager requires a valid R2 bucket'],
    ['doId', 'PITRManager requires a non-empty doId'],
  ] as const

  for (const [field, message] of required) {
    if (!config[field]) {
      throw new Error(message)
    }
  }

  return new PITRManager(config)
}
|