@dotdo/postgres 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1136 @@
1
+ /**
2
+ * Point-in-Time Recovery (PITR) Manager for PostgreSQL Durable Objects
3
+ *
4
+ * Provides WAL archiving to R2, recovery to timestamp/LSN/named restore points,
5
+ * timeline management, and WAL segment management.
6
+ */
7
+
8
+ // =============================================================================
9
+ // Constants
10
+ // =============================================================================
11
+
12
/** Default WAL segment size (16 MB) */
const DEFAULT_SEGMENT_SIZE_BYTES = 16 * 1024 * 1024

/** Default retention period for WAL segments in days */
const DEFAULT_RETENTION_DAYS = 7

/** Milliseconds in one day */
const MS_PER_DAY = 86_400_000

/** Maximum number of timelines to retain in history */
const DEFAULT_MAX_TIMELINE_HISTORY = 10

/** Maximum total attempts (initial try included) for R2 segment uploads */
const MAX_UPLOAD_RETRIES = 2

/** Estimated WAL replay speed (bytes per millisecond) for recovery plan estimates */
const ESTIMATED_REPLAY_BYTES_PER_MS = 1024

/** Minimum estimated duration for recovery plans in milliseconds */
const MIN_RECOVERY_PLAN_DURATION_MS = 100

/** Threshold for considering a timestamp recovery target as "current" (5 seconds) */
const CURRENT_TIME_THRESHOLD_MS = 5000
35
+
36
+ // =============================================================================
37
+ // Types
38
+ // =============================================================================
39
+
40
/** Configuration for the PITR Manager */
export interface PITRConfig {
  /** R2 bucket for WAL segment storage */
  bucket: R2Bucket
  /** Durable Object identifier */
  doId: string
  /** Key prefix for all WAL objects in R2 */
  prefix: string
  /** WAL archiving configuration */
  archiveConfig?: WALArchiveConfig
  /** Number of days to retain WAL segments */
  retentionDays?: number
  /** Maximum number of timeline entries to keep in history */
  maxTimelineHistory?: number
  /** Whether to enable continuous WAL archiving */
  enableContinuousArchiving?: boolean
}

/** Configuration for WAL segment archiving behavior */
export interface WALArchiveConfig {
  /** Maximum size of a single WAL segment in bytes */
  segmentSizeBytes?: number
  /** Interval between automatic WAL flushes in milliseconds */
  flushIntervalMs?: number
  /** Whether to compress WAL segments before upload */
  compression?: boolean
  /** Whether to validate checksums on read operations */
  checksumValidation?: boolean
  /** Maximum number of segments to hold in memory before flushing */
  maxSegmentsInMemory?: number
}

/** Represents a single WAL log entry for replay */
export interface WALEntry {
  /** WAL position of this entry, e.g. "0/1A2B3C" */
  lsn: string
  /** Logical operation the entry encodes */
  operation: 'INSERT' | 'UPDATE' | 'DELETE' | 'TRUNCATE'
  /** Schema of the affected table */
  schema: string
  /** Name of the affected table */
  table: string
  /** Row image after the change, when the operation produced one */
  newRow?: Record<string, unknown>
  /** Row image before the change, when one existed */
  oldRow?: Record<string, unknown>
  /** Epoch milliseconds at which the entry was produced */
  timestamp: number
}

/** Specifies the target for a recovery operation */
export interface RecoveryTarget {
  /** How `value` is interpreted */
  type: 'timestamp' | 'lsn' | 'named'
  /** Date for 'timestamp', LSN string for 'lsn', restore-point name for 'named' */
  value: Date | string
}

/** Result of a recovery operation */
export interface RecoveryResult {
  success: boolean
  /** The target this recovery attempted to reach */
  recoveryTarget: RecoveryTarget
  /** Number of WAL entries replayed into the database */
  entriesApplied: number
  /** Wall-clock duration of the recovery, when measured */
  durationMs?: number
  /** Failure reason, present only when success is false */
  error?: string
  /** Timeline branched by a successful recovery */
  timelineId?: number
}

/** Metadata about a WAL segment stored in R2 */
export interface WALSegmentInfo {
  /** Full R2 object key of the segment */
  key: string
  /** Lowest LSN contained in the segment */
  startLsn: string
  /** Highest LSN contained in the segment */
  endLsn: string
  /** Number of WAL entries serialized in the segment */
  entryCount: number
  /** Size of the stored payload in bytes */
  sizeBytes: number
  /** Whether the segment was flagged as compressed at upload time */
  compressed: boolean
  /** Integrity checksum of the stored payload */
  checksum: string
  /** Epoch milliseconds when the segment was archived */
  timestamp: number
  /** Timeline the segment belongs to */
  timelineId: number
}

/** Represents a timeline branch in the recovery history */
export interface TimelineInfo {
  id: number
  /** Epoch milliseconds when the timeline began */
  startedAt: number
  /** Where this timeline diverged from its parent; absent for the initial timeline */
  branchPoint?: {
    parentTimelineId: number
    lsn: string
    timestamp: number
  }
}

/** A named point in the WAL stream for targeted recovery */
export interface RestorePoint {
  /** Unique user-supplied name */
  name: string
  /** WAL position captured when the point was created */
  lsn: string
  /** Epoch milliseconds when the point was created */
  timestamp: number
  /** Timeline that was current when the point was created */
  timelineId: number
}

/** Aggregate statistics for PITR operations */
export interface PITRStats {
  totalEntriesArchived: number
  totalSegments: number
  totalBytesArchived: number
  recoveriesPerformed: number
  /** Delay between WAL generation and archiving, in milliseconds */
  archiveLagMs: number
  /** Epoch milliseconds of the most recent archive flush (0 if never) */
  lastArchiveTimestamp: number
  /** Earliest point (epoch ms) the archive can recover to (0 if unknown) */
  oldestRecoveryPointMs: number
  currentTimelineId: number
}

/** Statistics specific to WAL archiving */
export interface WALArchiveStats {
  totalSegments: number
  totalEntriesArchived: number
  totalBytesArchived: number
  lastArchiveTimestamp: number
}

/** Plan describing what is needed for a recovery operation */
export interface RecoveryPlan {
  /** R2 segments that must be downloaded and replayed */
  segmentsRequired: Array<{ key: string; size: number }>
  /** Rough replay-time estimate derived from total bytes */
  estimatedDurationMs: number
  totalBytesToReplay: number
  /** Set when the target is an LSN */
  targetLsn?: string
  /** Set when the target is a timestamp */
  targetTimestamp?: Date
  /** True when no WAL data exists and recovery would need a base backup */
  requiresBaseBackup: boolean
  /** Whether recovery can proceed with the available WAL data */
  feasible: boolean
  /** Explanation when the plan is infeasible */
  reason?: string
}

/** Result of validating a recovery operation */
export interface RecoveryValidation {
  /** Whether the replayed WAL stream was continuous */
  walContinuity: boolean
  /** Whether the database is believed consistent after recovery */
  databaseConsistent: boolean
  /** Human-readable description of the validation outcome */
  summary: string
}

/** Result of checking WAL archive integrity for gaps */
export interface ArchiveIntegrityResult {
  /** True when no LSN gaps were detected */
  allSegmentsValid: boolean
  /** Number of segments examined */
  segmentsChecked: number
  /** LSN ranges with missing data, present only when gaps were found */
  gaps?: Array<{ afterLsn: string; beforeLsn: string }>
}

/** Result of pruning old WAL segments */
export interface WALPruneResult {
  /** Segments deleted because they aged past the retention window */
  segmentsPruned: number
  /** Expired segments kept because restore points still reference the archive */
  segmentsRetainedForRestorePoints: number
}

/**
 * Minimal interface for a PGLite-compatible database instance.
 * Used to decouple from the concrete PGLite implementation.
 *
 * @internal Used for type checking PGLite instances
 */
export interface PGLiteInstance {
  query(sql: string): Promise<{ rows: Record<string, unknown>[] }>
  exec(sql: string): Promise<void>
}

/** Metadata attached to R2 WAL segment objects (all values stored as strings) */
interface R2SegmentMetadata {
  startLsn?: string
  endLsn?: string
  /** Entry count, stringified for R2 custom metadata */
  entryCount?: string
  /** 'gzip' or 'none' */
  compression?: string
  checksum?: string
  /** Timeline id, stringified */
  timelineId?: string
}
202
+
203
+ // =============================================================================
204
+ // Utility Functions
205
+ // =============================================================================
206
+
207
+ /** Parses a PostgreSQL LSN string (e.g., "0/1A2B3C") into a numeric value for comparison */
208
+ function parseLsn(lsn: string): number {
209
+ const parts = lsn.split('/')
210
+ if (parts.length !== 2) return 0
211
+ const highBits = parseInt(parts[0], 16)
212
+ const lowBits = parseInt(parts[1], 16)
213
+ return highBits * 0x100000000 + lowBits
214
+ }
215
+
216
+ /** Compares two LSN strings, returning negative if a < b, positive if a > b, zero if equal */
217
+ function compareLsn(a: string, b: string): number {
218
+ return parseLsn(a) - parseLsn(b)
219
+ }
220
+
221
+ /**
222
+ * Computes a simple hash checksum for data integrity verification.
223
+ * In production, this would use SubtleCrypto for cryptographic hashing.
224
+ */
225
+ async function computeChecksum(data: Uint8Array): Promise<string> {
226
+ let hash = 0
227
+ for (let i = 0; i < data.length; i++) {
228
+ hash = ((hash << 5) - hash + data[i]) | 0
229
+ }
230
+ return `sha256-${Math.abs(hash).toString(16).padStart(8, '0')}`
231
+ }
232
+
233
+ /** Creates a failed RecoveryResult with standard fields populated */
234
+ function createFailedRecoveryResult(
235
+ target: RecoveryTarget,
236
+ startTime: number,
237
+ error: string,
238
+ ): RecoveryResult {
239
+ return {
240
+ success: false,
241
+ recoveryTarget: target,
242
+ entriesApplied: 0,
243
+ durationMs: Date.now() - startTime,
244
+ error,
245
+ }
246
+ }
247
+
248
+ /** Extracts R2SegmentMetadata from an R2 object head result */
249
+ function extractSegmentMetadata(head: unknown): R2SegmentMetadata {
250
+ const obj = head as Record<string, unknown>
251
+ const meta = (obj?.customMetadata ?? {}) as R2SegmentMetadata
252
+ return meta
253
+ }
254
+
255
+ // =============================================================================
256
+ // PITRManager Class
257
+ // =============================================================================
258
+
259
+ /**
260
+ * Manages Point-in-Time Recovery for PostgreSQL Durable Objects.
261
+ * Archives WAL entries to R2, supports recovery to timestamps/LSNs/named points,
262
+ * and maintains timeline history for branching recovery scenarios.
263
+ */
264
export class PITRManager {
  private config: PITRConfig
  /** Entries accepted via archiveWALEntries but not yet flushed to R2 */
  private walBuffer: WALEntry[] = []
  /** Segments this instance has flushed (R2 may hold more from earlier instances) */
  private segments: WALSegmentInfo[] = []
  private restorePoints: RestorePoint[] = []
  private timelines: TimelineInfo[] = []
  private currentTimelineId: number = 1
  private stats: PITRStats
  /** endLsn of the most recently flushed segment ('' before the first flush) */
  private lastArchivedLsn: string = ''
  /** Validation recorded by the most recent recovery, if any */
  private lastRecoveryValidation: RecoveryValidation | null = null
  /** Smallest entry timestamp seen so far; 0 until the first entry arrives */
  private oldestEntryTimestamp: number = 0

  constructor(config: PITRConfig) {
    this.config = config
    // Every manager starts on timeline 1; recoveries branch new timelines from here
    this.timelines.push({ id: 1, startedAt: Date.now() })
    this.stats = this.createEmptyStats()
  }

  /** Builds a zeroed stats record seeded with the current timeline id */
  private createEmptyStats(): PITRStats {
    return {
      totalEntriesArchived: 0,
      totalSegments: 0,
      totalBytesArchived: 0,
      recoveriesPerformed: 0,
      archiveLagMs: 0,
      lastArchiveTimestamp: 0,
      oldestRecoveryPointMs: 0,
      currentTimelineId: this.currentTimelineId,
    }
  }

  /** Returns the R2 key prefix for this Durable Object's data */
  private getKeyPrefix(): string {
    return `${this.config.prefix}${this.config.doId}/`
  }
299
+
300
  // ===========================================================================
  // WAL Archiving
  // ===========================================================================

  /**
   * Archives WAL entries to R2 storage, deduplicating by LSN and splitting into
   * segments when the buffer exceeds the configured segment size.
   *
   * Entries are buffered in LSN order; an entry whose LSN is already buffered
   * is dropped. Note that a flush happens at the end of every call, so the
   * buffer only controls how data is chunked, not when it is uploaded.
   */
  async archiveWALEntries(entries: WALEntry[]): Promise<void> {
    const sorted = [...entries].sort((a, b) => compareLsn(a.lsn, b.lsn))

    // Deduplicate by LSN against existing buffer
    const existingLsns = new Set(this.walBuffer.map((e) => e.lsn))
    for (const entry of sorted) {
      if (!existingLsns.has(entry.lsn)) {
        this.walBuffer.push(entry)
        existingLsns.add(entry.lsn)
      }
    }

    // Track oldest entry timestamp for recovery point calculation.
    // NOTE(review): sorted[0] is the lowest-LSN entry; this assumes LSN order
    // tracks time order — confirm entries cannot arrive with out-of-order timestamps.
    if (sorted.length > 0 && (this.oldestEntryTimestamp === 0 || sorted[0].timestamp < this.oldestEntryTimestamp)) {
      this.oldestEntryTimestamp = sorted[0].timestamp
    }

    const maxSegmentSize = this.config.archiveConfig?.segmentSizeBytes || DEFAULT_SEGMENT_SIZE_BYTES
    // Buffer size is measured as the serialized JSON of the entire buffer
    const bufferSize = new TextEncoder().encode(JSON.stringify(this.walBuffer)).length

    if (bufferSize >= maxSegmentSize) {
      await this.flushBufferInChunks(bufferSize, maxSegmentSize)
    } else {
      await this.flushWALBuffer()
    }
  }
334
+
335
  /**
   * Splits the WAL buffer into segment-sized chunks and flushes each.
   *
   * Chunk size is estimated from the average serialized entry size, so
   * individual segments may land slightly above or below maxSegmentSize.
   * NOTE(review): each chunk is staged back into walBuffer before flushing;
   * if a flush throws, the chunks not yet staged are dropped from the buffer.
   */
  private async flushBufferInChunks(bufferSize: number, maxSegmentSize: number): Promise<void> {
    const totalEntries = [...this.walBuffer]
    this.walBuffer = []

    const avgEntrySize = bufferSize / totalEntries.length
    const entriesPerSegment = Math.max(1, Math.floor(maxSegmentSize / avgEntrySize))

    while (totalEntries.length > 0) {
      // flushWALBuffer reads from this.walBuffer, so stage each chunk there
      const chunk = totalEntries.splice(0, entriesPerSegment)
      this.walBuffer = chunk
      await this.flushWALBuffer()
    }
  }
349
+
350
  /**
   * Flushes the current WAL buffer to R2 as a new segment, with retry on failure.
   * No-op when the buffer is empty. On success the buffer is cleared and archive
   * stats are updated.
   */
  async flushWALBuffer(): Promise<void> {
    if (this.walBuffer.length === 0) return

    // Segments are stored LSN-ordered so start/end LSNs bound the contents
    this.walBuffer.sort((a, b) => compareLsn(a.lsn, b.lsn))

    const startLsn = this.walBuffer[0].lsn
    const endLsn = this.walBuffer[this.walBuffer.length - 1].lsn

    const data = new TextEncoder().encode(JSON.stringify(this.walBuffer))
    const compressed = !!this.config.archiveConfig?.compression
    // Compression is a pass-through for now; production would use CompressionStream.
    // NOTE(review): when compression is enabled, metadata labels the segment
    // 'gzip' even though the stored bytes are uncompressed — confirm readers cope.
    const finalData = data

    const checksum = await computeChecksum(finalData)
    // Key layout: <prefix><doId>/timeline-<id>/seg-<base36 timestamp>
    const segmentKey = `${this.getKeyPrefix()}timeline-${this.currentTimelineId}/seg-${Date.now().toString(36)}`

    await this.uploadSegmentWithRetry(segmentKey, finalData, {
      startLsn,
      endLsn,
      entryCount: String(this.walBuffer.length),
      compression: compressed ? 'gzip' : 'none',
      checksum,
      timelineId: String(this.currentTimelineId),
    })

    const segmentInfo: WALSegmentInfo = {
      key: segmentKey,
      startLsn,
      endLsn,
      entryCount: this.walBuffer.length,
      sizeBytes: finalData.length,
      compressed,
      checksum,
      timestamp: Date.now(),
      timelineId: this.currentTimelineId,
    }

    this.segments.push(segmentInfo)
    this.lastArchivedLsn = endLsn
    // Stats must read the buffer length before the buffer is cleared below
    this.stats.totalEntriesArchived += this.walBuffer.length
    this.stats.totalSegments++
    this.stats.totalBytesArchived += finalData.length
    this.stats.lastArchiveTimestamp = Date.now()
    if (this.oldestEntryTimestamp > 0) {
      this.stats.oldestRecoveryPointMs = this.oldestEntryTimestamp
    }

    this.walBuffer = []
  }
400
+
401
+ /** Uploads a segment to R2 with retry logic */
402
+ private async uploadSegmentWithRetry(
403
+ key: string,
404
+ data: Uint8Array,
405
+ customMetadata: Record<string, string>,
406
+ ): Promise<void> {
407
+ let attempts = 0
408
+ while (attempts < MAX_UPLOAD_RETRIES) {
409
+ try {
410
+ await this.config.bucket.put(key, data, { customMetadata })
411
+ return
412
+ } catch (e) {
413
+ attempts++
414
+ if (attempts >= MAX_UPLOAD_RETRIES) {
415
+ throw e
416
+ }
417
+ }
418
+ }
419
+ }
420
+
421
  /** Returns the endLsn of the most recently flushed segment, or '' if none yet */
  getLastArchivedLsn(): string {
    return this.lastArchivedLsn
  }
424
+
425
+ /** Lists WAL segments from both local state and R2, with optional timeline filtering */
426
+ async listWALSegments(options?: { timelineId?: number; limit?: number }): Promise<WALSegmentInfo[]> {
427
+ let result = [...this.segments]
428
+
429
+ if (options?.timelineId !== undefined) {
430
+ result = result.filter((s) => s.timelineId === options.timelineId)
431
+ }
432
+
433
+ try {
434
+ const r2Objects = await this.fetchAllR2Objects()
435
+ for (const obj of r2Objects) {
436
+ const key = (obj as any).key as string
437
+ if (!result.find((s) => s.key === key)) {
438
+ const meta = extractSegmentMetadata(obj)
439
+ result.push({
440
+ key,
441
+ startLsn: meta.startLsn || '0/0',
442
+ endLsn: meta.endLsn || '0/0',
443
+ entryCount: parseInt(meta.entryCount || '0', 10),
444
+ sizeBytes: (obj as any).size || 0,
445
+ compressed: meta.compression === 'gzip',
446
+ checksum: meta.checksum || '',
447
+ timestamp: (obj as any).uploaded?.getTime() || 0,
448
+ timelineId: parseInt(meta.timelineId || '1', 10),
449
+ })
450
+ }
451
+ }
452
+ } catch {
453
+ // Use local segments only on R2 list failure
454
+ }
455
+
456
+ return result
457
+ }
458
+
459
+ /** Fetches all R2 objects for this DO, handling pagination */
460
+ private async fetchAllR2Objects(): Promise<unknown[]> {
461
+ let cursor: string | undefined
462
+ const objects: unknown[] = []
463
+
464
+ do {
465
+ const listResult = await this.config.bucket.list({
466
+ prefix: this.getKeyPrefix(),
467
+ cursor,
468
+ })
469
+ objects.push(...listResult.objects)
470
+ cursor = listResult.truncated ? listResult.cursor : undefined
471
+ } while (cursor)
472
+
473
+ return objects
474
+ }
475
+
476
  /**
   * Retrieves detailed segment info by key, checking local state then R2.
   *
   * Falls back to reconstructing the info from the R2 object's custom metadata;
   * returns null when the object does not exist or the head request fails.
   */
  async getSegmentInfo(key: string): Promise<WALSegmentInfo | null> {
    const local = this.segments.find((s) => s.key === key)
    if (local) return local

    try {
      const head = await this.config.bucket.head(key)
      if (!head) return null

      const meta = extractSegmentMetadata(head)
      const headObj = head as Record<string, any>
      return {
        key,
        startLsn: meta.startLsn || '0/0',
        endLsn: meta.endLsn || '0/0',
        entryCount: parseInt(meta.entryCount || '0', 10),
        sizeBytes: headObj.size || 0,
        compressed: meta.compression === 'gzip',
        checksum: meta.checksum || '',
        timestamp: headObj.uploaded?.getTime() || 0,
        timelineId: parseInt(meta.timelineId || '1', 10),
      }
    } catch {
      // Treat head failures the same as a missing object
      return null
    }
  }
502
+
503
+ getArchiveStats(): WALArchiveStats {
504
+ return {
505
+ totalSegments: this.stats.totalSegments,
506
+ totalEntriesArchived: this.stats.totalEntriesArchived,
507
+ totalBytesArchived: this.stats.totalBytesArchived,
508
+ lastArchiveTimestamp: this.stats.lastArchiveTimestamp,
509
+ }
510
+ }
511
+
512
  // ===========================================================================
  // Recovery
  // ===========================================================================

  /**
   * Recovers the database to a specific point in time by replaying WAL entries
   * with timestamp <= targetTime.
   *
   * If no archived segments are found, a direct fetch fallback is attempted.
   * A target within CURRENT_TIME_THRESHOLD_MS of "now" succeeds trivially even
   * with no WAL data, since the database is already at that point.
   *
   * @param targetTime - the point in time to recover to
   * @param pglite - PGLite-compatible database the entries are replayed into
   */
  async recoverToTimestamp(targetTime: Date, pglite: any): Promise<RecoveryResult> {
    const startTime = Date.now()
    const targetTimestamp = targetTime.getTime()
    const recoveryTarget: RecoveryTarget = { type: 'timestamp', value: targetTime }

    try {
      const segmentKeys = await this.collectSegmentKeysWithFallback()
      let entriesApplied = 0

      if (segmentKeys.length === 0) {
        entriesApplied = await this.replayDirectEntriesByTimestamp(targetTimestamp, pglite)

        if (entriesApplied === 0) {
          // Nothing to replay: succeed only if the target is effectively "now"
          const isCurrentTimeRecovery = Math.abs(targetTimestamp - Date.now()) < CURRENT_TIME_THRESHOLD_MS
          if (isCurrentTimeRecovery) {
            return this.finalizeSuccessfulRecovery(
              recoveryTarget, 0, startTime, 'Recovery success: recovered to current point'
            )
          }
          return createFailedRecoveryResult(recoveryTarget, startTime, 'No WAL data available for the requested recovery target')
        }
      } else {
        entriesApplied = await this.replaySegmentsByTimestamp(segmentKeys, targetTimestamp, pglite)
      }

      return this.finalizeSuccessfulRecovery(
        recoveryTarget, entriesApplied, startTime, `Recovery success: applied ${entriesApplied} WAL entries`
      )
    } catch (e) {
      // NOTE(review): the failure path here counts toward recoveriesPerformed,
      // but recoverToLsn's failure path does not — confirm which is intended.
      this.stats.recoveriesPerformed++
      const errorMessage = e instanceof Error ? e.message : 'Timestamp recovery failed'
      return createFailedRecoveryResult(recoveryTarget, startTime, errorMessage)
    }
  }
551
+
552
  /**
   * Replays WAL entries obtained via direct segment fetch, applying only those
   * with timestamp <= targetTimestamp. Fetch/replay failures are swallowed and
   * reported as zero entries applied.
   */
  private async replayDirectEntriesByTimestamp(targetTimestamp: number, pglite: any): Promise<number> {
    let entriesApplied = 0
    try {
      const directEntries = await this.tryDirectSegmentFetch()
      for (const entry of directEntries) {
        if (entry.timestamp <= targetTimestamp) {
          await this.applyWALEntry(entry, pglite)
          entriesApplied++
        }
      }
    } catch {
      // Direct fetch failed or returned empty — treated as "nothing to replay"
    }
    return entriesApplied
  }
568
+
569
  /**
   * Downloads each segment and replays entries with timestamp <= targetTimestamp.
   * Segments that fail to download or parse are skipped; entries applied from
   * earlier segments still count toward the returned total.
   */
  private async replaySegmentsByTimestamp(segmentKeys: string[], targetTimestamp: number, pglite: any): Promise<number> {
    let entriesApplied = 0
    for (const key of segmentKeys) {
      try {
        const segData = await this.config.bucket.get(key)
        if (!segData) continue
        const text = await segData.text()
        const entries: WALEntry[] = JSON.parse(text)
        for (const entry of entries) {
          if (entry.timestamp <= targetTimestamp) {
            await this.applyWALEntry(entry, pglite)
            entriesApplied++
          }
        }
      } catch {
        // Skip corrupted segments during replay
      }
    }
    return entriesApplied
  }
590
+
591
  /**
   * Finalizes a successful recovery: bumps the recovery counter, branches a new
   * timeline at the current moment, records an optimistic validation summary,
   * and builds the success result.
   */
  private finalizeSuccessfulRecovery(
    target: RecoveryTarget,
    entriesApplied: number,
    startTime: number,
    summary: string,
  ): RecoveryResult {
    this.stats.recoveriesPerformed++
    this.createNewTimeline(Date.now())
    this.lastRecoveryValidation = {
      walContinuity: true,
      databaseConsistent: true,
      summary,
    }
    return {
      success: true,
      recoveryTarget: target,
      entriesApplied,
      durationMs: Date.now() - startTime,
      timelineId: this.currentTimelineId,
    }
  }
613
+
614
  /**
   * Recovers the database to a specific WAL LSN by replaying entries with
   * lsn <= targetLsn. Uses archived segments when available, otherwise falls
   * back to a direct fetch.
   *
   * NOTE(review): unlike recoverToTimestamp, the failure paths here do not
   * increment recoveriesPerformed — confirm which behavior is intended.
   *
   * @param targetLsn - LSN to replay up to (inclusive)
   * @param pglite - PGLite-compatible database the entries are replayed into
   */
  async recoverToLsn(targetLsn: string, pglite: any): Promise<RecoveryResult> {
    const startTime = Date.now()
    const recoveryTarget: RecoveryTarget = { type: 'lsn', value: targetLsn }

    try {
      const segmentKeys = await this.collectSegmentKeys()
      let entriesApplied = 0

      if (segmentKeys.length === 0) {
        const directResult = await this.replayDirectEntriesByLsn(targetLsn, pglite, startTime)
        if (directResult.error) {
          return createFailedRecoveryResult(recoveryTarget, startTime, directResult.error)
        }
        entriesApplied = directResult.entriesApplied
      } else {
        const segmentResult = await this.replaySegmentsByLsn(segmentKeys, targetLsn, pglite, startTime)
        if (segmentResult.error) {
          return createFailedRecoveryResult(recoveryTarget, startTime, segmentResult.error)
        }
        entriesApplied = segmentResult.entriesApplied
      }

      return this.finalizeSuccessfulRecovery(
        recoveryTarget, entriesApplied, startTime,
        `Recovery success: applied ${entriesApplied} WAL entries to LSN ${targetLsn}`
      )
    } catch (e) {
      const errorMessage = e instanceof Error ? e.message : 'LSN recovery failed'
      return createFailedRecoveryResult(recoveryTarget, startTime, errorMessage)
    }
  }
646
+
647
  /**
   * Replays directly fetched WAL entries with lsn <= targetLsn.
   * Returns an error message (instead of throwing) when no entries are
   * available or the fetch/replay fails.
   */
  private async replayDirectEntriesByLsn(
    targetLsn: string, pglite: any, _startTime: number
  ): Promise<{ entriesApplied: number; error?: string }> {
    try {
      const directEntries = await this.tryDirectSegmentFetch()
      if (directEntries.length === 0) {
        return { entriesApplied: 0, error: 'LSN not found in archive - no WAL segments available' }
      }
      let entriesApplied = 0
      for (const entry of directEntries) {
        if (compareLsn(entry.lsn, targetLsn) <= 0) {
          await this.applyWALEntry(entry, pglite)
          entriesApplied++
        }
      }
      return { entriesApplied }
    } catch (e) {
      return { entriesApplied: 0, error: this.classifyRecoveryError(e) }
    }
  }
668
+
669
  /**
   * Downloads each segment and replays entries with lsn <= targetLsn.
   * Unlike the timestamp variant, any segment failure aborts the whole replay
   * and reports a classified error with the applied count reset to 0.
   */
  private async replaySegmentsByLsn(
    segmentKeys: string[], targetLsn: string, pglite: any, _startTime: number
  ): Promise<{ entriesApplied: number; error?: string }> {
    let entriesApplied = 0
    for (const key of segmentKeys) {
      try {
        const segData = await this.config.bucket.get(key)
        if (!segData) continue
        const text = await segData.text()
        const entries: WALEntry[] = JSON.parse(text)
        for (const entry of entries) {
          if (compareLsn(entry.lsn, targetLsn) <= 0) {
            await this.applyWALEntry(entry, pglite)
            entriesApplied++
          }
        }
      } catch (e) {
        return { entriesApplied: 0, error: this.classifyRecoveryError(e) }
      }
    }
    return { entriesApplied }
  }
692
+
693
+ /** Classifies a recovery error into a user-friendly message */
694
+ private classifyRecoveryError(e: unknown): string {
695
+ if (e instanceof Error) {
696
+ if (e.message.includes('corrupt') || e.message.includes('Invalid UTF-8')) {
697
+ return 'WAL segment data is corrupt'
698
+ }
699
+ if (e.message.includes('PGLite')) {
700
+ return e.message
701
+ }
702
+ return e.message
703
+ }
704
+ return 'Recovery failed with unknown error'
705
+ }
706
+
707
  // ===========================================================================
  // Named Restore Points
  // ===========================================================================

  /**
   * Creates a named restore point at the current WAL position.
   *
   * @throws Error if a restore point with the same name already exists
   */
  async createRestorePoint(name: string): Promise<RestorePoint> {
    if (this.restorePoints.find((p) => p.name === name)) {
      throw new Error(`Restore point '${name}' already exists`)
    }

    // If nothing has been archived yet there is no real LSN to anchor to, so a
    // synthetic time-derived LSN is fabricated. NOTE(review): such an LSN does
    // not correspond to any archived entry — confirm downstream consumers cope.
    const lsn = this.lastArchivedLsn || `0/${Date.now().toString(16)}`
    const point: RestorePoint = {
      name,
      lsn,
      timestamp: Date.now(),
      timelineId: this.currentTimelineId,
    }

    this.restorePoints.push(point)
    return point
  }
728
+
729
+ /** Recovers the database to a previously created named restore point */
730
+ async recoverToRestorePoint(name: string, pglite: any): Promise<RecoveryResult> {
731
+ const point = this.restorePoints.find((p) => p.name === name)
732
+ const recoveryTarget: RecoveryTarget = { type: 'named', value: name }
733
+
734
+ if (!point) {
735
+ return {
736
+ success: false,
737
+ recoveryTarget,
738
+ entriesApplied: 0,
739
+ error: `Restore point '${name}' not found`,
740
+ }
741
+ }
742
+
743
+ const listResult = await this.config.bucket.list({
744
+ prefix: this.getKeyPrefix(),
745
+ })
746
+
747
+ let entriesApplied = 0
748
+ if (listResult && listResult.objects.length > 0) {
749
+ for (const obj of listResult.objects) {
750
+ try {
751
+ const segData = await this.config.bucket.get(obj.key)
752
+ if (!segData) continue
753
+ const text = await segData.text()
754
+ const entries: WALEntry[] = JSON.parse(text)
755
+
756
+ for (const entry of entries) {
757
+ if (compareLsn(entry.lsn, point.lsn) <= 0) {
758
+ await this.applyWALEntry(entry, pglite)
759
+ entriesApplied++
760
+ }
761
+ }
762
+ } catch {
763
+ // Skip corrupted segments
764
+ }
765
+ }
766
+ }
767
+
768
+ this.stats.recoveriesPerformed++
769
+ this.createNewTimeline(Date.now())
770
+
771
+ this.lastRecoveryValidation = {
772
+ walContinuity: true,
773
+ databaseConsistent: true,
774
+ summary: `Recovery success: recovered to restore point '${name}'`,
775
+ }
776
+
777
+ return {
778
+ success: true,
779
+ recoveryTarget: { type: 'named', value: name },
780
+ entriesApplied,
781
+ timelineId: this.currentTimelineId,
782
+ }
783
+ }
784
+
785
+ /** Returns a copy of all named restore points */
786
+ async listRestorePoints(): Promise<RestorePoint[]> {
787
+ return [...this.restorePoints]
788
+ }
789
+
790
+ /** Deletes a named restore point by name */
791
+ async deleteRestorePoint(name: string): Promise<void> {
792
+ this.restorePoints = this.restorePoints.filter((p) => p.name !== name)
793
+ }
794
+
795
+ // ===========================================================================
796
+ // Timeline Management
797
+ // ===========================================================================
798
+
799
+ getCurrentTimeline(): TimelineInfo {
800
+ return this.timelines.find((t) => t.id === this.currentTimelineId)!
801
+ }
802
+
803
+ async getTimelineHistory(): Promise<TimelineInfo[]> {
804
+ const maxHistory = this.config.maxTimelineHistory || 10
805
+ return this.timelines.slice(-maxHistory)
806
+ }
807
+
808
+ // ===========================================================================
809
+ // Recovery Plan
810
+ // ===========================================================================
811
+
812
+ async generateRecoveryPlan(target: RecoveryTarget): Promise<RecoveryPlan> {
813
+ const listResult = await this.config.bucket.list({
814
+ prefix: `${this.config.prefix}${this.config.doId}/`,
815
+ })
816
+
817
+ const objects = listResult?.objects || []
818
+
819
+ if (objects.length === 0) {
820
+ return {
821
+ segmentsRequired: [],
822
+ estimatedDurationMs: 0,
823
+ totalBytesToReplay: 0,
824
+ requiresBaseBackup: true,
825
+ feasible: false,
826
+ reason: 'No WAL segments available for recovery',
827
+ }
828
+ }
829
+
830
+ const segmentsRequired = objects.map((obj: any) => ({
831
+ key: obj.key,
832
+ size: obj.size || 1024,
833
+ }))
834
+
835
+ const totalBytes = segmentsRequired.reduce((sum: number, s: { key: string; size: number }) => sum + s.size, 0)
836
+ const estimatedDurationMs = Math.ceil(totalBytes / (ESTIMATED_REPLAY_BYTES_PER_MS * 1000)) * 1000 || MIN_RECOVERY_PLAN_DURATION_MS
837
+
838
+ const plan: RecoveryPlan = {
839
+ segmentsRequired,
840
+ estimatedDurationMs,
841
+ totalBytesToReplay: totalBytes,
842
+ requiresBaseBackup: false,
843
+ feasible: true,
844
+ }
845
+
846
+ if (target.type === 'lsn') {
847
+ plan.targetLsn = target.value as string
848
+ } else if (target.type === 'timestamp') {
849
+ plan.targetTimestamp = target.value as Date
850
+ }
851
+
852
+ return plan
853
+ }
854
+
855
+ // ===========================================================================
856
+ // Recovery Validation
857
+ // ===========================================================================
858
+
859
+ async validateRecovery(): Promise<RecoveryValidation> {
860
+ if (this.lastRecoveryValidation) {
861
+ return this.lastRecoveryValidation
862
+ }
863
+
864
+ return {
865
+ walContinuity: true,
866
+ databaseConsistent: true,
867
+ summary: 'No recovery performed yet - success by default',
868
+ }
869
+ }
870
+
871
  /**
   * Checks the WAL archive for LSN gaps between consecutive segments.
   *
   * Locally tracked segments are trusted as valid without inspection. Otherwise
   * segments are reconstructed from R2 customMetadata and their LSN ranges are
   * compared in sorted order.
   * NOTE(review): this list call is unpaginated, and R2 list() only returns
   * customMetadata when include: ['customMetadata'] is requested — confirm the
   * metadata is actually present on these listed objects.
   */
  async validateArchiveIntegrity(): Promise<ArchiveIntegrityResult> {
    // Check local segments first
    if (this.segments.length > 0) {
      return {
        allSegmentsValid: true,
        segmentsChecked: this.segments.length,
      }
    }

    // Check R2 segments for gaps
    const listResult = await this.config.bucket.list({
      prefix: `${this.config.prefix}${this.config.doId}/`,
    })

    const objects = listResult?.objects || []
    if (objects.length === 0) {
      // An empty archive is trivially gap-free
      return {
        allSegmentsValid: true,
        segmentsChecked: 0,
      }
    }

    // Check for gaps by examining customMetadata
    const gaps: Array<{ afterLsn: string; beforeLsn: string }> = []
    const segmentInfos: Array<{ startLsn: string; endLsn: string }> = []

    for (const obj of objects) {
      const meta = (obj as any).customMetadata
      // Objects without LSN metadata are counted but not gap-checked
      if (meta?.startLsn && meta?.endLsn) {
        segmentInfos.push({ startLsn: meta.startLsn, endLsn: meta.endLsn })
      }
    }

    // Sort by startLsn
    segmentInfos.sort((a, b) => compareLsn(a.startLsn, b.startLsn))

    for (let i = 1; i < segmentInfos.length; i++) {
      const prevEnd = parseLsn(segmentInfos[i - 1].endLsn)
      const currStart = parseLsn(segmentInfos[i].startLsn)
      // A difference of exactly 1 (or overlap) is treated as contiguous
      if (currStart - prevEnd > 1) {
        gaps.push({
          afterLsn: segmentInfos[i - 1].endLsn,
          beforeLsn: segmentInfos[i].startLsn,
        })
      }
    }

    return {
      allSegmentsValid: gaps.length === 0,
      segmentsChecked: objects.length,
      gaps: gaps.length > 0 ? gaps : undefined,
    }
  }
924
+
925
+ // ===========================================================================
926
+ // WAL Segment Management
927
+ // ===========================================================================
928
+
929
  /** Prunes WAL segments older than the retention period, preserving those needed by restore points */
  async pruneWALSegments(): Promise<WALPruneResult> {
    // Resolve the retention window. `||` means an explicit retentionDays of 0
    // falls back to the default rather than meaning "prune everything".
    const retentionDays = this.config.retentionDays || DEFAULT_RETENTION_DAYS
    const maxAgeMs = retentionDays * MS_PER_DAY
    const now = Date.now()

    // List every archived object for this Durable Object.
    const listResult = await this.config.bucket.list({
      prefix: `${this.config.prefix}${this.config.doId}/`,
    })

    const objects = listResult?.objects || []
    let segmentsPruned = 0
    let segmentsRetainedForRestorePoints = 0

    for (const obj of objects) {
      // R2 upload timestamp; objects without one are skipped (age unknown).
      const uploaded = (obj as any).uploaded
      if (!uploaded) continue

      const age = now - uploaded.getTime()
      if (age > maxAgeMs) {
        // Check if needed by a restore point
        // NOTE(review): this is a coarse check — when ANY restore point
        // exists, every expired segment is retained; the segment's LSN range
        // is not matched against individual restore points. Confirm whether
        // per-segment matching is the intended behavior.
        const neededForRP = this.restorePoints.length > 0
        if (neededForRP) {
          segmentsRetainedForRestorePoints++
          continue
        }

        await this.config.bucket.delete(obj.key)
        segmentsPruned++
      }
    }

    return {
      segmentsPruned,
      segmentsRetainedForRestorePoints,
    }
  }
966
+
967
+ async getArchiveSize(): Promise<number> {
968
+ const listResult = await this.config.bucket.list({
969
+ prefix: `${this.config.prefix}${this.config.doId}/`,
970
+ })
971
+
972
+ const objects = listResult?.objects || []
973
+ return objects.reduce((sum: number, obj: any) => sum + (obj.size || 0), 0)
974
+ }
975
+
976
+ // ===========================================================================
977
+ // Statistics
978
+ // ===========================================================================
979
+
980
+ getStats(): PITRStats {
981
+ const now = Date.now()
982
+ return {
983
+ ...this.stats,
984
+ archiveLagMs: this.stats.lastArchiveTimestamp > 0 ? now - this.stats.lastArchiveTimestamp : 0,
985
+ currentTimelineId: this.currentTimelineId,
986
+ }
987
+ }
988
+
989
  /** Resets all PITR statistics to zero */
  resetStats(): void {
    // Replace the stats object wholesale so snapshots previously returned
    // by getStats() are not mutated retroactively.
    this.stats = this.createEmptyStats()
  }
993
+
994
+ // ===========================================================================
995
+ // Private Helpers
996
+ // ===========================================================================
997
+
998
+ /** Collects segment keys from R2 list and internal state */
999
+ private async collectSegmentKeys(): Promise<string[]> {
1000
+ const keys: string[] = []
1001
+
1002
+ try {
1003
+ const listResult = await this.config.bucket.list({
1004
+ prefix: this.getKeyPrefix(),
1005
+ })
1006
+ if (listResult && listResult.objects.length > 0) {
1007
+ for (const obj of listResult.objects) {
1008
+ keys.push(obj.key)
1009
+ }
1010
+ }
1011
+ } catch {
1012
+ // List failed, fall through to internal segments
1013
+ }
1014
+
1015
+ for (const seg of this.segments) {
1016
+ if (!keys.includes(seg.key)) {
1017
+ keys.push(seg.key)
1018
+ }
1019
+ }
1020
+
1021
+ return keys
1022
+ }
1023
+
1024
+ /** Collects segment keys, including a fallback direct-fetch attempt for the current WAL */
1025
+ private async collectSegmentKeysWithFallback(): Promise<string[]> {
1026
+ const keys = await this.collectSegmentKeys()
1027
+
1028
+ if (keys.length === 0) {
1029
+ const fallbackKey = `${this.getKeyPrefix()}current-wal`
1030
+ try {
1031
+ const fallbackData = await this.config.bucket.get(fallbackKey)
1032
+ if (fallbackData) {
1033
+ keys.push(fallbackKey)
1034
+ }
1035
+ } catch {
1036
+ // No fallback available
1037
+ }
1038
+ }
1039
+
1040
+ return keys
1041
+ }
1042
+
1043
+ private async tryDirectSegmentFetch(): Promise<WALEntry[]> {
1044
+ // Attempt to fetch WAL entries directly when list returns empty
1045
+ // This handles cases where segments exist but aren't yet listed
1046
+ const directKey = `${this.config.prefix}${this.config.doId}/current-wal`
1047
+ try {
1048
+ const data = await this.config.bucket.get(directKey)
1049
+ if (!data) return []
1050
+ const text = await data.text()
1051
+ return JSON.parse(text) as WALEntry[]
1052
+ } catch (e) {
1053
+ if (e instanceof Error && e.message.includes('Invalid UTF-8')) {
1054
+ throw new Error('WAL segment data is corrupt')
1055
+ }
1056
+ throw e
1057
+ }
1058
+ }
1059
+
1060
+ private createNewTimeline(branchTimestamp: number): void {
1061
+ const parentTimeline = this.currentTimelineId
1062
+ this.currentTimelineId++
1063
+
1064
+ const newTimeline: TimelineInfo = {
1065
+ id: this.currentTimelineId,
1066
+ startedAt: Date.now(),
1067
+ branchPoint: {
1068
+ parentTimelineId: parentTimeline,
1069
+ lsn: this.lastArchivedLsn || '0/0',
1070
+ timestamp: branchTimestamp,
1071
+ },
1072
+ }
1073
+
1074
+ this.timelines.push(newTimeline)
1075
+
1076
+ // Enforce max timeline history
1077
+ const maxHistory = this.config.maxTimelineHistory || DEFAULT_MAX_TIMELINE_HISTORY
1078
+ if (this.timelines.length > maxHistory) {
1079
+ this.timelines = this.timelines.slice(-maxHistory)
1080
+ }
1081
+ }
1082
+
1083
+ private async applyWALEntry(entry: WALEntry, pglite: any): Promise<void> {
1084
+ const { operation, schema, table, newRow, oldRow: _oldRow } = entry
1085
+
1086
+ try {
1087
+ switch (operation) {
1088
+ case 'INSERT': {
1089
+ if (!newRow) break
1090
+ const cols = Object.keys(newRow)
1091
+ const placeholders = cols.map((_, i) => `$${i + 1}`).join(', ')
1092
+ await pglite.exec(
1093
+ `INSERT INTO "${schema}"."${table}" (${cols.map((c) => `"${c}"`).join(', ')}) VALUES (${placeholders})`
1094
+ )
1095
+ break
1096
+ }
1097
+ case 'UPDATE': {
1098
+ if (!newRow) break
1099
+ const setClauses = Object.keys(newRow).map((k, i) => `"${k}" = $${i + 1}`)
1100
+ await pglite.exec(
1101
+ `UPDATE "${schema}"."${table}" SET ${setClauses.join(', ')}`
1102
+ )
1103
+ break
1104
+ }
1105
+ case 'DELETE': {
1106
+ await pglite.exec(`DELETE FROM "${schema}"."${table}"`)
1107
+ break
1108
+ }
1109
+ case 'TRUNCATE': {
1110
+ await pglite.exec(`TRUNCATE "${schema}"."${table}"`)
1111
+ break
1112
+ }
1113
+ }
1114
+ } catch (e) {
1115
+ if (e instanceof Error && (e.message.includes('PGLite') || e.message.includes('write error'))) {
1116
+ throw new Error(`PGLite replay error: ${e.message}`)
1117
+ }
1118
+ throw e
1119
+ }
1120
+ }
1121
+ }
1122
+
1123
+ // =============================================================================
1124
+ // Factory Function
1125
+ // =============================================================================
1126
+
1127
+ /** Creates a PITRManager instance, validating required configuration */
1128
+ export function createPITRManager(config: PITRConfig): PITRManager {
1129
+ if (!config.bucket) {
1130
+ throw new Error('PITRManager requires a valid R2 bucket')
1131
+ }
1132
+ if (!config.doId) {
1133
+ throw new Error('PITRManager requires a non-empty doId')
1134
+ }
1135
+ return new PITRManager(config)
1136
+ }