fraiseql-confiture 0.3.7__cp311-cp311-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. confiture/__init__.py +48 -0
  2. confiture/_core.cpython-311-darwin.so +0 -0
  3. confiture/cli/__init__.py +0 -0
  4. confiture/cli/dry_run.py +116 -0
  5. confiture/cli/lint_formatter.py +193 -0
  6. confiture/cli/main.py +1893 -0
  7. confiture/config/__init__.py +0 -0
  8. confiture/config/environment.py +263 -0
  9. confiture/core/__init__.py +51 -0
  10. confiture/core/anonymization/__init__.py +0 -0
  11. confiture/core/anonymization/audit.py +485 -0
  12. confiture/core/anonymization/benchmarking.py +372 -0
  13. confiture/core/anonymization/breach_notification.py +652 -0
  14. confiture/core/anonymization/compliance.py +617 -0
  15. confiture/core/anonymization/composer.py +298 -0
  16. confiture/core/anonymization/data_subject_rights.py +669 -0
  17. confiture/core/anonymization/factory.py +319 -0
  18. confiture/core/anonymization/governance.py +737 -0
  19. confiture/core/anonymization/performance.py +1092 -0
  20. confiture/core/anonymization/profile.py +284 -0
  21. confiture/core/anonymization/registry.py +195 -0
  22. confiture/core/anonymization/security/kms_manager.py +547 -0
  23. confiture/core/anonymization/security/lineage.py +888 -0
  24. confiture/core/anonymization/security/token_store.py +686 -0
  25. confiture/core/anonymization/strategies/__init__.py +41 -0
  26. confiture/core/anonymization/strategies/address.py +359 -0
  27. confiture/core/anonymization/strategies/credit_card.py +374 -0
  28. confiture/core/anonymization/strategies/custom.py +161 -0
  29. confiture/core/anonymization/strategies/date.py +218 -0
  30. confiture/core/anonymization/strategies/differential_privacy.py +398 -0
  31. confiture/core/anonymization/strategies/email.py +141 -0
  32. confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
  33. confiture/core/anonymization/strategies/hash.py +150 -0
  34. confiture/core/anonymization/strategies/ip_address.py +235 -0
  35. confiture/core/anonymization/strategies/masking_retention.py +252 -0
  36. confiture/core/anonymization/strategies/name.py +298 -0
  37. confiture/core/anonymization/strategies/phone.py +119 -0
  38. confiture/core/anonymization/strategies/preserve.py +85 -0
  39. confiture/core/anonymization/strategies/redact.py +101 -0
  40. confiture/core/anonymization/strategies/salted_hashing.py +322 -0
  41. confiture/core/anonymization/strategies/text_redaction.py +183 -0
  42. confiture/core/anonymization/strategies/tokenization.py +334 -0
  43. confiture/core/anonymization/strategy.py +241 -0
  44. confiture/core/anonymization/syncer_audit.py +357 -0
  45. confiture/core/blue_green.py +683 -0
  46. confiture/core/builder.py +500 -0
  47. confiture/core/checksum.py +358 -0
  48. confiture/core/connection.py +184 -0
  49. confiture/core/differ.py +522 -0
  50. confiture/core/drift.py +564 -0
  51. confiture/core/dry_run.py +182 -0
  52. confiture/core/health.py +313 -0
  53. confiture/core/hooks/__init__.py +87 -0
  54. confiture/core/hooks/base.py +232 -0
  55. confiture/core/hooks/context.py +146 -0
  56. confiture/core/hooks/execution_strategies.py +57 -0
  57. confiture/core/hooks/observability.py +220 -0
  58. confiture/core/hooks/phases.py +53 -0
  59. confiture/core/hooks/registry.py +295 -0
  60. confiture/core/large_tables.py +775 -0
  61. confiture/core/linting/__init__.py +70 -0
  62. confiture/core/linting/composer.py +192 -0
  63. confiture/core/linting/libraries/__init__.py +17 -0
  64. confiture/core/linting/libraries/gdpr.py +168 -0
  65. confiture/core/linting/libraries/general.py +184 -0
  66. confiture/core/linting/libraries/hipaa.py +144 -0
  67. confiture/core/linting/libraries/pci_dss.py +104 -0
  68. confiture/core/linting/libraries/sox.py +120 -0
  69. confiture/core/linting/schema_linter.py +491 -0
  70. confiture/core/linting/versioning.py +151 -0
  71. confiture/core/locking.py +389 -0
  72. confiture/core/migration_generator.py +298 -0
  73. confiture/core/migrator.py +882 -0
  74. confiture/core/observability/__init__.py +44 -0
  75. confiture/core/observability/audit.py +323 -0
  76. confiture/core/observability/logging.py +187 -0
  77. confiture/core/observability/metrics.py +174 -0
  78. confiture/core/observability/tracing.py +192 -0
  79. confiture/core/pg_version.py +418 -0
  80. confiture/core/pool.py +406 -0
  81. confiture/core/risk/__init__.py +39 -0
  82. confiture/core/risk/predictor.py +188 -0
  83. confiture/core/risk/scoring.py +248 -0
  84. confiture/core/rollback_generator.py +388 -0
  85. confiture/core/schema_analyzer.py +769 -0
  86. confiture/core/schema_to_schema.py +590 -0
  87. confiture/core/security/__init__.py +32 -0
  88. confiture/core/security/logging.py +201 -0
  89. confiture/core/security/validation.py +416 -0
  90. confiture/core/signals.py +371 -0
  91. confiture/core/syncer.py +540 -0
  92. confiture/exceptions.py +192 -0
  93. confiture/integrations/__init__.py +0 -0
  94. confiture/models/__init__.py +24 -0
  95. confiture/models/lint.py +193 -0
  96. confiture/models/migration.py +265 -0
  97. confiture/models/schema.py +203 -0
  98. confiture/models/sql_file_migration.py +225 -0
  99. confiture/scenarios/__init__.py +36 -0
  100. confiture/scenarios/compliance.py +586 -0
  101. confiture/scenarios/ecommerce.py +199 -0
  102. confiture/scenarios/financial.py +253 -0
  103. confiture/scenarios/healthcare.py +315 -0
  104. confiture/scenarios/multi_tenant.py +340 -0
  105. confiture/scenarios/saas.py +295 -0
  106. confiture/testing/FRAMEWORK_API.md +722 -0
  107. confiture/testing/__init__.py +100 -0
  108. confiture/testing/fixtures/__init__.py +11 -0
  109. confiture/testing/fixtures/data_validator.py +229 -0
  110. confiture/testing/fixtures/migration_runner.py +167 -0
  111. confiture/testing/fixtures/schema_snapshotter.py +352 -0
  112. confiture/testing/frameworks/__init__.py +10 -0
  113. confiture/testing/frameworks/mutation.py +587 -0
  114. confiture/testing/frameworks/performance.py +479 -0
  115. confiture/testing/loader.py +225 -0
  116. confiture/testing/pytest/__init__.py +38 -0
  117. confiture/testing/pytest_plugin.py +190 -0
  118. confiture/testing/sandbox.py +304 -0
  119. confiture/testing/utils/__init__.py +0 -0
  120. fraiseql_confiture-0.3.7.dist-info/METADATA +438 -0
  121. fraiseql_confiture-0.3.7.dist-info/RECORD +124 -0
  122. fraiseql_confiture-0.3.7.dist-info/WHEEL +4 -0
  123. fraiseql_confiture-0.3.7.dist-info/entry_points.txt +4 -0
  124. fraiseql_confiture-0.3.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,540 @@
1
+ """Production data synchronization.
2
+
3
+ This module provides functionality to sync data from production databases to
4
+ local/staging environments with PII anonymization support.
5
+ """
6
+
7
+ import hashlib
8
+ import json
9
+ import random
10
+ import time
11
+ from dataclasses import dataclass
12
+ from datetime import datetime
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ import psycopg
17
+ from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn
18
+
19
+ from confiture.config.environment import DatabaseConfig
20
+ from confiture.core.connection import create_connection
21
+
22
+
23
+ @dataclass
24
+ class TableSelection:
25
+ """Configuration for selecting which tables to sync."""
26
+
27
+ include: list[str] | None = None # Explicit table list or patterns
28
+ exclude: list[str] | None = None # Tables/patterns to exclude
29
+
30
+
31
+ @dataclass
32
+ class AnonymizationRule:
33
+ """Rule for anonymizing a specific column."""
34
+
35
+ column: str
36
+ strategy: str # 'email', 'phone', 'name', 'redact', 'hash'
37
+ seed: int | None = None # For reproducible anonymization
38
+
39
+
40
@dataclass
class SyncConfig:
    """All settings for one data-sync operation."""

    tables: TableSelection  # Which tables to copy
    anonymization: dict[str, list[AnonymizationRule]] | None = None  # table -> rules
    batch_size: int = 5000  # Optimized based on benchmarks
    resume: bool = False  # Skip tables already recorded in the checkpoint
    show_progress: bool = False  # Render a rich progress bar during sync
    checkpoint_file: Path | None = None  # Where resume state is persisted
50
+
51
+
52
@dataclass
class TableMetrics:
    """Timing and throughput figures recorded for one table's sync."""

    rows_synced: int  # Rows copied into the target table
    elapsed_seconds: float  # Wall-clock duration of the sync
    rows_per_second: float  # Throughput (rows_synced / elapsed_seconds)
    synced_at: str  # ISO-8601 timestamp of completion
60
+
61
+
62
class ProductionSyncer:
    """Synchronize data from production to target database.

    Intended for use as a context manager: database connections are opened
    in ``__enter__`` and closed in ``__exit__``.

    Features:
    - Table selection with include/exclude patterns
    - Schema-aware data copying
    - PII anonymization
    - Progress reporting
    - Resume support for interrupted syncs
    """
72
+
73
+ def __init__(
74
+ self,
75
+ source: DatabaseConfig | str,
76
+ target: DatabaseConfig | str,
77
+ ):
78
+ """Initialize syncer with source and target databases.
79
+
80
+ Args:
81
+ source: Source database config or environment name
82
+ target: Target database config or environment name
83
+ """
84
+ from confiture.config.environment import Environment
85
+
86
+ # Load configs if strings provided
87
+ if isinstance(source, str):
88
+ source = Environment.load(source).database
89
+
90
+ if isinstance(target, str):
91
+ target = Environment.load(target).database
92
+
93
+ self.source_config = source
94
+ self.target_config = target
95
+
96
+ self._source_conn: psycopg.Connection[Any] | None = None
97
+ self._target_conn: psycopg.Connection[Any] | None = None
98
+
99
+ # Progress tracking and metrics
100
+ self._metrics: dict[str, TableMetrics] = {}
101
+ self._completed_tables: set[str] = set()
102
+ self._checkpoint_data: dict[str, Any] = {}
103
+
104
+ def __enter__(self) -> "ProductionSyncer":
105
+ """Context manager entry."""
106
+ self._source_conn = create_connection(self.source_config)
107
+ self._target_conn = create_connection(self.target_config)
108
+ return self
109
+
110
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
111
+ """Context manager exit."""
112
+ if self._source_conn:
113
+ self._source_conn.close()
114
+ if self._target_conn:
115
+ self._target_conn.close()
116
+
117
+ def get_all_tables(self) -> list[str]:
118
+ """Get list of all user tables in source database.
119
+
120
+ Returns:
121
+ List of table names in public schema
122
+ """
123
+ if not self._source_conn:
124
+ raise RuntimeError("Not connected. Use context manager.")
125
+
126
+ with self._source_conn.cursor() as cursor:
127
+ cursor.execute("""
128
+ SELECT tablename
129
+ FROM pg_tables
130
+ WHERE schemaname = 'public'
131
+ ORDER BY tablename
132
+ """)
133
+ return [row[0] for row in cursor.fetchall()]
134
+
135
+ def select_tables(self, selection: TableSelection) -> list[str]:
136
+ """Select tables based on include/exclude patterns.
137
+
138
+ Args:
139
+ selection: Table selection configuration
140
+
141
+ Returns:
142
+ List of table names to sync
143
+ """
144
+ all_tables = self.get_all_tables()
145
+
146
+ # If explicit include list, start with those
147
+ if selection.include:
148
+ tables = [t for t in all_tables if t in selection.include]
149
+ else:
150
+ tables = all_tables
151
+
152
+ # Apply exclusions
153
+ if selection.exclude:
154
+ tables = [t for t in tables if t not in selection.exclude]
155
+
156
+ return tables
157
+
158
+ def _anonymize_value(self, value: Any, strategy: str, seed: int | None = None) -> Any:
159
+ """Anonymize a single value based on strategy.
160
+
161
+ Args:
162
+ value: Original value to anonymize
163
+ strategy: Anonymization strategy ('email', 'phone', 'name', 'redact', 'hash')
164
+ seed: Optional seed for deterministic anonymization
165
+
166
+ Returns:
167
+ Anonymized value
168
+ """
169
+ if value is None:
170
+ return None
171
+
172
+ # Set random seed for deterministic anonymization
173
+ if seed is not None:
174
+ random.seed(f"{seed}:{value}")
175
+
176
+ if strategy == "email":
177
+ # Generate deterministic fake email
178
+ hash_value = hashlib.sha256(str(value).encode()).hexdigest()[:8]
179
+ return f"user_{hash_value}@example.com"
180
+
181
+ elif strategy == "phone":
182
+ # Generate fake phone number
183
+ if seed is not None:
184
+ # Deterministic based on seed
185
+ hash_int = int(hashlib.sha256(str(value).encode()).hexdigest()[:8], 16)
186
+ number = hash_int % 10000
187
+ else:
188
+ number = random.randint(1000, 9999)
189
+ return f"+1-555-{number}"
190
+
191
+ elif strategy == "name":
192
+ # Generate fake name
193
+ hash_str = hashlib.sha256(str(value).encode()).hexdigest()[:8]
194
+ return f"User {hash_str[:4].upper()}"
195
+
196
+ elif strategy == "redact":
197
+ # Simply redact the value
198
+ return "[REDACTED]"
199
+
200
+ elif strategy == "hash":
201
+ # One-way hash (preserves uniqueness)
202
+ return hashlib.sha256(str(value).encode()).hexdigest()[:16]
203
+
204
+ else:
205
+ # Unknown strategy, redact by default
206
+ return "[REDACTED]"
207
+
208
    def sync_table(
        self,
        table_name: str,
        anonymization_rules: list[AnonymizationRule] | None = None,
        batch_size: int = 5000,  # Optimized based on benchmarks
        progress_task: Any = None,
        progress: Progress | None = None,
    ) -> int:
        """Sync a single table from source to target.

        The target table is emptied with ``TRUNCATE ... CASCADE``, its
        triggers are disabled for the duration of the load (so rows can be
        inserted regardless of FK ordering), and the final row count is
        verified against the source.

        Args:
            table_name: Name of table to sync
            anonymization_rules: Optional anonymization rules for PII
            batch_size: Number of rows per batch (default 5000, optimized via benchmarks)
            progress_task: Rich progress task ID for updating progress
            progress: Progress instance

        Returns:
            Number of rows synced

        Raises:
            RuntimeError: If not connected, or if the row counts of source
                and target differ after the copy.
        """
        if not self._source_conn or not self._target_conn:
            raise RuntimeError("Not connected. Use context manager.")

        start_time = time.time()

        # NOTE(review): table_name is interpolated into SQL unquoted.  Names
        # come from pg_tables / trusted config here, but quoting via
        # psycopg.sql.Identifier would be safer — confirm all callers.
        with self._source_conn.cursor() as src_cursor, self._target_conn.cursor() as dst_cursor:
            # Truncate target table first
            dst_cursor.execute(f"TRUNCATE TABLE {table_name} CASCADE")

            # Get row count for verification
            src_cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            expected_row = src_cursor.fetchone()
            expected_count: int = expected_row[0] if expected_row else 0

            # Update progress with total
            if progress and progress_task is not None:
                progress.update(progress_task, total=expected_count)

            # Temporarily disable triggers to allow FK constraint violations
            dst_cursor.execute(f"ALTER TABLE {table_name} DISABLE TRIGGER ALL")

            try:
                if anonymization_rules:
                    # Anonymization path: fetch, anonymize, insert
                    actual_count = self._sync_with_anonymization(
                        src_cursor,
                        dst_cursor,
                        table_name,
                        anonymization_rules,
                        batch_size,
                        progress_task,
                        progress,
                    )
                else:
                    # Fast path: direct COPY
                    actual_count = self._sync_with_copy(
                        src_cursor,
                        dst_cursor,
                        table_name,
                        progress_task,
                        progress,
                    )
            finally:
                # Re-enable triggers even if the copy failed mid-way.
                dst_cursor.execute(f"ALTER TABLE {table_name} ENABLE TRIGGER ALL")

            # Commit target transaction
            self._target_conn.commit()

            # Verify row count
            # NOTE(review): verification runs *after* the commit, so a
            # mismatch raises with the data already committed — consider
            # verifying before commit and rolling back on mismatch.
            if actual_count != expected_count:
                raise RuntimeError(
                    f"Row count mismatch for {table_name}: "
                    f"expected {expected_count}, got {actual_count}"
                )

            # Track metrics for get_metrics() / checkpointing.
            elapsed = time.time() - start_time
            rows_per_second = actual_count / elapsed if elapsed > 0 else 0
            self._metrics[table_name] = TableMetrics(
                rows_synced=actual_count,
                elapsed_seconds=elapsed,
                rows_per_second=rows_per_second,
                synced_at=datetime.now().isoformat(),
            )
            self._completed_tables.add(table_name)

            return actual_count
296
+
297
+ def _sync_with_copy(
298
+ self,
299
+ src_cursor: Any,
300
+ dst_cursor: Any,
301
+ table_name: str,
302
+ progress_task: Any = None,
303
+ progress: Progress | None = None,
304
+ ) -> int:
305
+ """Fast sync using COPY (no anonymization).
306
+
307
+ Args:
308
+ src_cursor: Source database cursor
309
+ dst_cursor: Target database cursor
310
+ table_name: Name of table to sync
311
+ progress_task: Progress task ID
312
+ progress: Progress instance
313
+
314
+ Returns:
315
+ Number of rows synced
316
+ """
317
+ with (
318
+ src_cursor.copy(f"COPY {table_name} TO STDOUT") as copy_out,
319
+ dst_cursor.copy(f"COPY {table_name} FROM STDIN") as copy_in,
320
+ ):
321
+ for data in copy_out:
322
+ copy_in.write(data)
323
+ if progress and progress_task is not None:
324
+ progress.update(progress_task, advance=1)
325
+
326
+ # Get final count
327
+ dst_cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
328
+ result = dst_cursor.fetchone()
329
+ return int(result[0]) if result else 0
330
+
331
+ def _sync_with_anonymization(
332
+ self,
333
+ src_cursor: Any,
334
+ dst_cursor: Any,
335
+ table_name: str,
336
+ anonymization_rules: list[AnonymizationRule],
337
+ batch_size: int,
338
+ progress_task: Any = None,
339
+ progress: Progress | None = None,
340
+ ) -> int:
341
+ """Sync with anonymization (slower, row-by-row).
342
+
343
+ Args:
344
+ src_cursor: Source database cursor
345
+ dst_cursor: Target database cursor
346
+ table_name: Name of table to sync
347
+ anonymization_rules: List of anonymization rules
348
+ batch_size: Batch size for inserts
349
+ progress_task: Progress task ID
350
+ progress: Progress instance
351
+
352
+ Returns:
353
+ Number of rows synced
354
+ """
355
+ # Get column names
356
+ src_cursor.execute(f"SELECT * FROM {table_name} LIMIT 0")
357
+ column_names = [desc[0] for desc in src_cursor.description]
358
+
359
+ # Build column index map for anonymization
360
+ anonymize_map: dict[int, AnonymizationRule] = {}
361
+ for rule in anonymization_rules:
362
+ if rule.column in column_names:
363
+ col_idx = column_names.index(rule.column)
364
+ anonymize_map[col_idx] = rule
365
+
366
+ # Fetch all rows
367
+ src_cursor.execute(f"SELECT * FROM {table_name}")
368
+
369
+ # Process in batches
370
+ rows_synced = 0
371
+ batch = []
372
+
373
+ for row in src_cursor:
374
+ # Anonymize specified columns
375
+ anonymized_row = list(row)
376
+ for col_idx, rule in anonymize_map.items():
377
+ anonymized_row[col_idx] = self._anonymize_value(
378
+ row[col_idx], rule.strategy, rule.seed
379
+ )
380
+
381
+ batch.append(tuple(anonymized_row))
382
+
383
+ # Insert batch when full
384
+ if len(batch) >= batch_size:
385
+ self._insert_batch(dst_cursor, table_name, column_names, batch)
386
+ rows_synced += len(batch)
387
+ if progress and progress_task is not None:
388
+ progress.update(progress_task, advance=len(batch))
389
+ batch = []
390
+
391
+ # Insert remaining rows
392
+ if batch:
393
+ self._insert_batch(dst_cursor, table_name, column_names, batch)
394
+ rows_synced += len(batch)
395
+ if progress and progress_task is not None:
396
+ progress.update(progress_task, advance=len(batch))
397
+
398
+ return rows_synced
399
+
400
+ def _insert_batch(
401
+ self,
402
+ cursor: Any,
403
+ table_name: str,
404
+ column_names: list[str],
405
+ rows: list[tuple[Any, ...]],
406
+ ) -> None:
407
+ """Insert a batch of rows into target table.
408
+
409
+ Args:
410
+ cursor: Database cursor
411
+ table_name: Name of table
412
+ column_names: List of column names
413
+ rows: List of row tuples to insert
414
+ """
415
+ if not rows:
416
+ return
417
+
418
+ columns_str = ", ".join(column_names)
419
+ placeholders = ", ".join(["%s"] * len(column_names))
420
+ query = f"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders})"
421
+
422
+ cursor.executemany(query, rows)
423
+
424
+ def sync(self, config: SyncConfig) -> dict[str, int]:
425
+ """Sync multiple tables based on configuration.
426
+
427
+ Args:
428
+ config: Sync configuration
429
+
430
+ Returns:
431
+ Dictionary mapping table names to row counts synced
432
+ """
433
+ # Load checkpoint if requested
434
+ if config.resume and config.checkpoint_file and config.checkpoint_file.exists():
435
+ self.load_checkpoint(config.checkpoint_file)
436
+
437
+ tables = self.select_tables(config.tables)
438
+ results = {}
439
+
440
+ # Filter out completed tables if resuming
441
+ if config.resume:
442
+ tables = [t for t in tables if t not in self._completed_tables]
443
+
444
+ if config.show_progress:
445
+ # Use rich progress bar
446
+ with Progress(
447
+ TextColumn("[bold blue]{task.description}"),
448
+ BarColumn(),
449
+ TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
450
+ TextColumn("•"),
451
+ TextColumn("{task.completed}/{task.total} rows"),
452
+ TimeRemainingColumn(),
453
+ ) as progress:
454
+ for table in tables:
455
+ task = progress.add_task(f"Syncing {table}", total=0)
456
+
457
+ anonymization_rules = None
458
+ if config.anonymization and table in config.anonymization:
459
+ anonymization_rules = config.anonymization[table]
460
+
461
+ rows_synced = self.sync_table(
462
+ table,
463
+ anonymization_rules=anonymization_rules,
464
+ batch_size=config.batch_size,
465
+ progress_task=task,
466
+ progress=progress,
467
+ )
468
+ results[table] = rows_synced
469
+ else:
470
+ # No progress bar
471
+ for table in tables:
472
+ anonymization_rules = None
473
+ if config.anonymization and table in config.anonymization:
474
+ anonymization_rules = config.anonymization[table]
475
+
476
+ rows_synced = self.sync_table(
477
+ table,
478
+ anonymization_rules=anonymization_rules,
479
+ batch_size=config.batch_size,
480
+ )
481
+ results[table] = rows_synced
482
+
483
+ # Save checkpoint if requested
484
+ if config.checkpoint_file:
485
+ self.save_checkpoint(config.checkpoint_file)
486
+
487
+ return results
488
+
489
+ def get_metrics(self) -> dict[str, dict[str, Any]]:
490
+ """Get performance metrics for all synced tables.
491
+
492
+ Returns:
493
+ Dictionary mapping table names to metrics
494
+ """
495
+ return {
496
+ table: {
497
+ "rows_synced": metrics.rows_synced,
498
+ "elapsed_seconds": metrics.elapsed_seconds,
499
+ "rows_per_second": metrics.rows_per_second,
500
+ "synced_at": metrics.synced_at,
501
+ }
502
+ for table, metrics in self._metrics.items()
503
+ }
504
+
505
+ def save_checkpoint(self, checkpoint_file: Path) -> None:
506
+ """Save sync checkpoint to file.
507
+
508
+ Args:
509
+ checkpoint_file: Path to checkpoint file
510
+ """
511
+ checkpoint_data = {
512
+ "version": "1.0",
513
+ "timestamp": datetime.now().isoformat(),
514
+ "source_database": f"{self.source_config.host}:{self.source_config.port}/{self.source_config.database}",
515
+ "target_database": f"{self.target_config.host}:{self.target_config.port}/{self.target_config.database}",
516
+ "completed_tables": {
517
+ table: {
518
+ "rows_synced": metrics.rows_synced,
519
+ "synced_at": metrics.synced_at,
520
+ }
521
+ for table, metrics in self._metrics.items()
522
+ },
523
+ }
524
+
525
+ checkpoint_file.parent.mkdir(parents=True, exist_ok=True)
526
+ with open(checkpoint_file, "w") as f:
527
+ json.dump(checkpoint_data, f, indent=2)
528
+
529
+ def load_checkpoint(self, checkpoint_file: Path) -> None:
530
+ """Load sync checkpoint from file.
531
+
532
+ Args:
533
+ checkpoint_file: Path to checkpoint file
534
+ """
535
+ with open(checkpoint_file) as f:
536
+ self._checkpoint_data = json.load(f)
537
+
538
+ # Restore completed tables
539
+ if "completed_tables" in self._checkpoint_data:
540
+ self._completed_tables = set(self._checkpoint_data["completed_tables"].keys())