fraiseql-confiture 0.1.0__cp311-cp311-manylinux_2_34_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of fraiseql-confiture might be problematic.
- confiture/__init__.py +45 -0
- confiture/_core.cpython-311-x86_64-linux-gnu.so +0 -0
- confiture/cli/__init__.py +0 -0
- confiture/cli/main.py +720 -0
- confiture/config/__init__.py +0 -0
- confiture/config/environment.py +190 -0
- confiture/core/__init__.py +0 -0
- confiture/core/builder.py +336 -0
- confiture/core/connection.py +120 -0
- confiture/core/differ.py +522 -0
- confiture/core/migration_generator.py +298 -0
- confiture/core/migrator.py +369 -0
- confiture/core/schema_to_schema.py +592 -0
- confiture/core/syncer.py +540 -0
- confiture/exceptions.py +141 -0
- confiture/integrations/__init__.py +0 -0
- confiture/models/__init__.py +0 -0
- confiture/models/migration.py +95 -0
- confiture/models/schema.py +203 -0
- fraiseql_confiture-0.1.0.dist-info/METADATA +350 -0
- fraiseql_confiture-0.1.0.dist-info/RECORD +24 -0
- fraiseql_confiture-0.1.0.dist-info/WHEEL +4 -0
- fraiseql_confiture-0.1.0.dist-info/entry_points.txt +2 -0
- fraiseql_confiture-0.1.0.dist-info/licenses/LICENSE +21 -0
confiture/core/syncer.py
ADDED
@@ -0,0 +1,540 @@
"""Production data synchronization.

This module provides functionality to sync data from production databases to
local/staging environments with PII anonymization support.
"""

import hashlib
import json
import random
import time
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Any

import psycopg
from rich.progress import BarColumn, Progress, TextColumn, TimeRemainingColumn

from confiture.config.environment import DatabaseConfig
from confiture.core.connection import create_connection


@dataclass
class TableSelection:
    """Configuration for selecting which tables to sync."""

    include: list[str] | None = None  # Explicit table list or patterns
    exclude: list[str] | None = None  # Tables/patterns to exclude


@dataclass
class AnonymizationRule:
    """Rule for anonymizing a specific column."""

    column: str
    strategy: str  # 'email', 'phone', 'name', 'redact', 'hash'
    seed: int | None = None  # For reproducible anonymization


@dataclass
class SyncConfig:
    """Configuration for data sync operation."""

    tables: TableSelection
    anonymization: dict[str, list[AnonymizationRule]] | None = None  # table -> rules
    batch_size: int = 5000  # Optimized based on benchmarks
    resume: bool = False
    show_progress: bool = False
    checkpoint_file: Path | None = None


@dataclass
class TableMetrics:
    """Performance metrics for a single table sync."""

    rows_synced: int
    elapsed_seconds: float
    rows_per_second: float
    synced_at: str

class ProductionSyncer:
    """Synchronize data from production to target database.

    Features:
    - Table selection with include/exclude patterns
    - Schema-aware data copying
    - PII anonymization
    - Progress reporting
    - Resume support for interrupted syncs
    """

    def __init__(
        self,
        source: DatabaseConfig | str,
        target: DatabaseConfig | str,
    ):
        """Initialize syncer with source and target databases.

        Args:
            source: Source database config or environment name
            target: Target database config or environment name
        """
        from confiture.config.environment import Environment

        # Load configs if strings provided
        if isinstance(source, str):
            source = Environment.load(source).database

        if isinstance(target, str):
            target = Environment.load(target).database

        self.source_config = source
        self.target_config = target

        self._source_conn: psycopg.Connection[Any] | None = None
        self._target_conn: psycopg.Connection[Any] | None = None

        # Progress tracking and metrics
        self._metrics: dict[str, TableMetrics] = {}
        self._completed_tables: set[str] = set()
        self._checkpoint_data: dict[str, Any] = {}

    def __enter__(self) -> "ProductionSyncer":
        """Context manager entry."""
        self._source_conn = create_connection(self.source_config)
        self._target_conn = create_connection(self.target_config)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Context manager exit."""
        if self._source_conn:
            self._source_conn.close()
        if self._target_conn:
            self._target_conn.close()

    def get_all_tables(self) -> list[str]:
        """Get list of all user tables in source database.

        Returns:
            List of table names in public schema
        """
        if not self._source_conn:
            raise RuntimeError("Not connected. Use context manager.")

        with self._source_conn.cursor() as cursor:
            cursor.execute("""
                SELECT tablename
                FROM pg_tables
                WHERE schemaname = 'public'
                ORDER BY tablename
            """)
            return [row[0] for row in cursor.fetchall()]

    def select_tables(self, selection: TableSelection) -> list[str]:
        """Select tables based on include/exclude patterns.

        Args:
            selection: Table selection configuration

        Returns:
            List of table names to sync
        """
        all_tables = self.get_all_tables()

        # If explicit include list, start with those
        if selection.include:
            tables = [t for t in all_tables if t in selection.include]
        else:
            tables = all_tables

        # Apply exclusions
        if selection.exclude:
            tables = [t for t in tables if t not in selection.exclude]

        return tables

    def _anonymize_value(self, value: Any, strategy: str, seed: int | None = None) -> Any:
        """Anonymize a single value based on strategy.

        Args:
            value: Original value to anonymize
            strategy: Anonymization strategy ('email', 'phone', 'name', 'redact', 'hash')
            seed: Optional seed for deterministic anonymization

        Returns:
            Anonymized value
        """
        if value is None:
            return None

        # Set random seed for deterministic anonymization
        if seed is not None:
            random.seed(f"{seed}:{value}")

        if strategy == "email":
            # Generate deterministic fake email
            hash_value = hashlib.sha256(str(value).encode()).hexdigest()[:8]
            return f"user_{hash_value}@example.com"

        elif strategy == "phone":
            # Generate fake phone number
            if seed is not None:
                # Deterministic based on seed
                hash_int = int(hashlib.sha256(str(value).encode()).hexdigest()[:8], 16)
                number = hash_int % 10000
            else:
                number = random.randint(1000, 9999)
            return f"+1-555-{number}"

        elif strategy == "name":
            # Generate fake name
            hash_str = hashlib.sha256(str(value).encode()).hexdigest()[:8]
            return f"User {hash_str[:4].upper()}"

        elif strategy == "redact":
            # Simply redact the value
            return "[REDACTED]"

        elif strategy == "hash":
            # One-way hash (preserves uniqueness)
            return hashlib.sha256(str(value).encode()).hexdigest()[:16]

        else:
            # Unknown strategy, redact by default
            return "[REDACTED]"

    def sync_table(
        self,
        table_name: str,
        anonymization_rules: list[AnonymizationRule] | None = None,
        batch_size: int = 5000,  # Optimized based on benchmarks
        progress_task: Any = None,
        progress: Progress | None = None,
    ) -> int:
        """Sync a single table from source to target.

        Args:
            table_name: Name of table to sync
            anonymization_rules: Optional anonymization rules for PII
            batch_size: Number of rows per batch (default 5000, optimized via benchmarks)
            progress_task: Rich progress task ID for updating progress
            progress: Progress instance

        Returns:
            Number of rows synced
        """
        if not self._source_conn or not self._target_conn:
            raise RuntimeError("Not connected. Use context manager.")

        start_time = time.time()

        with self._source_conn.cursor() as src_cursor, self._target_conn.cursor() as dst_cursor:
            # Truncate target table first
            dst_cursor.execute(f"TRUNCATE TABLE {table_name} CASCADE")

            # Get row count for verification
            src_cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
            expected_row = src_cursor.fetchone()
            expected_count: int = expected_row[0] if expected_row else 0

            # Update progress with total
            if progress and progress_task is not None:
                progress.update(progress_task, total=expected_count)

            # Temporarily disable triggers to allow FK constraint violations
            dst_cursor.execute(f"ALTER TABLE {table_name} DISABLE TRIGGER ALL")

            try:
                if anonymization_rules:
                    # Anonymization path: fetch, anonymize, insert
                    actual_count = self._sync_with_anonymization(
                        src_cursor,
                        dst_cursor,
                        table_name,
                        anonymization_rules,
                        batch_size,
                        progress_task,
                        progress,
                    )
                else:
                    # Fast path: direct COPY
                    actual_count = self._sync_with_copy(
                        src_cursor,
                        dst_cursor,
                        table_name,
                        progress_task,
                        progress,
                    )
            finally:
                # Re-enable triggers
                dst_cursor.execute(f"ALTER TABLE {table_name} ENABLE TRIGGER ALL")

        # Commit target transaction
        self._target_conn.commit()

        # Verify row count
        if actual_count != expected_count:
            raise RuntimeError(
                f"Row count mismatch for {table_name}: "
                f"expected {expected_count}, got {actual_count}"
            )

        # Track metrics
        elapsed = time.time() - start_time
        rows_per_second = actual_count / elapsed if elapsed > 0 else 0
        self._metrics[table_name] = TableMetrics(
            rows_synced=actual_count,
            elapsed_seconds=elapsed,
            rows_per_second=rows_per_second,
            synced_at=datetime.now().isoformat(),
        )
        self._completed_tables.add(table_name)

        return actual_count

    def _sync_with_copy(
        self,
        src_cursor: Any,
        dst_cursor: Any,
        table_name: str,
        progress_task: Any = None,
        progress: Progress | None = None,
    ) -> int:
        """Fast sync using COPY (no anonymization).

        Args:
            src_cursor: Source database cursor
            dst_cursor: Target database cursor
            table_name: Name of table to sync
            progress_task: Progress task ID
            progress: Progress instance

        Returns:
            Number of rows synced
        """
        with (
            src_cursor.copy(f"COPY {table_name} TO STDOUT") as copy_out,
            dst_cursor.copy(f"COPY {table_name} FROM STDIN") as copy_in,
        ):
            for data in copy_out:
                copy_in.write(data)
                if progress and progress_task is not None:
                    progress.update(progress_task, advance=1)

        # Get final count
        dst_cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
        result = dst_cursor.fetchone()
        return result[0] if result else 0

    def _sync_with_anonymization(
        self,
        src_cursor: Any,
        dst_cursor: Any,
        table_name: str,
        anonymization_rules: list[AnonymizationRule],
        batch_size: int,
        progress_task: Any = None,
        progress: Progress | None = None,
    ) -> int:
        """Sync with anonymization (slower, row-by-row).

        Args:
            src_cursor: Source database cursor
            dst_cursor: Target database cursor
            table_name: Name of table to sync
            anonymization_rules: List of anonymization rules
            batch_size: Batch size for inserts
            progress_task: Progress task ID
            progress: Progress instance

        Returns:
            Number of rows synced
        """
        # Get column names
        src_cursor.execute(f"SELECT * FROM {table_name} LIMIT 0")
        column_names = [desc[0] for desc in src_cursor.description]

        # Build column index map for anonymization
        anonymize_map: dict[int, AnonymizationRule] = {}
        for rule in anonymization_rules:
            if rule.column in column_names:
                col_idx = column_names.index(rule.column)
                anonymize_map[col_idx] = rule

        # Fetch all rows
        src_cursor.execute(f"SELECT * FROM {table_name}")

        # Process in batches
        rows_synced = 0
        batch = []

        for row in src_cursor:
            # Anonymize specified columns
            anonymized_row = list(row)
            for col_idx, rule in anonymize_map.items():
                anonymized_row[col_idx] = self._anonymize_value(
                    row[col_idx], rule.strategy, rule.seed
                )

            batch.append(tuple(anonymized_row))

            # Insert batch when full
            if len(batch) >= batch_size:
                self._insert_batch(dst_cursor, table_name, column_names, batch)
                rows_synced += len(batch)
                if progress and progress_task is not None:
                    progress.update(progress_task, advance=len(batch))
                batch = []

        # Insert remaining rows
        if batch:
            self._insert_batch(dst_cursor, table_name, column_names, batch)
            rows_synced += len(batch)
            if progress and progress_task is not None:
                progress.update(progress_task, advance=len(batch))

        return rows_synced

    def _insert_batch(
        self,
        cursor: Any,
        table_name: str,
        column_names: list[str],
        rows: list[tuple[Any, ...]],
    ) -> None:
        """Insert a batch of rows into target table.

        Args:
            cursor: Database cursor
            table_name: Name of table
            column_names: List of column names
            rows: List of row tuples to insert
        """
        if not rows:
            return

        columns_str = ", ".join(column_names)
        placeholders = ", ".join(["%s"] * len(column_names))
        query = f"INSERT INTO {table_name} ({columns_str}) VALUES ({placeholders})"

        cursor.executemany(query, rows)

    def sync(self, config: SyncConfig) -> dict[str, int]:
        """Sync multiple tables based on configuration.

        Args:
            config: Sync configuration

        Returns:
            Dictionary mapping table names to row counts synced
        """
        # Load checkpoint if requested
        if config.resume and config.checkpoint_file and config.checkpoint_file.exists():
            self.load_checkpoint(config.checkpoint_file)

        tables = self.select_tables(config.tables)
        results = {}

        # Filter out completed tables if resuming
        if config.resume:
            tables = [t for t in tables if t not in self._completed_tables]

        if config.show_progress:
            # Use rich progress bar
            with Progress(
                TextColumn("[bold blue]{task.description}"),
                BarColumn(),
                TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
                TextColumn("•"),
                TextColumn("{task.completed}/{task.total} rows"),
                TimeRemainingColumn(),
            ) as progress:
                for table in tables:
                    task = progress.add_task(f"Syncing {table}", total=0)

                    anonymization_rules = None
                    if config.anonymization and table in config.anonymization:
                        anonymization_rules = config.anonymization[table]

                    rows_synced = self.sync_table(
                        table,
                        anonymization_rules=anonymization_rules,
                        batch_size=config.batch_size,
                        progress_task=task,
                        progress=progress,
                    )
                    results[table] = rows_synced
        else:
            # No progress bar
            for table in tables:
                anonymization_rules = None
                if config.anonymization and table in config.anonymization:
                    anonymization_rules = config.anonymization[table]

                rows_synced = self.sync_table(
                    table,
                    anonymization_rules=anonymization_rules,
                    batch_size=config.batch_size,
                )
                results[table] = rows_synced

        # Save checkpoint if requested
        if config.checkpoint_file:
            self.save_checkpoint(config.checkpoint_file)

        return results

    def get_metrics(self) -> dict[str, dict[str, Any]]:
        """Get performance metrics for all synced tables.

        Returns:
            Dictionary mapping table names to metrics
        """
        return {
            table: {
                "rows_synced": metrics.rows_synced,
                "elapsed_seconds": metrics.elapsed_seconds,
                "rows_per_second": metrics.rows_per_second,
                "synced_at": metrics.synced_at,
            }
            for table, metrics in self._metrics.items()
        }

    def save_checkpoint(self, checkpoint_file: Path) -> None:
        """Save sync checkpoint to file.

        Args:
            checkpoint_file: Path to checkpoint file
        """
        checkpoint_data = {
            "version": "1.0",
            "timestamp": datetime.now().isoformat(),
            "source_database": f"{self.source_config.host}:{self.source_config.port}/{self.source_config.database}",
            "target_database": f"{self.target_config.host}:{self.target_config.port}/{self.target_config.database}",
            "completed_tables": {
                table: {
                    "rows_synced": metrics.rows_synced,
                    "synced_at": metrics.synced_at,
                }
                for table, metrics in self._metrics.items()
            },
        }

        checkpoint_file.parent.mkdir(parents=True, exist_ok=True)
        with open(checkpoint_file, "w") as f:
            json.dump(checkpoint_data, f, indent=2)

    def load_checkpoint(self, checkpoint_file: Path) -> None:
        """Load sync checkpoint from file.

        Args:
            checkpoint_file: Path to checkpoint file
        """
        with open(checkpoint_file) as f:
            self._checkpoint_data = json.load(f)

        # Restore completed tables
        if "completed_tables" in self._checkpoint_data:
            self._completed_tables = set(self._checkpoint_data["completed_tables"].keys())
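
For orientation, a minimal usage sketch of the syncer above. The environment names ("production", "local"), table and column names, and checkpoint path are illustrative assumptions, not values shipped with the package:

# Sketch only: environment names, tables, columns, and paths are assumptions.
from pathlib import Path

from confiture.core.syncer import (
    AnonymizationRule,
    ProductionSyncer,
    SyncConfig,
    TableSelection,
)

config = SyncConfig(
    tables=TableSelection(include=["users", "orders"], exclude=["audit_log"]),
    # Seeded rules produce the same fake values on every run
    anonymization={
        "users": [
            AnonymizationRule(column="email", strategy="email", seed=42),
            AnonymizationRule(column="phone", strategy="phone", seed=42),
        ]
    },
    show_progress=True,
    checkpoint_file=Path(".confiture/sync-checkpoint.json"),
)

# Connections open on __enter__ and close on __exit__
with ProductionSyncer(source="production", target="local") as syncer:
    results = syncer.sync(config)  # {table_name: rows_synced}
    for table, rows in results.items():
        print(f"{table}: {rows} rows")

Deterministic (seeded) anonymization matters here because the checkpoint file lets an interrupted sync resume: re-running with the same seed yields the same anonymized values.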
confiture/exceptions.py
ADDED
@@ -0,0 +1,141 @@
"""Confiture exception hierarchy

All exceptions raised by Confiture inherit from ConfiturError.
This allows users to catch all Confiture-specific errors with a single except clause.
"""


class ConfiturError(Exception):
    """Base exception for all Confiture errors

    All Confiture-specific exceptions inherit from this base class.
    This allows catching all Confiture errors with:

        try:
            confiture.build()
        except ConfiturError as e:
            # Handle any Confiture error
            pass
    """

    pass

class ConfigurationError(ConfiturError):
    """Invalid configuration (YAML, environment, database connection)

    Raised when:
    - Environment YAML file is malformed or missing
    - Required configuration fields are missing
    - Database connection string is invalid
    - Include/exclude directory patterns are invalid

    Example:
        >>> raise ConfigurationError("Missing database_url in local.yaml")
    """

    pass


class MigrationError(ConfiturError):
    """Migration execution failure

    Raised when:
    - Migration file cannot be loaded
    - Migration up() or down() fails
    - Migration has already been applied
    - Migration rollback fails

    Attributes:
        version: Migration version that failed (e.g., "001")
        migration_name: Human-readable migration name
    """

    def __init__(
        self,
        message: str,
        version: str | None = None,
        migration_name: str | None = None,
    ):
        super().__init__(message)
        self.version = version
        self.migration_name = migration_name


class SchemaError(ConfiturError):
    """Invalid schema DDL or schema build failure

    Raised when:
    - SQL syntax error in DDL files
    - Missing required schema directories
    - Circular dependencies between schema files
    - Schema hash computation fails

    Example:
        >>> raise SchemaError("Syntax error in 10_tables/users.sql at line 15")
    """

    pass


class SyncError(ConfiturError):
    """Production data sync failure

    Raised when:
    - Cannot connect to source database
    - Table does not exist in source or target
    - Anonymization rule fails
    - Data copy operation fails

    Example:
        >>> raise SyncError("Table 'users' not found in source database")
    """

    pass


class DifferError(ConfiturError):
    """Schema diff detection error

    Raised when:
    - Cannot parse SQL DDL
    - Schema comparison fails
    - Ambiguous schema changes detected

    Example:
        >>> raise DifferError("Cannot parse CREATE TABLE statement")
    """

    pass


class ValidationError(ConfiturError):
    """Data or schema validation error

    Raised when:
    - Row count mismatch after migration
    - Foreign key constraints violated
    - Custom validation rules fail

    Example:
        >>> raise ValidationError("Row count mismatch: expected 10000, got 9999")
    """

    pass


class RollbackError(ConfiturError):
    """Migration rollback failure

    Raised when:
    - Cannot rollback migration (irreversible change)
    - Rollback SQL fails
    - Database state is inconsistent after rollback

    This is a critical error that may require manual intervention.

    Example:
        >>> raise RollbackError("Cannot rollback: data already deleted")
    """

    pass
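
A minimal handling sketch for this hierarchy; run_migrations() is a hypothetical stand-in for application code, not part of the package. The specific except clauses must come before the ConfiturError catch-all, since every exception here derives from it:

# Sketch only: run_migrations() is a hypothetical stand-in.
from confiture.exceptions import ConfiturError, MigrationError, RollbackError


def run_migrations() -> None:
    # Simulate a failing migration to exercise the handlers below
    raise MigrationError("up() failed", version="001", migration_name="create_users")


try:
    run_migrations()
except RollbackError:
    # Critical per the docstring above; may require manual intervention
    raise
except MigrationError as e:
    print(f"Migration {e.version} ({e.migration_name}) failed: {e}")
except ConfiturError as e:
    # Catch-all: every Confiture-specific error inherits from ConfiturError
    print(f"Confiture error: {e}")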