fraiseql-confiture 0.3.4__cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- confiture/__init__.py +48 -0
- confiture/_core.cp311-win_amd64.pyd +0 -0
- confiture/cli/__init__.py +0 -0
- confiture/cli/dry_run.py +116 -0
- confiture/cli/lint_formatter.py +193 -0
- confiture/cli/main.py +1656 -0
- confiture/config/__init__.py +0 -0
- confiture/config/environment.py +263 -0
- confiture/core/__init__.py +51 -0
- confiture/core/anonymization/__init__.py +0 -0
- confiture/core/anonymization/audit.py +485 -0
- confiture/core/anonymization/benchmarking.py +372 -0
- confiture/core/anonymization/breach_notification.py +652 -0
- confiture/core/anonymization/compliance.py +617 -0
- confiture/core/anonymization/composer.py +298 -0
- confiture/core/anonymization/data_subject_rights.py +669 -0
- confiture/core/anonymization/factory.py +319 -0
- confiture/core/anonymization/governance.py +737 -0
- confiture/core/anonymization/performance.py +1092 -0
- confiture/core/anonymization/profile.py +284 -0
- confiture/core/anonymization/registry.py +195 -0
- confiture/core/anonymization/security/kms_manager.py +547 -0
- confiture/core/anonymization/security/lineage.py +888 -0
- confiture/core/anonymization/security/token_store.py +686 -0
- confiture/core/anonymization/strategies/__init__.py +41 -0
- confiture/core/anonymization/strategies/address.py +359 -0
- confiture/core/anonymization/strategies/credit_card.py +374 -0
- confiture/core/anonymization/strategies/custom.py +161 -0
- confiture/core/anonymization/strategies/date.py +218 -0
- confiture/core/anonymization/strategies/differential_privacy.py +398 -0
- confiture/core/anonymization/strategies/email.py +141 -0
- confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
- confiture/core/anonymization/strategies/hash.py +150 -0
- confiture/core/anonymization/strategies/ip_address.py +235 -0
- confiture/core/anonymization/strategies/masking_retention.py +252 -0
- confiture/core/anonymization/strategies/name.py +298 -0
- confiture/core/anonymization/strategies/phone.py +119 -0
- confiture/core/anonymization/strategies/preserve.py +85 -0
- confiture/core/anonymization/strategies/redact.py +101 -0
- confiture/core/anonymization/strategies/salted_hashing.py +322 -0
- confiture/core/anonymization/strategies/text_redaction.py +183 -0
- confiture/core/anonymization/strategies/tokenization.py +334 -0
- confiture/core/anonymization/strategy.py +241 -0
- confiture/core/anonymization/syncer_audit.py +357 -0
- confiture/core/blue_green.py +683 -0
- confiture/core/builder.py +500 -0
- confiture/core/checksum.py +358 -0
- confiture/core/connection.py +132 -0
- confiture/core/differ.py +522 -0
- confiture/core/drift.py +564 -0
- confiture/core/dry_run.py +182 -0
- confiture/core/health.py +313 -0
- confiture/core/hooks/__init__.py +87 -0
- confiture/core/hooks/base.py +232 -0
- confiture/core/hooks/context.py +146 -0
- confiture/core/hooks/execution_strategies.py +57 -0
- confiture/core/hooks/observability.py +220 -0
- confiture/core/hooks/phases.py +53 -0
- confiture/core/hooks/registry.py +295 -0
- confiture/core/large_tables.py +775 -0
- confiture/core/linting/__init__.py +70 -0
- confiture/core/linting/composer.py +192 -0
- confiture/core/linting/libraries/__init__.py +17 -0
- confiture/core/linting/libraries/gdpr.py +168 -0
- confiture/core/linting/libraries/general.py +184 -0
- confiture/core/linting/libraries/hipaa.py +144 -0
- confiture/core/linting/libraries/pci_dss.py +104 -0
- confiture/core/linting/libraries/sox.py +120 -0
- confiture/core/linting/schema_linter.py +491 -0
- confiture/core/linting/versioning.py +151 -0
- confiture/core/locking.py +389 -0
- confiture/core/migration_generator.py +298 -0
- confiture/core/migrator.py +793 -0
- confiture/core/observability/__init__.py +44 -0
- confiture/core/observability/audit.py +323 -0
- confiture/core/observability/logging.py +187 -0
- confiture/core/observability/metrics.py +174 -0
- confiture/core/observability/tracing.py +192 -0
- confiture/core/pg_version.py +418 -0
- confiture/core/pool.py +406 -0
- confiture/core/risk/__init__.py +39 -0
- confiture/core/risk/predictor.py +188 -0
- confiture/core/risk/scoring.py +248 -0
- confiture/core/rollback_generator.py +388 -0
- confiture/core/schema_analyzer.py +769 -0
- confiture/core/schema_to_schema.py +590 -0
- confiture/core/security/__init__.py +32 -0
- confiture/core/security/logging.py +201 -0
- confiture/core/security/validation.py +416 -0
- confiture/core/signals.py +371 -0
- confiture/core/syncer.py +540 -0
- confiture/exceptions.py +192 -0
- confiture/integrations/__init__.py +0 -0
- confiture/models/__init__.py +0 -0
- confiture/models/lint.py +193 -0
- confiture/models/migration.py +180 -0
- confiture/models/schema.py +203 -0
- confiture/scenarios/__init__.py +36 -0
- confiture/scenarios/compliance.py +586 -0
- confiture/scenarios/ecommerce.py +199 -0
- confiture/scenarios/financial.py +253 -0
- confiture/scenarios/healthcare.py +315 -0
- confiture/scenarios/multi_tenant.py +340 -0
- confiture/scenarios/saas.py +295 -0
- confiture/testing/FRAMEWORK_API.md +722 -0
- confiture/testing/__init__.py +38 -0
- confiture/testing/fixtures/__init__.py +11 -0
- confiture/testing/fixtures/data_validator.py +229 -0
- confiture/testing/fixtures/migration_runner.py +167 -0
- confiture/testing/fixtures/schema_snapshotter.py +352 -0
- confiture/testing/frameworks/__init__.py +10 -0
- confiture/testing/frameworks/mutation.py +587 -0
- confiture/testing/frameworks/performance.py +479 -0
- confiture/testing/utils/__init__.py +0 -0
- fraiseql_confiture-0.3.4.dist-info/METADATA +438 -0
- fraiseql_confiture-0.3.4.dist-info/RECORD +119 -0
- fraiseql_confiture-0.3.4.dist-info/WHEEL +4 -0
- fraiseql_confiture-0.3.4.dist-info/entry_points.txt +2 -0
- fraiseql_confiture-0.3.4.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,590 @@
|
|
|
1
|
+
"""Schema-to-Schema Migration using Foreign Data Wrapper (FDW).
|
|
2
|
+
|
|
3
|
+
This module implements Medium 4: Schema-to-Schema migration for zero-downtime
|
|
4
|
+
database migrations. It supports two strategies:
|
|
5
|
+
|
|
6
|
+
1. FDW Strategy: Best for small-medium tables (<10M rows), complex transformations
|
|
7
|
+
2. COPY Strategy: Best for large tables (>10M rows), 10-20x faster
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from io import BytesIO
|
|
11
|
+
from typing import Any
|
|
12
|
+
|
|
13
|
+
import psycopg
|
|
14
|
+
from psycopg import sql
|
|
15
|
+
|
|
16
|
+
from confiture.exceptions import MigrationError
|
|
17
|
+
|
|
18
|
+
# Constants for FDW configuration
DEFAULT_FOREIGN_SCHEMA_NAME = "old_schema"  # schema that receives the imported foreign tables
DEFAULT_SERVER_NAME = "confiture_source_server"  # name of the postgres_fdw SERVER object
DEFAULT_HOST = "localhost"  # NOTE(review): assumes source DB is local — confirm for remote sources
DEFAULT_PORT = "5432"  # kept as str: interpolated as a string literal into SERVER OPTIONS

# Constants for migration strategy
LARGE_TABLE_THRESHOLD = 10_000_000  # 10M rows; at or above this, COPY is recommended over FDW
FDW_THROUGHPUT = 500_000  # rows/second for FDW (rough estimate used for ETA only)
COPY_THROUGHPUT = 6_000_000  # rows/second for COPY (10-20x faster; rough estimate)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class SchemaToSchemaMigrator:
    """Migrator for schema-to-schema migrations using FDW.

    This class manages the migration of data from an old database schema to a
    new database schema using PostgreSQL Foreign Data Wrapper (FDW).

    Attributes:
        source_connection: Connection to source (old) database
        target_connection: Connection to target (new) database
        foreign_schema_name: Name for the imported foreign schema
        server_name: Name for the foreign server object
    """

    def __init__(
        self,
        source_connection: psycopg.Connection,
        target_connection: psycopg.Connection,
        foreign_schema_name: str = DEFAULT_FOREIGN_SCHEMA_NAME,
        server_name: str = DEFAULT_SERVER_NAME,
    ):
        """Initialize schema-to-schema migrator.

        Args:
            source_connection: PostgreSQL connection to source database
            target_connection: PostgreSQL connection to target database
            foreign_schema_name: Name for imported foreign schema
            server_name: Name for the foreign server
        """
        self.source_connection = source_connection
        self.target_connection = target_connection
        self.foreign_schema_name = foreign_schema_name
        self.server_name = server_name

    def _get_connection_params(self) -> tuple[str, str]:
        """Extract database connection parameters from source connection.

        Returns:
            Tuple of (dbname, user), defaulting to "postgres" for either
            value when it is absent from the connection parameters.
        """
        source_info = self.source_connection.info
        source_params = source_info.get_parameters()
        dbname = source_params.get("dbname", "postgres")
        user = source_params.get("user", "postgres")
        return dbname, user

    def _create_fdw_extension(self, cursor: psycopg.Cursor) -> None:
        """Create postgres_fdw extension if not exists.

        Args:
            cursor: Database cursor on the target connection
        """
        cursor.execute("CREATE EXTENSION IF NOT EXISTS postgres_fdw")

    def _create_foreign_server(self, cursor: psycopg.Cursor, dbname: str) -> None:
        """Create foreign server pointing to source database.

        NOTE(review): host/port are hard-coded to DEFAULT_HOST/DEFAULT_PORT,
        so the source database is assumed to be reachable on localhost:5432
        from the target server — confirm before using with a remote source.

        Args:
            cursor: Database cursor on the target connection
            dbname: Source database name
        """
        cursor.execute(
            sql.SQL("""
                CREATE SERVER IF NOT EXISTS {server}
                FOREIGN DATA WRAPPER postgres_fdw
                OPTIONS (
                    host {host},
                    dbname {dbname},
                    port {port}
                )
            """).format(
                server=sql.Identifier(self.server_name),
                host=sql.Literal(DEFAULT_HOST),
                dbname=sql.Literal(dbname),
                port=sql.Literal(DEFAULT_PORT),
            )
        )

    def _create_user_mapping(self, cursor: psycopg.Cursor, user: str) -> None:
        """Create user mapping for foreign server authentication.

        NOTE(review): the mapping uses an empty password, so it relies on
        trust/peer authentication (or a server-side .pgpass) being configured
        for the source database — verify the deployment's auth setup.

        Args:
            cursor: Database cursor on the target connection
            user: Source database user
        """
        cursor.execute(
            sql.SQL("""
                CREATE USER MAPPING IF NOT EXISTS FOR CURRENT_USER
                SERVER {server}
                OPTIONS (
                    user {user},
                    password ''
                )
            """).format(server=sql.Identifier(self.server_name), user=sql.Literal(user))
        )

    def _create_foreign_schema(self, cursor: psycopg.Cursor) -> None:
        """Create foreign schema container.

        Args:
            cursor: Database cursor on the target connection
        """
        cursor.execute(
            sql.SQL("CREATE SCHEMA IF NOT EXISTS {schema}").format(
                schema=sql.Identifier(self.foreign_schema_name)
            )
        )

    def _import_foreign_schema(self, cursor: psycopg.Cursor) -> None:
        """Import foreign schema tables from the source database's "public" schema.

        Args:
            cursor: Database cursor on the target connection
        """
        cursor.execute(
            sql.SQL("""
                IMPORT FOREIGN SCHEMA public
                FROM SERVER {server}
                INTO {schema}
            """).format(
                server=sql.Identifier(self.server_name),
                schema=sql.Identifier(self.foreign_schema_name),
            )
        )

    def setup_fdw(self, skip_import: bool = False) -> None:
        """Setup Foreign Data Wrapper to source database.

        This method performs the following steps:
        1. Creates postgres_fdw extension if not exists
        2. Creates foreign server pointing to source database
        3. Creates user mapping for authentication
        4. Creates foreign schema
        5. Optionally imports foreign schema from source database

        All steps run in a single transaction on the target connection; on
        any psycopg error the transaction is rolled back.

        Args:
            skip_import: If True, skip importing foreign schema (useful for testing)

        Raises:
            MigrationError: If FDW setup fails
        """
        try:
            with self.target_connection.cursor() as cursor:
                # Get connection parameters
                dbname, user = self._get_connection_params()

                # Setup FDW infrastructure
                self._create_fdw_extension(cursor)
                self._create_foreign_server(cursor, dbname)
                self._create_user_mapping(cursor, user)
                self._create_foreign_schema(cursor)

                # Import schema if requested
                if not skip_import:
                    self._import_foreign_schema(cursor)

            self.target_connection.commit()

        except psycopg.Error as e:
            self.target_connection.rollback()
            raise MigrationError(f"Failed to setup FDW: {e}") from e

    def cleanup_fdw(self) -> None:
        """Clean up FDW resources (server, mappings, schema).

        This method removes all FDW-related resources created by setup_fdw().
        Useful for testing or manual cleanup. Drops are idempotent
        (IF EXISTS), so calling this on a partially set-up state is safe.

        Raises:
            MigrationError: If cleanup fails
        """
        try:
            with self.target_connection.cursor() as cursor:
                cursor.execute(
                    sql.SQL("DROP SCHEMA IF EXISTS {schema} CASCADE").format(
                        schema=sql.Identifier(self.foreign_schema_name)
                    )
                )
                cursor.execute(
                    sql.SQL("DROP USER MAPPING IF EXISTS FOR CURRENT_USER SERVER {server}").format(
                        server=sql.Identifier(self.server_name)
                    )
                )
                cursor.execute(
                    sql.SQL("DROP SERVER IF EXISTS {server} CASCADE").format(
                        server=sql.Identifier(self.server_name)
                    )
                )

            self.target_connection.commit()

        except psycopg.Error as e:
            self.target_connection.rollback()
            raise MigrationError(f"Failed to cleanup FDW: {e}") from e

    def migrate_table(
        self,
        source_table: str,
        target_table: str,
        column_mapping: dict[str, str],
    ) -> int:
        """Migrate data from source table to target table with column mapping.

        Uses the FDW foreign schema to read from source and INSERT into target.
        Applies column name mappings during the SELECT.

        Args:
            source_table: Name of source table in foreign schema
            target_table: Name of target table in current database
            column_mapping: Mapping of source column names to target column names
                e.g., {"old_name": "new_name", "id": "id"}

        Returns:
            Number of rows migrated (from the INSERT's rowcount)

        Raises:
            MigrationError: If migration fails or column_mapping is empty

        Example:
            >>> migrator.migrate_table(
            ...     source_table="users",
            ...     target_table="users",
            ...     column_mapping={"full_name": "display_name", "id": "id"}
            ... )
            1000
        """
        if not column_mapping:
            raise MigrationError("column_mapping cannot be empty")

        try:
            with self.target_connection.cursor() as cursor:
                # Build SELECT clause with column mapping:
                # old_col AS new_col, old_col AS new_col, ...
                select_items = [
                    sql.SQL("{source} AS {target}").format(
                        source=sql.Identifier(source_col),
                        target=sql.Identifier(target_col),
                    )
                    for source_col, target_col in column_mapping.items()
                ]

                # Build target column list
                target_cols = [sql.Identifier(col) for col in column_mapping.values()]

                # Build INSERT ... SELECT statement (all identifiers quoted
                # via sql.Identifier, so arbitrary table/column names are safe)
                insert_query = sql.SQL("""
                    INSERT INTO {target_table} ({target_cols})
                    SELECT {select_items}
                    FROM {foreign_schema}.{source_table}
                """).format(
                    target_table=sql.Identifier(target_table),
                    target_cols=sql.SQL(", ").join(target_cols),
                    select_items=sql.SQL(", ").join(select_items),
                    foreign_schema=sql.Identifier(self.foreign_schema_name),
                    source_table=sql.Identifier(source_table),
                )

                cursor.execute(insert_query)
                rows_migrated = cursor.rowcount or 0

            self.target_connection.commit()
            return rows_migrated

        except psycopg.Error as e:
            self.target_connection.rollback()
            raise MigrationError(
                f"Failed to migrate table {source_table} → {target_table}: {e}"
            ) from e

    def migrate_table_copy(
        self,
        source_table: str,
        target_table: str,
        column_mapping: dict[str, str],
    ) -> int:
        """Migrate data using COPY strategy (10-20x faster for large tables).

        This method uses PostgreSQL's COPY command to stream data from source
        to target. It's optimized for large tables (>10M rows) and supports
        column mapping.

        The COPY strategy:
        1. Builds a SELECT query with column mapping on source table
        2. Uses COPY ... TO STDOUT to export data from source
        3. Buffers data in memory (both COPY directions run on the same
           target connection — the source is read via the foreign schema —
           and one connection cannot run two COPY operations concurrently,
           so full buffering is required)
        4. Uses COPY ... FROM STDIN to load data into target
        5. All in one transaction for safety

        Args:
            source_table: Name of source table in foreign schema
            target_table: Name of target table in current database
            column_mapping: Mapping of source column names to target column names
                e.g., {"old_name": "new_name", "id": "id"}

        Returns:
            Number of rows migrated

        Raises:
            MigrationError: If migration fails or column_mapping is empty

        Example:
            >>> migrator.migrate_table_copy(
            ...     source_table="large_events",
            ...     target_table="events",
            ...     column_mapping={"event_type": "type", "id": "id"}
            ... )
            100000000  # 100M rows migrated

        Note:
            This is 10-20x faster than the FDW strategy for large tables,
            but requires the source table to be in the foreign schema, and
            the whole CSV payload is held in memory for the duration of the
            migration.
        """
        if not column_mapping:
            raise MigrationError("column_mapping cannot be empty")

        buffer = BytesIO()

        try:
            # Build SELECT query with column mapping for COPY. We select from
            # the foreign schema using the source column names; the target
            # column names are applied by the COPY ... FROM column list.
            select_query = sql.SQL(
                "SELECT {select_items} FROM {foreign_schema}.{source_table}"
            ).format(
                select_items=sql.SQL(", ").join(
                    sql.Identifier(col) for col in column_mapping
                ),
                foreign_schema=sql.Identifier(self.foreign_schema_name),
                source_table=sql.Identifier(source_table),
            )

            # Build target column list (using mapped target names)
            target_cols = [sql.Identifier(col) for col in column_mapping.values()]

            # Step 1: COPY data from source to buffer
            with self.target_connection.cursor() as cursor:
                copy_to_query = sql.SQL("COPY ({select_query}) TO STDOUT WITH (FORMAT csv)").format(
                    select_query=select_query
                )

                with cursor.copy(copy_to_query.as_string(cursor)) as copy:
                    # Read all data into buffer
                    for chunk in copy:
                        buffer.write(chunk)

            # Reset buffer to beginning for reading
            buffer.seek(0)

            # Step 2: COPY data from buffer to target table
            with self.target_connection.cursor() as cursor:
                copy_from_query = sql.SQL(
                    "COPY {target_table} ({target_cols}) FROM STDIN WITH (FORMAT csv)"
                ).format(
                    target_table=sql.Identifier(target_table),
                    target_cols=sql.SQL(", ").join(target_cols),
                )

                with cursor.copy(copy_from_query.as_string(cursor)) as copy:
                    # Stream the buffer in 1 MiB chunks rather than
                    # buffer.getvalue(), which would duplicate the entire
                    # payload in memory a second time.
                    while chunk := buffer.read(1 << 20):
                        copy.write(chunk)

                # BUG FIX: previously this returned SELECT COUNT(*) on the
                # target table, which over-counts whenever the target was not
                # empty before the migration. psycopg 3 populates
                # cursor.rowcount from the COPY command tag, giving the exact
                # number of rows loaded by this operation.
                rows_migrated = cursor.rowcount if cursor.rowcount >= 0 else 0

            self.target_connection.commit()
            return rows_migrated

        except psycopg.Error as e:
            self.target_connection.rollback()
            raise MigrationError(
                f"Failed to migrate table {source_table} → {target_table} using COPY: {e}"
            ) from e
        finally:
            buffer.close()

    def analyze_tables(self, schema: str = "public") -> dict[str, dict[str, Any]]:
        """Analyze table sizes and recommend optimal migration strategy.

        This method queries the target database to get row counts for all tables,
        then recommends the optimal migration strategy (FDW or COPY) based on
        table size. Row counts come from pg_stat_user_tables.n_live_tup, which
        is a statistics estimate; tables with no statistics fall back to an
        exact (and potentially slow) COUNT(*).

        Strategy selection:
        - Tables with < 10M rows → FDW strategy (better for complex transformations)
        - Tables with ≥ 10M rows → COPY strategy (10-20x faster)

        Args:
            schema: Schema name to analyze (default: "public")

        Returns:
            Dictionary mapping table names to analysis results:
            {
                "table_name": {
                    "strategy": "fdw" | "copy",
                    "row_count": int,
                    "estimated_seconds": float
                }
            }

        Raises:
            MigrationError: If analysis fails

        Example:
            >>> migrator = SchemaToSchemaMigrator(...)
            >>> recommendations = migrator.analyze_tables()
            >>> print(recommendations)
            {
                "users": {
                    "strategy": "fdw",
                    "row_count": 50000,
                    "estimated_seconds": 0.1
                },
                "events": {
                    "strategy": "copy",
                    "row_count": 50000000,
                    "estimated_seconds": 8.3
                }
            }
        """
        try:
            recommendations = {}

            with self.target_connection.cursor() as cursor:
                # Get all tables in the schema with their estimated row counts
                cursor.execute(
                    sql.SQL("""
                        SELECT
                            relname AS tablename,
                            n_live_tup AS estimated_rows
                        FROM pg_stat_user_tables
                        WHERE schemaname = %s
                        ORDER BY relname
                    """),
                    (schema,),
                )

                for table_name, estimated_rows in cursor.fetchall():
                    # For tables without statistics, do an exact count
                    if estimated_rows is None or estimated_rows == 0:
                        cursor.execute(
                            sql.SQL("SELECT COUNT(*) FROM {schema}.{table}").format(
                                schema=sql.Identifier(schema),
                                table=sql.Identifier(table_name),
                            )
                        )
                        result = cursor.fetchone()
                        row_count = int(result[0]) if result else 0
                    else:
                        row_count = int(estimated_rows)

                    # Determine strategy based on row count threshold
                    if row_count >= LARGE_TABLE_THRESHOLD:
                        strategy = "copy"
                        estimated_seconds = row_count / COPY_THROUGHPUT
                    else:
                        strategy = "fdw"
                        estimated_seconds = row_count / FDW_THROUGHPUT

                    # Round to 3 decimal places, with minimum 0.001 for non-empty tables
                    if row_count > 0:
                        estimated_seconds = max(0.001, round(estimated_seconds, 3))
                    else:
                        estimated_seconds = 0.0

                    recommendations[table_name] = {
                        "strategy": strategy,
                        "row_count": row_count,
                        "estimated_seconds": estimated_seconds,
                    }

            return recommendations

        except psycopg.Error as e:
            raise MigrationError(f"Failed to analyze tables in schema '{schema}': {e}") from e

    def verify_migration(
        self,
        tables: list[str],
        source_schema: str = "old_schema",
        target_schema: str = "public",
    ) -> dict[str, dict[str, Any]]:
        """Verify migration completeness by comparing row counts.

        This method compares row counts between source and target tables to ensure
        data migration completed successfully. It's a critical verification step
        before cutover to ensure no data loss.

        Args:
            tables: List of table names to verify
            source_schema: Schema name containing source tables (default: "old_schema")
            target_schema: Schema name containing target tables (default: "public")

        Returns:
            Dictionary mapping table names to verification results:
            {
                "table_name": {
                    "source_count": int,
                    "target_count": int,
                    "match": bool,
                    "difference": int (target - source, negative means missing rows)
                }
            }

        Raises:
            MigrationError: If verification queries fail

        Example:
            >>> migrator = SchemaToSchemaMigrator(...)
            >>> results = migrator.verify_migration(["users", "posts"])
            >>> for table, result in results.items():
            ...     if not result["match"]:
            ...         print(f"❌ {table}: {result['difference']} rows missing!")
            ...     else:
            ...         print(f"✅ {table}: {result['source_count']} rows verified")
        """
        try:
            verification_results = {}

            with self.target_connection.cursor() as cursor:
                for table_name in tables:
                    # Count rows in source table (via foreign schema)
                    cursor.execute(
                        sql.SQL("SELECT COUNT(*) FROM {schema}.{table}").format(
                            schema=sql.Identifier(source_schema),
                            table=sql.Identifier(table_name),
                        )
                    )
                    source_result = cursor.fetchone()
                    source_count = int(source_result[0]) if source_result else 0

                    # Count rows in target table
                    cursor.execute(
                        sql.SQL("SELECT COUNT(*) FROM {schema}.{table}").format(
                            schema=sql.Identifier(target_schema),
                            table=sql.Identifier(table_name),
                        )
                    )
                    target_result = cursor.fetchone()
                    target_count = int(target_result[0]) if target_result else 0

                    # Calculate difference and match status
                    difference = target_count - source_count
                    match = source_count == target_count

                    verification_results[table_name] = {
                        "source_count": source_count,
                        "target_count": target_count,
                        "match": match,
                        "difference": difference,
                    }

            return verification_results

        except psycopg.Error as e:
            raise MigrationError(f"Failed to verify migration for tables {tables}: {e}") from e
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
"""Security utilities for Confiture.

This module provides security hardening features including:
- Input validation for SQL identifiers, paths, and configuration
- Secure logging with automatic secret redaction
- Defense-in-depth SQL safety checks
"""

from confiture.core.security.logging import SecureFormatter, configure_secure_logging
from confiture.core.security.validation import (
    ValidationError,
    sanitize_log_message,
    validate_config,
    validate_environment,
    validate_identifier,
    validate_path,
    validate_sql,
)

# Public API of the security package; re-exported from the validation and
# logging submodules so callers can import directly from confiture.core.security.
__all__ = [
    # Validation
    "ValidationError",
    "validate_identifier",
    "validate_path",
    "validate_environment",
    "validate_sql",
    "validate_config",
    "sanitize_log_message",
    # Logging
    "SecureFormatter",
    "configure_secure_logging",
]
|