fraiseql-confiture 0.3.4__cp311-cp311-win_amd64.whl

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (119)
  1. confiture/__init__.py +48 -0
  2. confiture/_core.cp311-win_amd64.pyd +0 -0
  3. confiture/cli/__init__.py +0 -0
  4. confiture/cli/dry_run.py +116 -0
  5. confiture/cli/lint_formatter.py +193 -0
  6. confiture/cli/main.py +1656 -0
  7. confiture/config/__init__.py +0 -0
  8. confiture/config/environment.py +263 -0
  9. confiture/core/__init__.py +51 -0
  10. confiture/core/anonymization/__init__.py +0 -0
  11. confiture/core/anonymization/audit.py +485 -0
  12. confiture/core/anonymization/benchmarking.py +372 -0
  13. confiture/core/anonymization/breach_notification.py +652 -0
  14. confiture/core/anonymization/compliance.py +617 -0
  15. confiture/core/anonymization/composer.py +298 -0
  16. confiture/core/anonymization/data_subject_rights.py +669 -0
  17. confiture/core/anonymization/factory.py +319 -0
  18. confiture/core/anonymization/governance.py +737 -0
  19. confiture/core/anonymization/performance.py +1092 -0
  20. confiture/core/anonymization/profile.py +284 -0
  21. confiture/core/anonymization/registry.py +195 -0
  22. confiture/core/anonymization/security/kms_manager.py +547 -0
  23. confiture/core/anonymization/security/lineage.py +888 -0
  24. confiture/core/anonymization/security/token_store.py +686 -0
  25. confiture/core/anonymization/strategies/__init__.py +41 -0
  26. confiture/core/anonymization/strategies/address.py +359 -0
  27. confiture/core/anonymization/strategies/credit_card.py +374 -0
  28. confiture/core/anonymization/strategies/custom.py +161 -0
  29. confiture/core/anonymization/strategies/date.py +218 -0
  30. confiture/core/anonymization/strategies/differential_privacy.py +398 -0
  31. confiture/core/anonymization/strategies/email.py +141 -0
  32. confiture/core/anonymization/strategies/format_preserving_encryption.py +310 -0
  33. confiture/core/anonymization/strategies/hash.py +150 -0
  34. confiture/core/anonymization/strategies/ip_address.py +235 -0
  35. confiture/core/anonymization/strategies/masking_retention.py +252 -0
  36. confiture/core/anonymization/strategies/name.py +298 -0
  37. confiture/core/anonymization/strategies/phone.py +119 -0
  38. confiture/core/anonymization/strategies/preserve.py +85 -0
  39. confiture/core/anonymization/strategies/redact.py +101 -0
  40. confiture/core/anonymization/strategies/salted_hashing.py +322 -0
  41. confiture/core/anonymization/strategies/text_redaction.py +183 -0
  42. confiture/core/anonymization/strategies/tokenization.py +334 -0
  43. confiture/core/anonymization/strategy.py +241 -0
  44. confiture/core/anonymization/syncer_audit.py +357 -0
  45. confiture/core/blue_green.py +683 -0
  46. confiture/core/builder.py +500 -0
  47. confiture/core/checksum.py +358 -0
  48. confiture/core/connection.py +132 -0
  49. confiture/core/differ.py +522 -0
  50. confiture/core/drift.py +564 -0
  51. confiture/core/dry_run.py +182 -0
  52. confiture/core/health.py +313 -0
  53. confiture/core/hooks/__init__.py +87 -0
  54. confiture/core/hooks/base.py +232 -0
  55. confiture/core/hooks/context.py +146 -0
  56. confiture/core/hooks/execution_strategies.py +57 -0
  57. confiture/core/hooks/observability.py +220 -0
  58. confiture/core/hooks/phases.py +53 -0
  59. confiture/core/hooks/registry.py +295 -0
  60. confiture/core/large_tables.py +775 -0
  61. confiture/core/linting/__init__.py +70 -0
  62. confiture/core/linting/composer.py +192 -0
  63. confiture/core/linting/libraries/__init__.py +17 -0
  64. confiture/core/linting/libraries/gdpr.py +168 -0
  65. confiture/core/linting/libraries/general.py +184 -0
  66. confiture/core/linting/libraries/hipaa.py +144 -0
  67. confiture/core/linting/libraries/pci_dss.py +104 -0
  68. confiture/core/linting/libraries/sox.py +120 -0
  69. confiture/core/linting/schema_linter.py +491 -0
  70. confiture/core/linting/versioning.py +151 -0
  71. confiture/core/locking.py +389 -0
  72. confiture/core/migration_generator.py +298 -0
  73. confiture/core/migrator.py +793 -0
  74. confiture/core/observability/__init__.py +44 -0
  75. confiture/core/observability/audit.py +323 -0
  76. confiture/core/observability/logging.py +187 -0
  77. confiture/core/observability/metrics.py +174 -0
  78. confiture/core/observability/tracing.py +192 -0
  79. confiture/core/pg_version.py +418 -0
  80. confiture/core/pool.py +406 -0
  81. confiture/core/risk/__init__.py +39 -0
  82. confiture/core/risk/predictor.py +188 -0
  83. confiture/core/risk/scoring.py +248 -0
  84. confiture/core/rollback_generator.py +388 -0
  85. confiture/core/schema_analyzer.py +769 -0
  86. confiture/core/schema_to_schema.py +590 -0
  87. confiture/core/security/__init__.py +32 -0
  88. confiture/core/security/logging.py +201 -0
  89. confiture/core/security/validation.py +416 -0
  90. confiture/core/signals.py +371 -0
  91. confiture/core/syncer.py +540 -0
  92. confiture/exceptions.py +192 -0
  93. confiture/integrations/__init__.py +0 -0
  94. confiture/models/__init__.py +0 -0
  95. confiture/models/lint.py +193 -0
  96. confiture/models/migration.py +180 -0
  97. confiture/models/schema.py +203 -0
  98. confiture/scenarios/__init__.py +36 -0
  99. confiture/scenarios/compliance.py +586 -0
  100. confiture/scenarios/ecommerce.py +199 -0
  101. confiture/scenarios/financial.py +253 -0
  102. confiture/scenarios/healthcare.py +315 -0
  103. confiture/scenarios/multi_tenant.py +340 -0
  104. confiture/scenarios/saas.py +295 -0
  105. confiture/testing/FRAMEWORK_API.md +722 -0
  106. confiture/testing/__init__.py +38 -0
  107. confiture/testing/fixtures/__init__.py +11 -0
  108. confiture/testing/fixtures/data_validator.py +229 -0
  109. confiture/testing/fixtures/migration_runner.py +167 -0
  110. confiture/testing/fixtures/schema_snapshotter.py +352 -0
  111. confiture/testing/frameworks/__init__.py +10 -0
  112. confiture/testing/frameworks/mutation.py +587 -0
  113. confiture/testing/frameworks/performance.py +479 -0
  114. confiture/testing/utils/__init__.py +0 -0
  115. fraiseql_confiture-0.3.4.dist-info/METADATA +438 -0
  116. fraiseql_confiture-0.3.4.dist-info/RECORD +119 -0
  117. fraiseql_confiture-0.3.4.dist-info/WHEEL +4 -0
  118. fraiseql_confiture-0.3.4.dist-info/entry_points.txt +2 -0
  119. fraiseql_confiture-0.3.4.dist-info/licenses/LICENSE +21 -0
confiture/core/pool.py ADDED
@@ -0,0 +1,406 @@
+ """Connection pooling support for production workloads.
+
+ Provides connection pool management with:
+ - Configurable pool size (min/max connections)
+ - Connection health checks
+ - Statement timeout configuration
+ - Automatic reconnection
+ - PgBouncer awareness (transaction pooling mode)
+
+ Example:
+     >>> from confiture.core.pool import ConnectionPool, PoolConfig
+     >>> config = PoolConfig(min_size=2, max_size=10)
+     >>> pool = ConnectionPool(database_url="postgresql://localhost/mydb", config=config)
+     >>> with pool.connection() as conn:
+     ...     with conn.cursor() as cur:
+     ...         cur.execute("SELECT 1")
+ """
+
+ import logging
+ from collections.abc import Iterator
+ from contextlib import contextmanager
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ import psycopg
+ from psycopg_pool import ConnectionPool as PsycopgPool
+ from psycopg_pool import PoolTimeout
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class PoolConfig:
+     """Configuration for connection pooling.
+
+     Attributes:
+         min_size: Minimum number of connections to maintain (default: 1)
+         max_size: Maximum number of connections allowed (default: 10)
+         timeout: Timeout in seconds to get a connection (default: 30.0)
+         max_idle: Maximum time a connection can be idle before being closed (default: 600.0)
+         max_lifetime: Maximum time a connection can exist before being recycled (default: 3600.0)
+         statement_timeout_ms: Default statement timeout in milliseconds (default: 0 = no timeout)
+         check_connection: Whether to check connection health before returning (default: True)
+         reconnect_timeout: Timeout for reconnection attempts (default: 300.0)
+         pgbouncer_mode: Enable PgBouncer compatibility mode (default: False).
+             When True, disables prepared statements and uses settings compatible
+             with transaction pooling.
+
+     Example:
+         >>> config = PoolConfig(min_size=2, max_size=20, statement_timeout_ms=30000)
+         >>> config = PoolConfig(pgbouncer_mode=True)  # For PgBouncer setups
+     """
+
+     min_size: int = 1
+     max_size: int = 10
+     timeout: float = 30.0
+     max_idle: float = 600.0
+     max_lifetime: float = 3600.0
+     statement_timeout_ms: int = 0  # 0 = no timeout
+     check_connection: bool = True
+     reconnect_timeout: float = 300.0
+     pgbouncer_mode: bool = False
+
+     def __post_init__(self) -> None:
+         """Validate configuration values."""
+         if self.min_size < 0:
+             raise ValueError("min_size must be >= 0")
+         if self.max_size < 1:
+             raise ValueError("max_size must be >= 1")
+         if self.min_size > self.max_size:
+             raise ValueError("min_size cannot exceed max_size")
+         if self.timeout <= 0:
+             raise ValueError("timeout must be > 0")
+         if self.statement_timeout_ms < 0:
+             raise ValueError("statement_timeout_ms must be >= 0")
+
+
+ @dataclass
+ class PoolStats:
+     """Statistics about the connection pool.
+
+     Attributes:
+         pool_size: Current number of connections in the pool
+         pool_available: Number of available (idle) connections
+         requests_waiting: Number of requests waiting for a connection
+         connections_used: Number of connections currently in use
+     """
+
+     pool_size: int
+     pool_available: int
+     requests_waiting: int
+     connections_used: int = field(init=False)
+
+     def __post_init__(self) -> None:
+         """Calculate derived statistics."""
+         self.connections_used = self.pool_size - self.pool_available
+
+
+ class PoolExhaustedError(Exception):
+     """Raised when the connection pool is exhausted and the timeout expires."""
+
+
+ class ConnectionHealthError(Exception):
+     """Raised when a connection health check fails."""
+
+
+ class ConnectionPool:
+     """Managed connection pool for PostgreSQL.
+
+     Wraps psycopg_pool.ConnectionPool with additional features:
+     - Health checking
+     - Statement timeout configuration
+     - PgBouncer compatibility
+     - Graceful reconnection
+
+     Example:
+         >>> pool = ConnectionPool("postgresql://localhost/mydb")
+         >>> with pool.connection() as conn:
+         ...     # Connection is automatically returned to the pool after use
+         ...     pass
+         >>> pool.close()
+
+     Example with configuration:
+         >>> config = PoolConfig(min_size=5, max_size=20)
+         >>> pool = ConnectionPool("postgresql://localhost/mydb", config=config)
+         >>> stats = pool.get_stats()
+         >>> print(f"Using {stats.connections_used} of {stats.pool_size} connections")
+     """
+
+     def __init__(
+         self,
+         database_url: str | None = None,
+         config: PoolConfig | None = None,
+         **connection_kwargs: Any,
+     ):
+         """Initialize connection pool.
+
+         Args:
+             database_url: PostgreSQL connection URL
+             config: Pool configuration (uses defaults if None)
+             **connection_kwargs: Additional arguments passed to psycopg.connect()
+                 (host, port, dbname, user, password, etc.)
+
+         Raises:
+             ValueError: If neither database_url nor connection_kwargs is provided
+         """
+         self.config = config or PoolConfig()
+         self._database_url = database_url
+         self._connection_kwargs = connection_kwargs
+         self._pool: PsycopgPool | None = None
+
+         # Build connection string
+         if database_url:
+             self._conninfo = database_url
+         elif connection_kwargs:
+             self._conninfo = self._build_conninfo(connection_kwargs)
+         else:
+             raise ValueError("Either database_url or connection_kwargs required")
+
+         self._initialize_pool()
+
+     def _build_conninfo(self, kwargs: dict[str, Any]) -> str:
+         """Build connection string from kwargs."""
+         parts = []
+         mapping = {
+             "host": "host",
+             "port": "port",
+             "dbname": "dbname",
+             "database": "dbname",  # alias
+             "user": "user",
+             "password": "password",
+         }
+         for key, conninfo_key in mapping.items():
+             if key in kwargs:
+                 value = kwargs[key]
+                 # Escape single quotes in values
+                 if isinstance(value, str) and "'" in value:
+                     value = value.replace("'", "\\'")
+                 parts.append(f"{conninfo_key}={value}")
+
+         return " ".join(parts)
+
+     def _initialize_pool(self) -> None:
+         """Initialize the underlying psycopg pool."""
+         # Configure connection options
+         kwargs: dict[str, Any] = {}
+
+         # For PgBouncer mode, disable prepared statements
+         if self.config.pgbouncer_mode:
+             kwargs["prepare_threshold"] = None
+             logger.info("PgBouncer mode enabled: prepared statements disabled")
+
+         self._pool = PsycopgPool(
+             conninfo=self._conninfo,
+             min_size=self.config.min_size,
+             max_size=self.config.max_size,
+             timeout=self.config.timeout,
+             max_idle=self.config.max_idle,
+             max_lifetime=self.config.max_lifetime,
+             reconnect_timeout=self.config.reconnect_timeout,
+             check=PsycopgPool.check_connection if self.config.check_connection else None,
+             kwargs=kwargs if kwargs else None,
+             open=True,
+         )
+
+         logger.info(
+             f"Connection pool initialized: min={self.config.min_size}, max={self.config.max_size}"
+         )
+
+     @contextmanager
+     def connection(self, timeout: float | None = None) -> Iterator[psycopg.Connection]:
+         """Get a connection from the pool.
+
+         The connection is automatically returned to the pool when the context
+         manager exits. If an exception occurs, the connection is still returned
+         but may be discarded if it's in a bad state.
+
+         Args:
+             timeout: Override default timeout (seconds) for getting a connection
+
+         Yields:
+             PostgreSQL connection
+
+         Raises:
+             PoolExhaustedError: If no connection is available within the timeout
+             ConnectionHealthError: If the connection fails its health check
+
+         Example:
+             >>> with pool.connection() as conn:
+             ...     with conn.cursor() as cur:
+             ...         cur.execute("SELECT 1")
+             ...         result = cur.fetchone()
+         """
+         if self._pool is None:
+             raise RuntimeError("Connection pool not initialized")
+
+         effective_timeout = timeout if timeout is not None else self.config.timeout
+
+         try:
+             with self._pool.connection(timeout=effective_timeout) as conn:
+                 # Apply statement timeout if configured
+                 if self.config.statement_timeout_ms > 0:
+                     self._set_statement_timeout(conn, self.config.statement_timeout_ms)
+
+                 yield conn
+
+         except PoolTimeout as e:
+             stats = self.get_stats()
+             raise PoolExhaustedError(
+                 f"Connection pool exhausted after {effective_timeout}s. "
+                 f"Pool stats: {stats.connections_used}/{stats.pool_size} in use, "
+                 f"{stats.requests_waiting} waiting"
+             ) from e
+
+     def _set_statement_timeout(self, conn: psycopg.Connection, timeout_ms: int) -> None:
+         """Set statement timeout on a connection.
+
+         Args:
+             conn: Database connection
+             timeout_ms: Timeout in milliseconds
+         """
+         try:
+             with conn.cursor() as cur:
+                 cur.execute(f"SET statement_timeout = {timeout_ms}")
+         except psycopg.Error as e:
+             logger.warning(f"Failed to set statement timeout: {e}")
+
+     def get_stats(self) -> PoolStats:
+         """Get current pool statistics.
+
+         Returns:
+             PoolStats with current pool state
+
+         Example:
+             >>> stats = pool.get_stats()
+             >>> if stats.connections_used > stats.pool_size * 0.8:
+             ...     print("Pool is running hot!")
+         """
+         if self._pool is None:
+             return PoolStats(pool_size=0, pool_available=0, requests_waiting=0)
+
+         # psycopg_pool's get_stats() returns a plain dict; read it once
+         stats = self._pool.get_stats()
+         return PoolStats(
+             pool_size=stats.get("pool_size", 0),
+             pool_available=stats.get("pool_available", 0),
+             requests_waiting=stats.get("requests_waiting", 0),
+         )
+
+     def check_health(self) -> bool:
+         """Check if the pool is healthy by testing a connection.
+
+         Returns:
+             True if pool is healthy, False otherwise
+
+         Example:
+             >>> if not pool.check_health():
+             ...     logger.error("Database connection pool unhealthy!")
+         """
+         try:
+             with self.connection(timeout=5.0) as conn, conn.cursor() as cur:
+                 cur.execute("SELECT 1")
+                 result = cur.fetchone()
+                 return result is not None and result[0] == 1
+         except Exception as e:
+             logger.warning(f"Health check failed: {e}")
+             return False
+
+     def resize(self, min_size: int | None = None, max_size: int | None = None) -> None:
+         """Resize the connection pool.
+
+         Args:
+             min_size: New minimum size (or None to keep current)
+             max_size: New maximum size (or None to keep current)
+
+         Example:
+             >>> pool.resize(min_size=5, max_size=50)  # Scale up
+         """
+         if self._pool is None:
+             return
+
+         new_min = min_size if min_size is not None else self.config.min_size
+         new_max = max_size if max_size is not None else self.config.max_size
+
+         if new_min > new_max:
+             raise ValueError("min_size cannot exceed max_size")
+
+         self._pool.resize(min_size=new_min, max_size=new_max)
+         self.config.min_size = new_min
+         self.config.max_size = new_max
+
+         logger.info(f"Pool resized: min={new_min}, max={new_max}")
+
+     def close(self) -> None:
+         """Close the connection pool.
+
+         Waits for all connections to be returned, then closes them.
+         Call this during application shutdown.
+
+         Example:
+             >>> try:
+             ...     # Use pool
+             ... finally:
+             ...     pool.close()
+         """
+         if self._pool is not None:
+             self._pool.close()
+             self._pool = None
+             logger.info("Connection pool closed")
+
+     def __enter__(self) -> "ConnectionPool":
+         """Support using the pool as a context manager."""
+         return self
+
+     def __exit__(self, *args: Any) -> None:
+         """Close the pool when exiting the context."""
+         self.close()
+
+
+ def create_pool_from_config(config: dict[str, Any]) -> ConnectionPool:
+     """Create a connection pool from a configuration dictionary.
+
+     Args:
+         config: Configuration with a 'database_url' key or a 'database'
+             section, plus an optional 'pool' section
+
+     Returns:
+         Configured ConnectionPool
+
+     Example:
+         >>> config = {
+         ...     "database_url": "postgresql://localhost/mydb",
+         ...     "pool": {"min_size": 2, "max_size": 20}
+         ... }
+         >>> pool = create_pool_from_config(config)
+     """
+     # Get pool configuration
+     pool_config_dict = config.get("pool", {})
+     pool_config = PoolConfig(
+         min_size=pool_config_dict.get("min_size", 1),
+         max_size=pool_config_dict.get("max_size", 10),
+         timeout=pool_config_dict.get("timeout", 30.0),
+         max_idle=pool_config_dict.get("max_idle", 600.0),
+         max_lifetime=pool_config_dict.get("max_lifetime", 3600.0),
+         statement_timeout_ms=pool_config_dict.get("statement_timeout_ms", 0),
+         check_connection=pool_config_dict.get("check_connection", True),
+         reconnect_timeout=pool_config_dict.get("reconnect_timeout", 300.0),
+         pgbouncer_mode=pool_config_dict.get("pgbouncer_mode", False),
+     )
+
+     # Get database connection info
+     database_url = config.get("database_url")
+     if database_url:
+         return ConnectionPool(database_url=database_url, config=pool_config)
+
+     # Fall back to the 'database' section
+     db_config = config.get("database", {})
+     return ConnectionPool(
+         config=pool_config,
+         host=db_config.get("host", "localhost"),
+         port=db_config.get("port", 5432),
+         dbname=db_config.get("database", "postgres"),
+         user=db_config.get("user", "postgres"),
+         password=db_config.get("password", ""),
+     )
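
For orientation, here is a minimal usage sketch for the pool module added above. It is an editor-added illustration, not part of the package: the connection URL and query are placeholders, and exact pool behaviour depends on the psycopg/psycopg_pool versions the wheel was built against.

    # Hypothetical end-to-end use of confiture.core.pool (placeholder URL).
    from confiture.core.pool import ConnectionPool, PoolConfig, create_pool_from_config

    config = PoolConfig(min_size=2, max_size=10, statement_timeout_ms=30_000)

    with ConnectionPool("postgresql://localhost/mydb", config=config) as pool:
        # Borrow a connection; it returns to the pool when the block exits
        with pool.connection() as conn, conn.cursor() as cur:
            cur.execute("SELECT now()")
            print(cur.fetchone())

        # Observability hooks exposed by this module
        stats = pool.get_stats()
        print(f"{stats.connections_used}/{stats.pool_size} connections in use")
        print("healthy:", pool.check_health())

    # Equivalent construction from a settings dict
    pool = create_pool_from_config({
        "database_url": "postgresql://localhost/mydb",
        "pool": {"min_size": 2, "max_size": 10},
    })
    pool.close()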
confiture/core/risk/__init__.py ADDED
@@ -0,0 +1,39 @@
+ """Advanced Risk Assessment System.
+
+ Provides:
+ - Transparent risk scoring formula with explicit weights
+ - Downtime predictions with confidence bounds
+ - Historical migration tracking
+ """
+
+ from __future__ import annotations
+
+ from .predictor import (
+     DowntimeEstimate,
+     DowntimePredictor,
+     HistoricalMigration,
+     HistoricalMigrations,
+     MigrationOperation,
+ )
+ from .scoring import (
+     DataAnomaly,
+     RiskFactor,
+     RiskLevel,
+     RiskScoringFormula,
+     Severity,
+ )
+
+ __all__ = [
+     # Scoring
+     "RiskLevel",
+     "Severity",
+     "DataAnomaly",
+     "RiskFactor",
+     "RiskScoringFormula",
+     # Prediction
+     "DowntimePredictor",
+     "DowntimeEstimate",
+     "MigrationOperation",
+     "HistoricalMigrations",
+     "HistoricalMigration",
+ ]
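
Since this __init__ re-exports the full risk API, downstream code can import from the package root instead of the submodules. A one-line illustration (editor-added, not from the package):

    from confiture.core.risk import DowntimePredictor, MigrationOperation, RiskScoringFormula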
confiture/core/risk/predictor.py ADDED
@@ -0,0 +1,188 @@
+ """Downtime prediction with confidence bounds."""
+
+ from __future__ import annotations
+
+ import logging
+ import statistics
+ from dataclasses import dataclass, field
+ from typing import Any
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclass
+ class MigrationOperation:
+     """Represents a migration operation."""
+
+     id: str
+     type: str  # "ADD_COLUMN", "ALTER_TYPE", etc.
+     table_size_mb: int
+     table_name: str
+
+
+ @dataclass
+ class HistoricalMigration:
+     """Historical migration record."""
+
+     operation_type: str
+     table_size_mb: int
+     actual_downtime_ms: int
+     recorded_at: str = ""
+
+
+ @dataclass
+ class DowntimeEstimate:
+     """Downtime estimate with explicit uncertainty."""
+
+     estimated_downtime_ms: int  # Point estimate
+     lower_bound_ms: int  # 80% confidence lower
+     upper_bound_ms: int  # 80% confidence upper
+     confidence_level: float  # 0.0-1.0
+     estimate_method: str  # "heuristic", "historical"
+     contributing_factors: dict[str, Any] = field(default_factory=dict)
+     caveats: list[str] = field(default_factory=list)
+
+
+ class HistoricalMigrations:
+     """Manage historical migration data."""
+
+     def __init__(self) -> None:
+         self.migrations: list[HistoricalMigration] = []
+
+     def add(self, migration: HistoricalMigration) -> None:
+         """Add a migration record."""
+         self.migrations.append(migration)
+
+     def find_similar(
+         self,
+         table_size_mb: int,
+         operation_type: str,
+         max_results: int = 10,
+     ) -> list[HistoricalMigration]:
+         """Find past migrations of the same type within ±20% of the table size."""
+         similar = [
+             m
+             for m in self.migrations
+             if m.operation_type == operation_type
+             and abs(m.table_size_mb - table_size_mb) / max(table_size_mb, 1) < 0.2
+         ]
+         return similar[:max_results]
+
+
+ class DowntimePredictor:
+     """Predict migration downtime with confidence."""
+
+     def __init__(self, historical_data: HistoricalMigrations | None = None):
+         self.historical_data = historical_data
+         self.prediction_method = "historical" if historical_data else "heuristic"
+
+     async def predict_downtime(
+         self,
+         operation: MigrationOperation,
+     ) -> DowntimeEstimate:
+         """Predict downtime with confidence intervals."""
+         if self.prediction_method == "historical":
+             return await self._predict_from_history(operation)
+         else:
+             return await self._predict_heuristic(operation)
+
+     async def _predict_from_history(
+         self,
+         operation: MigrationOperation,
+     ) -> DowntimeEstimate:
+         """Use historical data to predict downtime."""
+         # Defensive: without history we can only use the heuristic path
+         if self.historical_data is None:
+             return await self._predict_heuristic(operation)
+
+         # Find similar past migrations
+         similar = self.historical_data.find_similar(
+             table_size_mb=operation.table_size_mb,
+             operation_type=operation.type,
+             max_results=10,
+         )
+
+         if not similar:
+             # Fall back to heuristic
+             return await self._predict_heuristic(operation)
+
+         actual_downtimes = [m.actual_downtime_ms for m in similar]
+
+         mean = statistics.mean(actual_downtimes)
+         stdev = statistics.stdev(actual_downtimes) if len(actual_downtimes) > 1 else 0
+
+         # Clamp confidence to [0, 1]; a large spread would otherwise push it negative
+         confidence = max(0.0, min(1.0, 1.0 - (stdev / mean))) if mean > 0 else 0.5
+
+         return DowntimeEstimate(
+             estimated_downtime_ms=int(mean),
+             lower_bound_ms=max(0, int(mean - 2 * stdev)),
+             upper_bound_ms=int(mean + 2 * stdev),
+             confidence_level=confidence,
+             estimate_method="historical",
+             contributing_factors={
+                 "similar_migrations": len(similar),
+                 "average_actual_downtime_ms": int(mean),
+                 "std_deviation_ms": int(stdev),
+             },
+             caveats=[
+                 f"Based on {len(similar)} similar migrations",
+                 f"Standard deviation: {stdev:.0f}ms",
+                 "System load at migration time may differ",
+                 "Database statistics may have changed",
+             ],
+         )
+
+     async def _predict_heuristic(
+         self,
+         operation: MigrationOperation,
+     ) -> DowntimeEstimate:
+         """Heuristic prediction (no historical data)."""
+         # Base times in milliseconds
+         base_time_ms = {
+             "ADD_COLUMN": 100,
+             "DROP_COLUMN": 100,
+             "RENAME_COLUMN": 50,
+             "ALTER_TYPE": 500,
+             "ADD_INDEX": 50,
+             "DROP_INDEX": 20,
+             "ADD_CONSTRAINT": 200,
+             "DROP_CONSTRAINT": 50,
+         }.get(operation.type, 100)
+
+         # Adjust by table size (size in GB)
+         size_gb = operation.table_size_mb / 1024
+
+         # Different operation types scale differently
+         if operation.type == "ALTER_TYPE":
+             # Full table rewrite - 2000 ms (2 s) per GB
+             size_adjustment = int(size_gb * 2000)
+         elif operation.type == "ADD_INDEX":
+             # Index build - 500 ms (0.5 s) per GB
+             size_adjustment = int(size_gb * 500)
+         else:
+             # Most operations - 1000 ms (1 s) per GB
+             size_adjustment = int(size_gb * 1000)
+
+         estimated = base_time_ms + size_adjustment
+
+         # High uncertainty for heuristic estimates
+         return DowntimeEstimate(
+             estimated_downtime_ms=estimated,
+             lower_bound_ms=max(0, int(estimated * 0.5)),  # -50%
+             upper_bound_ms=int(estimated * 2.0),  # +100%
+             confidence_level=0.3,  # Low confidence (heuristic only)
+             estimate_method="heuristic",
+             contributing_factors={
+                 "base_time_ms": base_time_ms,
+                 "size_adjustment_ms": size_adjustment,
+                 "table_size_mb": operation.table_size_mb,
+             },
+             caveats=[
+                 "⚠️ HEURISTIC ESTIMATE - Low confidence (0.3/1.0)",
+                 "No historical data available for calibration",
+                 "Actual downtime depends on:",
+                 "  - System load and concurrent queries",
+                 "  - Database configuration (work_mem, etc.)",
+                 "  - Lock contention from other operations",
+                 "  - Hardware capabilities (SSD vs HDD)",
+                 "RECOMMENDATION: Record actual downtime to improve predictions",
+                 "The next prediction will be more accurate once historical data is collected",
+             ],
+         )
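
To make the heuristic concrete: an ALTER_TYPE on a 10 GB table gets the 500 ms base plus 10 × 2000 ms for the full rewrite, i.e. a 20,500 ms point estimate, reported with bounds of roughly 10 s to 41 s at confidence 0.3. The sketch below (editor-added; the ids, table names, and timings are illustrative) drives both prediction paths:

    import asyncio

    from confiture.core.risk import (
        DowntimePredictor,
        HistoricalMigration,
        HistoricalMigrations,
        MigrationOperation,
    )

    async def main() -> None:
        op = MigrationOperation(
            id="m_0042",            # illustrative id
            type="ALTER_TYPE",
            table_size_mb=10_240,   # 10 GB
            table_name="orders",
        )

        # No history: heuristic path, 500 + 10 * 2000 = 20,500 ms point estimate
        est = await DowntimePredictor().predict_downtime(op)
        print(est.estimate_method, est.estimated_downtime_ms, est.confidence_level)

        # With recorded outcomes, similar migrations (same type, within ±20% of
        # the table size) drive the estimate instead
        history = HistoricalMigrations()
        for ms in (18_000, 21_000, 19_500):
            history.add(HistoricalMigration("ALTER_TYPE", 10_000, ms))
        est = await DowntimePredictor(history).predict_downtime(op)
        print(est.estimate_method, est.lower_bound_ms, est.upper_bound_ms)

    asyncio.run(main())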