duckguard 2.0.0__py3-none-any.whl → 2.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. duckguard/__init__.py +55 -28
  2. duckguard/anomaly/__init__.py +29 -1
  3. duckguard/anomaly/baselines.py +294 -0
  4. duckguard/anomaly/detector.py +1 -5
  5. duckguard/anomaly/methods.py +17 -5
  6. duckguard/anomaly/ml_methods.py +724 -0
  7. duckguard/cli/main.py +561 -56
  8. duckguard/connectors/__init__.py +2 -2
  9. duckguard/connectors/bigquery.py +1 -1
  10. duckguard/connectors/databricks.py +1 -1
  11. duckguard/connectors/factory.py +2 -3
  12. duckguard/connectors/files.py +1 -1
  13. duckguard/connectors/kafka.py +2 -2
  14. duckguard/connectors/mongodb.py +1 -1
  15. duckguard/connectors/mysql.py +1 -1
  16. duckguard/connectors/oracle.py +1 -1
  17. duckguard/connectors/postgres.py +1 -2
  18. duckguard/connectors/redshift.py +1 -1
  19. duckguard/connectors/snowflake.py +1 -2
  20. duckguard/connectors/sqlite.py +1 -1
  21. duckguard/connectors/sqlserver.py +10 -13
  22. duckguard/contracts/__init__.py +6 -6
  23. duckguard/contracts/diff.py +1 -1
  24. duckguard/contracts/generator.py +5 -6
  25. duckguard/contracts/loader.py +4 -4
  26. duckguard/contracts/validator.py +3 -4
  27. duckguard/core/__init__.py +3 -3
  28. duckguard/core/column.py +588 -5
  29. duckguard/core/dataset.py +708 -3
  30. duckguard/core/result.py +328 -1
  31. duckguard/core/scoring.py +1 -2
  32. duckguard/errors.py +362 -0
  33. duckguard/freshness/__init__.py +33 -0
  34. duckguard/freshness/monitor.py +429 -0
  35. duckguard/history/__init__.py +44 -0
  36. duckguard/history/schema.py +301 -0
  37. duckguard/history/storage.py +479 -0
  38. duckguard/history/trends.py +348 -0
  39. duckguard/integrations/__init__.py +31 -0
  40. duckguard/integrations/airflow.py +387 -0
  41. duckguard/integrations/dbt.py +458 -0
  42. duckguard/notifications/__init__.py +61 -0
  43. duckguard/notifications/email.py +508 -0
  44. duckguard/notifications/formatter.py +118 -0
  45. duckguard/notifications/notifiers.py +357 -0
  46. duckguard/profiler/auto_profile.py +3 -3
  47. duckguard/pytest_plugin/__init__.py +1 -1
  48. duckguard/pytest_plugin/plugin.py +1 -1
  49. duckguard/reporting/console.py +2 -2
  50. duckguard/reports/__init__.py +42 -0
  51. duckguard/reports/html_reporter.py +514 -0
  52. duckguard/reports/pdf_reporter.py +114 -0
  53. duckguard/rules/__init__.py +3 -3
  54. duckguard/rules/executor.py +3 -4
  55. duckguard/rules/generator.py +8 -5
  56. duckguard/rules/loader.py +5 -5
  57. duckguard/rules/schema.py +23 -0
  58. duckguard/schema_history/__init__.py +40 -0
  59. duckguard/schema_history/analyzer.py +414 -0
  60. duckguard/schema_history/tracker.py +288 -0
  61. duckguard/semantic/__init__.py +1 -1
  62. duckguard/semantic/analyzer.py +0 -2
  63. duckguard/semantic/detector.py +17 -1
  64. duckguard/semantic/validators.py +2 -1
  65. duckguard-2.3.0.dist-info/METADATA +953 -0
  66. duckguard-2.3.0.dist-info/RECORD +77 -0
  67. duckguard-2.0.0.dist-info/METADATA +0 -221
  68. duckguard-2.0.0.dist-info/RECORD +0 -55
  69. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/WHEEL +0 -0
  70. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/entry_points.txt +0 -0
  71. {duckguard-2.0.0.dist-info → duckguard-2.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -10,14 +10,15 @@ from typing import Any
10
10
 
11
11
  import yaml
12
12
 
13
- from duckguard.core.dataset import Dataset
14
13
  from duckguard.connectors import connect
14
+ from duckguard.core.dataset import Dataset
15
15
  from duckguard.rules.schema import (
16
- RuleSet,
17
- ColumnRules,
16
+ BUILTIN_PATTERNS,
17
+ CASE_SENSITIVE_PATTERNS,
18
18
  Check,
19
19
  CheckType,
20
- BUILTIN_PATTERNS,
20
+ ColumnRules,
21
+ RuleSet,
21
22
  )
22
23
 
23
24
 
@@ -215,9 +216,11 @@ class RuleGenerator:
215
216
 
216
217
  for pattern_name, pattern in self._patterns.items():
217
218
  try:
219
+ # Use case-sensitive matching for certain patterns (slug, identifier)
220
+ flags = 0 if pattern_name in CASE_SENSITIVE_PATTERNS else re.IGNORECASE
218
221
  matches = sum(
219
222
  1 for v in sample
220
- if re.match(pattern, str(v), re.IGNORECASE)
223
+ if re.match(pattern, str(v), flags)
221
224
  )
222
225
  match_rate = matches / len(sample)
223
226
 
duckguard/rules/loader.py CHANGED
@@ -35,13 +35,13 @@ from typing import Any
35
35
  import yaml
36
36
 
37
37
  from duckguard.rules.schema import (
38
- RuleSet,
39
- ColumnRules,
40
- TableRules,
38
+ BUILTIN_PATTERNS,
41
39
  Check,
42
40
  CheckType,
41
+ ColumnRules,
42
+ RuleSet,
43
43
  Severity,
44
- BUILTIN_PATTERNS,
44
+ TableRules,
45
45
  )
46
46
 
47
47
 
@@ -71,7 +71,7 @@ def load_rules(path: str | Path) -> RuleSet:
71
71
  if not path.exists():
72
72
  raise FileNotFoundError(f"Rules file not found: {path}")
73
73
 
74
- with open(path, "r", encoding="utf-8") as f:
74
+ with open(path, encoding="utf-8") as f:
75
75
  content = f.read()
76
76
 
77
77
  return load_rules_from_string(content, source_file=str(path))
duckguard/rules/schema.py CHANGED
@@ -60,6 +60,12 @@ class CheckType(Enum):
60
60
  # Custom SQL
61
61
  CUSTOM_SQL = "custom_sql"
62
62
 
63
+ # Cross-dataset / Reference checks
64
+ EXISTS_IN = "exists_in" # FK check: all values exist in reference
65
+ REFERENCES = "references" # FK check with options (allow_nulls, etc.)
66
+ MATCHES_VALUES = "matches_values" # Column values match between datasets
67
+ ROW_COUNT_MATCHES = "row_count_matches" # Row counts match between datasets
68
+
63
69
 
64
70
  class Severity(Enum):
65
71
  """Severity levels for rule violations."""
@@ -136,6 +142,19 @@ class Check:
136
142
  return f"{col} matches '{self.value}'" if col else f"matches '{self.value}'"
137
143
  elif self.type == CheckType.ALLOWED_VALUES or self.type == CheckType.ISIN:
138
144
  return f"{col} in {self.value}" if col else f"in {self.value}"
145
+ elif self.type == CheckType.EXISTS_IN:
146
+ ref = self.params.get("dataset", "?") + "." + self.params.get("column", "?")
147
+ return f"{col} exists in {ref}" if col else f"exists in {ref}"
148
+ elif self.type == CheckType.REFERENCES:
149
+ ref = self.params.get("dataset", "?") + "." + self.params.get("column", "?")
150
+ return f"{col} references {ref}" if col else f"references {ref}"
151
+ elif self.type == CheckType.MATCHES_VALUES:
152
+ ref = self.params.get("dataset", "?") + "." + self.params.get("column", "?")
153
+ return f"{col} matches values in {ref}" if col else f"matches values in {ref}"
154
+ elif self.type == CheckType.ROW_COUNT_MATCHES:
155
+ ref = self.params.get("dataset", "?")
156
+ tolerance = self.params.get("tolerance", 0)
157
+ return f"row_count matches {ref} (tolerance: {tolerance})"
139
158
 
140
159
  # Fallback
141
160
  if col:
@@ -282,8 +301,12 @@ BUILTIN_PATTERNS = {
282
301
  "ssn": r"^\d{3}-\d{2}-\d{4}$",
283
302
  "zip_us": r"^\d{5}(-\d{4})?$",
284
303
  "credit_card": r"^\d{4}[\s\-]?\d{4}[\s\-]?\d{4}[\s\-]?\d{4}$",
304
+ "identifier": r"^[A-Z][A-Z0-9]*[-_]?\d+$|^[A-Z]{2,}[-_][A-Z0-9]+$",
285
305
  "slug": r"^[a-z0-9]+(?:-[a-z0-9]+)*$",
286
306
  "alpha": r"^[a-zA-Z]+$",
287
307
  "alphanumeric": r"^[a-zA-Z0-9]+$",
288
308
  "numeric": r"^-?\d+\.?\d*$",
289
309
  }
310
+
311
+ # Patterns that must be matched case-sensitively
312
+ CASE_SENSITIVE_PATTERNS = {"slug", "identifier"}
@@ -0,0 +1,40 @@
1
+ """Schema evolution tracking for DuckGuard.
2
+
3
+ This module provides functionality to track schema changes over time,
4
+ enabling detection of breaking changes and schema drift.
5
+
6
+ Usage:
7
+ from duckguard.schema_history import SchemaTracker, SchemaChangeAnalyzer
8
+
9
+ # Track schema
10
+ tracker = SchemaTracker()
11
+ snapshot = tracker.capture(dataset)
12
+
13
+ # Detect changes
14
+ analyzer = SchemaChangeAnalyzer()
15
+ report = analyzer.detect_changes(dataset)
16
+ if report.has_breaking_changes:
17
+ print("Breaking changes detected!")
18
+ """
19
+
20
+ from duckguard.schema_history.analyzer import (
21
+ SchemaChange,
22
+ SchemaChangeAnalyzer,
23
+ SchemaEvolutionReport,
24
+ )
25
+ from duckguard.schema_history.tracker import (
26
+ ColumnSchema,
27
+ SchemaSnapshot,
28
+ SchemaTracker,
29
+ )
30
+
31
+ __all__ = [
32
+ # Tracker
33
+ "SchemaTracker",
34
+ "SchemaSnapshot",
35
+ "ColumnSchema",
36
+ # Analyzer
37
+ "SchemaChangeAnalyzer",
38
+ "SchemaChange",
39
+ "SchemaEvolutionReport",
40
+ ]
@@ -0,0 +1,414 @@
1
+ """Schema change analysis implementation.
2
+
3
+ Provides functionality to detect and analyze schema changes between snapshots.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+ from datetime import datetime
10
+ from enum import Enum
11
+ from typing import TYPE_CHECKING, Any
12
+
13
+ from duckguard.history.schema import QUERIES
14
+ from duckguard.history.storage import HistoryStorage
15
+ from duckguard.schema_history.tracker import SchemaSnapshot, SchemaTracker
16
+
17
+ if TYPE_CHECKING:
18
+ from duckguard.core.dataset import Dataset
19
+
20
+
21
+ class ChangeType(str, Enum):
22
+ """Types of schema changes."""
23
+
24
+ COLUMN_ADDED = "column_added"
25
+ COLUMN_REMOVED = "column_removed"
26
+ TYPE_CHANGED = "type_changed"
27
+ NULLABLE_CHANGED = "nullable_changed"
28
+ POSITION_CHANGED = "position_changed"
29
+
30
+
31
+ class ChangeSeverity(str, Enum):
32
+ """Severity levels for schema changes."""
33
+
34
+ INFO = "info"
35
+ WARNING = "warning"
36
+ CRITICAL = "critical"
37
+
38
+
39
+ @dataclass
40
+ class SchemaChange:
41
+ """Represents a single schema change.
42
+
43
+ Attributes:
44
+ change_type: Type of change
45
+ column_name: Name of affected column (None for table-level changes)
46
+ previous_value: Previous value (type, nullable, etc.)
47
+ current_value: Current value
48
+ is_breaking: Whether this is a breaking change
49
+ severity: Change severity level
50
+ """
51
+
52
+ change_type: ChangeType
53
+ column_name: str | None
54
+ previous_value: str | None
55
+ current_value: str | None
56
+ is_breaking: bool
57
+ severity: ChangeSeverity
58
+
59
+ def to_dict(self) -> dict[str, Any]:
60
+ """Convert to dictionary."""
61
+ return {
62
+ "change_type": self.change_type.value,
63
+ "column_name": self.column_name,
64
+ "previous_value": self.previous_value,
65
+ "current_value": self.current_value,
66
+ "is_breaking": self.is_breaking,
67
+ "severity": self.severity.value,
68
+ }
69
+
70
+ def __str__(self) -> str:
71
+ """Human-readable string representation."""
72
+ if self.change_type == ChangeType.COLUMN_ADDED:
73
+ return f"Column '{self.column_name}' added (type: {self.current_value})"
74
+ elif self.change_type == ChangeType.COLUMN_REMOVED:
75
+ return f"Column '{self.column_name}' removed (was type: {self.previous_value})"
76
+ elif self.change_type == ChangeType.TYPE_CHANGED:
77
+ return f"Column '{self.column_name}' type changed: {self.previous_value} -> {self.current_value}"
78
+ elif self.change_type == ChangeType.NULLABLE_CHANGED:
79
+ return f"Column '{self.column_name}' nullable changed: {self.previous_value} -> {self.current_value}"
80
+ elif self.change_type == ChangeType.POSITION_CHANGED:
81
+ return f"Column '{self.column_name}' position changed: {self.previous_value} -> {self.current_value}"
82
+ return f"{self.change_type.value}: {self.column_name}"
83
+
84
+
85
+ @dataclass
86
+ class SchemaEvolutionReport:
87
+ """Report of schema changes between snapshots.
88
+
89
+ Attributes:
90
+ source: Data source path
91
+ previous_snapshot: Previous schema snapshot (None if first)
92
+ current_snapshot: Current schema snapshot
93
+ changes: List of detected changes
94
+ analyzed_at: When the analysis was performed
95
+ """
96
+
97
+ source: str
98
+ previous_snapshot: SchemaSnapshot | None
99
+ current_snapshot: SchemaSnapshot
100
+ changes: list[SchemaChange] = field(default_factory=list)
101
+ analyzed_at: datetime = field(default_factory=datetime.now)
102
+
103
+ @property
104
+ def has_changes(self) -> bool:
105
+ """Check if any changes were detected."""
106
+ return len(self.changes) > 0
107
+
108
+ @property
109
+ def has_breaking_changes(self) -> bool:
110
+ """Check if any breaking changes were detected."""
111
+ return any(c.is_breaking for c in self.changes)
112
+
113
+ @property
114
+ def breaking_changes(self) -> list[SchemaChange]:
115
+ """Get only breaking changes."""
116
+ return [c for c in self.changes if c.is_breaking]
117
+
118
+ @property
119
+ def non_breaking_changes(self) -> list[SchemaChange]:
120
+ """Get only non-breaking changes."""
121
+ return [c for c in self.changes if not c.is_breaking]
122
+
123
+ def summary(self) -> str:
124
+ """Generate a human-readable summary."""
125
+ lines = [f"Schema Evolution Report for: {self.source}"]
126
+ lines.append(f"Analyzed at: {self.analyzed_at.strftime('%Y-%m-%d %H:%M:%S')}")
127
+ lines.append("")
128
+
129
+ if not self.has_changes:
130
+ lines.append("No schema changes detected.")
131
+ return "\n".join(lines)
132
+
133
+ lines.append(f"Total changes: {len(self.changes)}")
134
+ lines.append(f"Breaking changes: {len(self.breaking_changes)}")
135
+ lines.append("")
136
+
137
+ # Group by type
138
+ by_type: dict[ChangeType, list[SchemaChange]] = {}
139
+ for change in self.changes:
140
+ by_type.setdefault(change.change_type, []).append(change)
141
+
142
+ for change_type, type_changes in by_type.items():
143
+ lines.append(f"{change_type.value.replace('_', ' ').title()}:")
144
+ for change in type_changes:
145
+ marker = "[BREAKING]" if change.is_breaking else ""
146
+ lines.append(f" - {change} {marker}")
147
+
148
+ return "\n".join(lines)
149
+
150
+ def to_dict(self) -> dict[str, Any]:
151
+ """Convert to dictionary."""
152
+ return {
153
+ "source": self.source,
154
+ "previous_snapshot_id": self.previous_snapshot.snapshot_id if self.previous_snapshot else None,
155
+ "current_snapshot_id": self.current_snapshot.snapshot_id,
156
+ "has_changes": self.has_changes,
157
+ "has_breaking_changes": self.has_breaking_changes,
158
+ "total_changes": len(self.changes),
159
+ "breaking_changes_count": len(self.breaking_changes),
160
+ "changes": [c.to_dict() for c in self.changes],
161
+ "analyzed_at": self.analyzed_at.isoformat(),
162
+ }
163
+
164
+
165
+ class SchemaChangeAnalyzer:
166
+ """Analyze schema changes between snapshots.
167
+
168
+ Usage:
169
+ from duckguard import connect
170
+ from duckguard.schema_history import SchemaChangeAnalyzer
171
+
172
+ analyzer = SchemaChangeAnalyzer()
173
+ data = connect("data.csv")
174
+
175
+ # Detect changes (captures snapshot and compares to previous)
176
+ report = analyzer.detect_changes(data)
177
+ if report.has_breaking_changes:
178
+ print("Breaking changes detected!")
179
+ for change in report.breaking_changes:
180
+ print(f" - {change}")
181
+
182
+ # Compare two specific snapshots
183
+ changes = analyzer.compare(snapshot1, snapshot2)
184
+ """
185
+
186
+ # Type changes that are typically safe (widening)
187
+ SAFE_TYPE_CHANGES = {
188
+ ("INTEGER", "BIGINT"),
189
+ ("FLOAT", "DOUBLE"),
190
+ ("VARCHAR", "TEXT"),
191
+ ("SMALLINT", "INTEGER"),
192
+ ("SMALLINT", "BIGINT"),
193
+ ("INTEGER", "DOUBLE"),
194
+ ("FLOAT", "DECIMAL"),
195
+ }
196
+
197
+ def __init__(self, storage: HistoryStorage | None = None):
198
+ """Initialize schema change analyzer.
199
+
200
+ Args:
201
+ storage: Optional HistoryStorage instance. Uses default if not provided.
202
+ """
203
+ self._storage = storage or HistoryStorage()
204
+ self._tracker = SchemaTracker(self._storage)
205
+
206
+ @property
207
+ def storage(self) -> HistoryStorage:
208
+ """Get the underlying storage."""
209
+ return self._storage
210
+
211
+ def compare(
212
+ self,
213
+ previous: SchemaSnapshot,
214
+ current: SchemaSnapshot,
215
+ ) -> list[SchemaChange]:
216
+ """Compare two schema snapshots and return changes.
217
+
218
+ Args:
219
+ previous: Previous schema snapshot
220
+ current: Current schema snapshot
221
+
222
+ Returns:
223
+ List of SchemaChange objects
224
+ """
225
+ changes: list[SchemaChange] = []
226
+
227
+ prev_cols = {c.name: c for c in previous.columns}
228
+ curr_cols = {c.name: c for c in current.columns}
229
+
230
+ prev_names = set(prev_cols.keys())
231
+ curr_names = set(curr_cols.keys())
232
+
233
+ # Detect removed columns (breaking change)
234
+ for name in prev_names - curr_names:
235
+ col = prev_cols[name]
236
+ changes.append(SchemaChange(
237
+ change_type=ChangeType.COLUMN_REMOVED,
238
+ column_name=name,
239
+ previous_value=col.dtype,
240
+ current_value=None,
241
+ is_breaking=True,
242
+ severity=ChangeSeverity.CRITICAL,
243
+ ))
244
+
245
+ # Detect added columns (usually not breaking)
246
+ for name in curr_names - prev_names:
247
+ col = curr_cols[name]
248
+ # Adding a non-nullable column without default is breaking
249
+ is_breaking = not col.nullable
250
+ changes.append(SchemaChange(
251
+ change_type=ChangeType.COLUMN_ADDED,
252
+ column_name=name,
253
+ previous_value=None,
254
+ current_value=col.dtype,
255
+ is_breaking=is_breaking,
256
+ severity=ChangeSeverity.WARNING if is_breaking else ChangeSeverity.INFO,
257
+ ))
258
+
259
+ # Detect changes to existing columns
260
+ for name in prev_names & curr_names:
261
+ prev_col = prev_cols[name]
262
+ curr_col = curr_cols[name]
263
+
264
+ # Type change
265
+ if prev_col.dtype != curr_col.dtype:
266
+ is_breaking = not self._is_safe_type_change(prev_col.dtype, curr_col.dtype)
267
+ changes.append(SchemaChange(
268
+ change_type=ChangeType.TYPE_CHANGED,
269
+ column_name=name,
270
+ previous_value=prev_col.dtype,
271
+ current_value=curr_col.dtype,
272
+ is_breaking=is_breaking,
273
+ severity=ChangeSeverity.CRITICAL if is_breaking else ChangeSeverity.WARNING,
274
+ ))
275
+
276
+ # Nullable change
277
+ if prev_col.nullable != curr_col.nullable:
278
+ # Changing from nullable to non-nullable is breaking
279
+ is_breaking = prev_col.nullable and not curr_col.nullable
280
+ changes.append(SchemaChange(
281
+ change_type=ChangeType.NULLABLE_CHANGED,
282
+ column_name=name,
283
+ previous_value=str(prev_col.nullable),
284
+ current_value=str(curr_col.nullable),
285
+ is_breaking=is_breaking,
286
+ severity=ChangeSeverity.WARNING if is_breaking else ChangeSeverity.INFO,
287
+ ))
288
+
289
+ # Position change (usually not breaking, just informational)
290
+ if prev_col.position != curr_col.position:
291
+ changes.append(SchemaChange(
292
+ change_type=ChangeType.POSITION_CHANGED,
293
+ column_name=name,
294
+ previous_value=str(prev_col.position),
295
+ current_value=str(curr_col.position),
296
+ is_breaking=False,
297
+ severity=ChangeSeverity.INFO,
298
+ ))
299
+
300
+ return changes
301
+
302
+ def detect_changes(self, dataset: Dataset) -> SchemaEvolutionReport:
303
+ """Detect schema changes for a dataset.
304
+
305
+ Captures current schema and compares to the most recent snapshot.
306
+
307
+ Args:
308
+ dataset: Dataset to analyze
309
+
310
+ Returns:
311
+ SchemaEvolutionReport with detected changes
312
+ """
313
+ # Get the latest snapshot before capturing new one
314
+ previous = self._tracker.get_latest(dataset.source)
315
+
316
+ # Capture current schema
317
+ current = self._tracker.capture(dataset)
318
+
319
+ # Compare if we have a previous snapshot
320
+ changes: list[SchemaChange] = []
321
+ if previous:
322
+ changes = self.compare(previous, current)
323
+
324
+ # Store detected changes
325
+ self._store_changes(dataset.source, previous, current, changes)
326
+
327
+ return SchemaEvolutionReport(
328
+ source=dataset.source,
329
+ previous_snapshot=previous,
330
+ current_snapshot=current,
331
+ changes=changes,
332
+ )
333
+
334
+ def analyze_evolution(
335
+ self,
336
+ source: str,
337
+ since: datetime | None = None,
338
+ limit: int = 100,
339
+ ) -> list[SchemaChange]:
340
+ """Get all schema changes for a source.
341
+
342
+ Args:
343
+ source: Data source path
344
+ since: Only get changes since this datetime
345
+ limit: Maximum changes to return
346
+
347
+ Returns:
348
+ List of SchemaChange objects, most recent first
349
+ """
350
+ conn = self._storage._get_connection()
351
+
352
+ if since:
353
+ cursor = conn.execute(
354
+ QUERIES["get_schema_changes_since"],
355
+ (source, since.isoformat()),
356
+ )
357
+ else:
358
+ cursor = conn.execute(
359
+ QUERIES["get_schema_changes"],
360
+ (source, limit),
361
+ )
362
+
363
+ changes = []
364
+ for row in cursor.fetchall():
365
+ changes.append(SchemaChange(
366
+ change_type=ChangeType(row["change_type"]),
367
+ column_name=row["column_name"],
368
+ previous_value=row["previous_value"],
369
+ current_value=row["current_value"],
370
+ is_breaking=bool(row["is_breaking"]),
371
+ severity=ChangeSeverity(row["severity"]),
372
+ ))
373
+
374
+ return changes
375
+
376
+ def _is_safe_type_change(self, from_type: str, to_type: str) -> bool:
377
+ """Check if a type change is safe (widening conversion)."""
378
+ from_normalized = from_type.upper().split("(")[0].strip()
379
+ to_normalized = to_type.upper().split("(")[0].strip()
380
+
381
+ return (from_normalized, to_normalized) in self.SAFE_TYPE_CHANGES
382
+
383
+ def _store_changes(
384
+ self,
385
+ source: str,
386
+ previous: SchemaSnapshot,
387
+ current: SchemaSnapshot,
388
+ changes: list[SchemaChange],
389
+ ) -> None:
390
+ """Store detected changes in the database."""
391
+ if not changes:
392
+ return
393
+
394
+ conn = self._storage._get_connection()
395
+ now = datetime.now().isoformat()
396
+
397
+ for change in changes:
398
+ conn.execute(
399
+ QUERIES["insert_schema_change"],
400
+ (
401
+ source,
402
+ now,
403
+ previous.snapshot_id,
404
+ current.snapshot_id,
405
+ change.change_type.value,
406
+ change.column_name,
407
+ change.previous_value,
408
+ change.current_value,
409
+ 1 if change.is_breaking else 0,
410
+ change.severity.value,
411
+ ),
412
+ )
413
+
414
+ conn.commit()