duckguard 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55):
  1. duckguard/__init__.py +110 -0
  2. duckguard/anomaly/__init__.py +34 -0
  3. duckguard/anomaly/detector.py +394 -0
  4. duckguard/anomaly/methods.py +432 -0
  5. duckguard/cli/__init__.py +5 -0
  6. duckguard/cli/main.py +706 -0
  7. duckguard/connectors/__init__.py +58 -0
  8. duckguard/connectors/base.py +80 -0
  9. duckguard/connectors/bigquery.py +171 -0
  10. duckguard/connectors/databricks.py +201 -0
  11. duckguard/connectors/factory.py +292 -0
  12. duckguard/connectors/files.py +135 -0
  13. duckguard/connectors/kafka.py +343 -0
  14. duckguard/connectors/mongodb.py +236 -0
  15. duckguard/connectors/mysql.py +121 -0
  16. duckguard/connectors/oracle.py +196 -0
  17. duckguard/connectors/postgres.py +99 -0
  18. duckguard/connectors/redshift.py +154 -0
  19. duckguard/connectors/snowflake.py +226 -0
  20. duckguard/connectors/sqlite.py +112 -0
  21. duckguard/connectors/sqlserver.py +242 -0
  22. duckguard/contracts/__init__.py +48 -0
  23. duckguard/contracts/diff.py +432 -0
  24. duckguard/contracts/generator.py +334 -0
  25. duckguard/contracts/loader.py +367 -0
  26. duckguard/contracts/schema.py +242 -0
  27. duckguard/contracts/validator.py +453 -0
  28. duckguard/core/__init__.py +8 -0
  29. duckguard/core/column.py +437 -0
  30. duckguard/core/dataset.py +284 -0
  31. duckguard/core/engine.py +261 -0
  32. duckguard/core/result.py +119 -0
  33. duckguard/core/scoring.py +508 -0
  34. duckguard/profiler/__init__.py +5 -0
  35. duckguard/profiler/auto_profile.py +350 -0
  36. duckguard/pytest_plugin/__init__.py +5 -0
  37. duckguard/pytest_plugin/plugin.py +161 -0
  38. duckguard/reporting/__init__.py +6 -0
  39. duckguard/reporting/console.py +88 -0
  40. duckguard/reporting/json_report.py +96 -0
  41. duckguard/rules/__init__.py +28 -0
  42. duckguard/rules/executor.py +616 -0
  43. duckguard/rules/generator.py +341 -0
  44. duckguard/rules/loader.py +483 -0
  45. duckguard/rules/schema.py +289 -0
  46. duckguard/semantic/__init__.py +31 -0
  47. duckguard/semantic/analyzer.py +270 -0
  48. duckguard/semantic/detector.py +459 -0
  49. duckguard/semantic/validators.py +354 -0
  50. duckguard/validators/__init__.py +7 -0
  51. duckguard-2.0.0.dist-info/METADATA +221 -0
  52. duckguard-2.0.0.dist-info/RECORD +55 -0
  53. duckguard-2.0.0.dist-info/WHEEL +4 -0
  54. duckguard-2.0.0.dist-info/entry_points.txt +5 -0
  55. duckguard-2.0.0.dist-info/licenses/LICENSE +55 -0
@@ -0,0 +1,432 @@
1
+ """Contract diff for DuckGuard.
2
+
3
+ Detects and categorizes changes between contract versions.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from dataclasses import dataclass, field
9
+ from enum import Enum
10
+ from typing import Any
11
+
12
+ from duckguard.contracts.schema import DataContract, SchemaField, FieldType
13
+
14
+
15
class ChangeType(Enum):
    """Kinds of change that a contract comparison can report.

    Each member's value is the lowercase identifier string for that change.
    """

    # --- Changes to a field's definition ---
    FIELD_ADDED = "field_added"
    FIELD_REMOVED = "field_removed"
    FIELD_TYPE_CHANGED = "field_type_changed"
    FIELD_REQUIRED_CHANGED = "field_required_changed"
    FIELD_UNIQUE_CHANGED = "field_unique_changed"
    FIELD_CONSTRAINT_ADDED = "field_constraint_added"
    FIELD_CONSTRAINT_REMOVED = "field_constraint_removed"
    FIELD_CONSTRAINT_CHANGED = "field_constraint_changed"

    # --- Changes to a field's metadata ---
    FIELD_DESCRIPTION_CHANGED = "field_description_changed"
    FIELD_DEPRECATED = "field_deprecated"

    # --- Changes to the quality SLA ---
    QUALITY_SLA_CHANGED = "quality_sla_changed"
34
+
35
+
36
class BreakingChangeLevel(Enum):
    """Severity scale describing how breaking a change is."""

    # Non-breaking.
    NONE = "none"
    # Potentially breaking for some consumers.
    MINOR = "minor"
    # Breaking change.
    MAJOR = "major"
42
+
43
+
44
@dataclass
class SchemaChange:
    """One individual change found between two contract versions.

    Attributes:
        type: Which kind of change this is.
        field: Name of the affected field, or ``None`` when the change is
            not tied to a single field.
        breaking_level: How breaking the change is.
        old_value: The value before the change.
        new_value: The value after the change.
        message: Human-readable description of the change.
    """

    type: ChangeType
    field: str | None
    breaking_level: BreakingChangeLevel
    old_value: Any
    new_value: Any
    message: str
63
+
64
+
65
@dataclass
class SchemaDiff:
    """Outcome of comparing two versions of a contract.

    Attributes:
        old_contract: The original contract.
        new_contract: The new contract.
        changes: Every change recorded between the two.
    """

    old_contract: DataContract
    new_contract: DataContract
    changes: list[SchemaChange] = field(default_factory=list)

    @property
    def has_changes(self) -> bool:
        """Whether at least one change was recorded."""
        return bool(self.changes)

    @property
    def has_breaking_changes(self) -> bool:
        """Whether any recorded change is at the MAJOR level."""
        return bool(self.breaking_changes)

    @property
    def breaking_changes(self) -> list[SchemaChange]:
        """Recorded changes whose level is MAJOR, in recorded order."""
        return [
            change
            for change in self.changes
            if change.breaking_level == BreakingChangeLevel.MAJOR
        ]

    @property
    def minor_changes(self) -> list[SchemaChange]:
        """Recorded changes whose level is MINOR, in recorded order."""
        return [
            change
            for change in self.changes
            if change.breaking_level == BreakingChangeLevel.MINOR
        ]

    @property
    def non_breaking_changes(self) -> list[SchemaChange]:
        """Recorded changes whose level is NONE, in recorded order."""
        return [
            change
            for change in self.changes
            if change.breaking_level == BreakingChangeLevel.NONE
        ]

    def summary(self) -> str:
        """Build a multi-line text summary of the recorded changes.

        Returns:
            A fixed "no changes" sentence when nothing was recorded.
            Otherwise a header with per-level counts, followed by one line
            per MAJOR-level change when there are any.
        """
        if not self.changes:
            return "No changes detected."

        major = self.breaking_changes
        old_version = self.old_contract.version
        new_version = self.new_contract.version

        report = [
            f"Changes from v{old_version} to v{new_version}:",
            f" Total: {len(self.changes)} changes",
            f" Breaking: {len(major)}",
            f" Minor: {len(self.minor_changes)}",
            f" Non-breaking: {len(self.non_breaking_changes)}",
        ]

        if major:
            report.append("\nBreaking changes:")
            report.extend(f" ❌ {change.message}" for change in major)

        return "\n".join(report)

    def suggest_version_bump(self) -> str:
        """Suggest a version bump type from the recorded change levels.

        Returns:
            ``"major"`` if any change is MAJOR, otherwise ``"minor"`` if any
            change is MINOR, otherwise ``"patch"``.
        """
        if self.has_breaking_changes:
            return "major"
        return "minor" if self.minor_changes else "patch"
126
+
127
+
128
class ContractDiffer:
    """Compares two contract versions and detects changes.

    ``diff`` is the entry point. It reports removed fields, added fields,
    per-field modifications (type, required, unique, deprecated,
    constraints) and quality-SLA changes, in that order.
    """

    # Breaking change classification.
    # NOTE: the methods of this class assign breaking levels directly and do
    # not consult these two sets; they are kept as part of the class's
    # public surface.
    BREAKING_CHANGES = {
        ChangeType.FIELD_REMOVED,
        ChangeType.FIELD_TYPE_CHANGED,
        ChangeType.FIELD_CONSTRAINT_ADDED,  # New constraint can break existing data
    }

    MINOR_CHANGES = {
        ChangeType.FIELD_REQUIRED_CHANGED,  # Made required
        ChangeType.FIELD_UNIQUE_CHANGED,  # Made unique
    }

    def diff(
        self,
        old_contract: DataContract,
        new_contract: DataContract,
    ) -> SchemaDiff:
        """Compare two contracts and return differences.

        Args:
            old_contract: Original contract version
            new_contract: New contract version

        Returns:
            SchemaDiff with all changes
        """
        # Index both schemas by field name.
        old_fields = {f.name: f for f in old_contract.schema}
        new_fields = {f.name: f for f in new_contract.schema}

        # Removed fields are always reported as MAJOR.
        changes: list[SchemaChange] = [
            SchemaChange(
                type=ChangeType.FIELD_REMOVED,
                field=name,
                breaking_level=BreakingChangeLevel.MAJOR,
                old_value=old_field,
                new_value=None,
                message=f"Field '{name}' was removed",
            )
            for name, old_field in old_fields.items()
            if name not in new_fields
        ]

        # Added fields: adding a required field is breaking, an optional one
        # is not.
        for name, new_field in new_fields.items():
            if name in old_fields:
                continue
            changes.append(SchemaChange(
                type=ChangeType.FIELD_ADDED,
                field=name,
                breaking_level=(
                    BreakingChangeLevel.MAJOR
                    if new_field.required
                    else BreakingChangeLevel.NONE
                ),
                old_value=None,
                new_value=new_field,
                message=f"Field '{name}' was added"
                + (" (required)" if new_field.required else ""),
            ))

        # Fields present in both versions: compare their definitions.
        for name, old_field in old_fields.items():
            if name in new_fields:
                changes.extend(self._compare_fields(old_field, new_fields[name]))

        # Quality SLA.
        changes.extend(
            self._compare_quality(old_contract.quality, new_contract.quality)
        )

        return SchemaDiff(
            old_contract=old_contract,
            new_contract=new_contract,
            changes=changes,
        )

    @staticmethod
    def _type_label(field_type: Any) -> str:
        """Return a comparable string for a field type (enum value or ``str()``)."""
        if isinstance(field_type, FieldType):
            return field_type.value
        return str(field_type)

    def _compare_fields(
        self,
        old_field: SchemaField,
        new_field: SchemaField,
    ) -> list[SchemaChange]:
        """Compare two definitions of the same field.

        Args:
            old_field: Field definition from the original contract
            new_field: Field definition from the new contract

        Returns:
            Changes in type, required, unique, deprecation and constraints,
            in that order. The old field's name is used in every entry.
        """
        changes: list[SchemaChange] = []
        name = old_field.name

        # Type change: always MAJOR.
        old_type = self._type_label(old_field.type)
        new_type = self._type_label(new_field.type)
        if old_type != new_type:
            changes.append(SchemaChange(
                type=ChangeType.FIELD_TYPE_CHANGED,
                field=name,
                breaking_level=BreakingChangeLevel.MAJOR,
                old_value=old_type,
                new_value=new_type,
                message=f"Field '{name}' type changed from '{old_type}' to '{new_type}'",
            ))

        # Required / unique flags: turning the flag on is potentially
        # breaking (MINOR); turning it off is not.
        flag_checks = (
            ("required", ChangeType.FIELD_REQUIRED_CHANGED),
            ("unique", ChangeType.FIELD_UNIQUE_CHANGED),
        )
        for attr, change_type in flag_checks:
            old_flag = getattr(old_field, attr)
            new_flag = getattr(new_field, attr)
            if old_flag == new_flag:
                continue
            changes.append(SchemaChange(
                type=change_type,
                field=name,
                breaking_level=(
                    BreakingChangeLevel.MINOR
                    if new_flag
                    else BreakingChangeLevel.NONE
                ),
                old_value=old_flag,
                new_value=new_flag,
                message=f"Field '{name}' {attr} changed from {old_flag} to {new_flag}",
            ))

        # Deprecation: only the not-deprecated -> deprecated transition is
        # reported.
        if not old_field.deprecated and new_field.deprecated:
            changes.append(SchemaChange(
                type=ChangeType.FIELD_DEPRECATED,
                field=name,
                breaking_level=BreakingChangeLevel.MINOR,
                old_value=False,
                new_value=True,
                message=f"Field '{name}' was deprecated",
            ))

        # Constraint changes.
        changes.extend(self._compare_constraints(name, old_field, new_field))

        return changes

    def _compare_constraints(
        self,
        field_name: str,
        old_field: SchemaField,
        new_field: SchemaField,
    ) -> list[SchemaChange]:
        """Compare constraints between field versions.

        Constraints are matched by their ``type``; if a field lists the same
        constraint type more than once, only the last one is compared.

        Args:
            field_name: Name used in the reported changes
            old_field: Field definition from the original contract
            new_field: Field definition from the new contract

        Returns:
            Removed, added, then changed constraints.
        """
        changes: list[SchemaChange] = []

        old_constraints = {c.type: c for c in old_field.constraints}
        new_constraints = {c.type: c for c in new_field.constraints}

        # Removed constraints: removing a constraint is usually safe.
        for ctype, old_c in old_constraints.items():
            if ctype not in new_constraints:
                changes.append(SchemaChange(
                    type=ChangeType.FIELD_CONSTRAINT_REMOVED,
                    field=field_name,
                    breaking_level=BreakingChangeLevel.NONE,
                    old_value=old_c,
                    new_value=None,
                    message=f"Field '{field_name}' constraint '{ctype}' was removed",
                ))

        # Added constraints: a new constraint can break existing data.
        for ctype, new_c in new_constraints.items():
            if ctype not in old_constraints:
                changes.append(SchemaChange(
                    type=ChangeType.FIELD_CONSTRAINT_ADDED,
                    field=field_name,
                    breaking_level=BreakingChangeLevel.MAJOR,
                    old_value=None,
                    new_value=new_c,
                    message=f"Field '{field_name}' constraint '{ctype}' was added",
                ))

        # Constraints present in both versions whose value differs.
        for ctype, old_c in old_constraints.items():
            new_c = new_constraints.get(ctype)
            if new_c is None or old_c.value == new_c.value:
                continue
            changes.append(SchemaChange(
                type=ChangeType.FIELD_CONSTRAINT_CHANGED,
                field=field_name,
                # Making a constraint more strict is breaking.
                breaking_level=self._is_constraint_more_strict(
                    ctype, old_c.value, new_c.value
                ),
                old_value=old_c.value,
                new_value=new_c.value,
                message=(
                    f"Field '{field_name}' constraint '{ctype}' changed "
                    f"from {old_c.value} to {new_c.value}"
                ),
            ))

        return changes

    def _is_constraint_more_strict(
        self,
        constraint_type: str,
        old_value: Any,
        new_value: Any,
    ) -> BreakingChangeLevel:
        """Determine if a constraint change makes it more strict.

        Args:
            constraint_type: Constraint type name
            old_value: Constraint value in the original contract
            new_value: Constraint value in the new contract

        Returns:
            ``BreakingChangeLevel.MAJOR`` when the new value is recognisably
            stricter, otherwise ``BreakingChangeLevel.NONE``. Constraint
            types not handled below, and values that cannot be compared,
            yield ``NONE``.
        """
        pair_types = (list, tuple)
        collection_types = (list, tuple, set, frozenset)

        try:
            if constraint_type == "range":
                # Smaller range is more strict.
                if isinstance(old_value, pair_types) and isinstance(new_value, pair_types):
                    old_min, old_max = old_value
                    new_min, new_max = new_value
                    if new_min > old_min or new_max < old_max:
                        return BreakingChangeLevel.MAJOR

            elif constraint_type in ("min", "min_length"):
                # Higher min is more strict.
                if new_value > old_value:
                    return BreakingChangeLevel.MAJOR

            elif constraint_type in ("max", "max_length"):
                # Lower max is more strict.
                if new_value < old_value:
                    return BreakingChangeLevel.MAJOR

            elif constraint_type in ("allowed_values", "enum"):
                # Dropping any previously allowed value is more strict.
                if isinstance(old_value, collection_types) and isinstance(
                    new_value, collection_types
                ):
                    if not set(new_value).issuperset(old_value):
                        return BreakingChangeLevel.MAJOR

        except (TypeError, ValueError):
            # Best-effort classification: values that cannot be ordered,
            # hashed, or unpacked into a (min, max) pair cannot be shown to
            # be stricter. Only these expected failures are tolerated; other
            # errors propagate.
            return BreakingChangeLevel.NONE

        return BreakingChangeLevel.NONE

    @staticmethod
    def _sla_level(old_threshold: Any, new_threshold: Any) -> BreakingChangeLevel:
        """Classify a changed SLA threshold.

        A truthy new threshold that is introduced (old is ``None``) or raised
        is MINOR. Everything else, including a new threshold of ``None`` or
        0, is NONE.
        """
        if new_threshold and (old_threshold is None or new_threshold > old_threshold):
            return BreakingChangeLevel.MINOR
        return BreakingChangeLevel.NONE

    def _compare_quality(self, old_quality: Any, new_quality: Any) -> list[SchemaChange]:
        """Compare quality SLA changes.

        Args:
            old_quality: Quality section of the original contract; its
                ``completeness`` and ``row_count_min`` attributes are read
            new_quality: Quality section of the new contract

        Returns:
            At most one change for completeness and one for the row-count
            minimum, both with ``field=None``.
        """
        changes: list[SchemaChange] = []

        # Completeness: a higher requirement is more strict.
        old_completeness = old_quality.completeness
        new_completeness = new_quality.completeness
        if old_completeness != new_completeness:
            changes.append(SchemaChange(
                type=ChangeType.QUALITY_SLA_CHANGED,
                field=None,
                breaking_level=self._sla_level(old_completeness, new_completeness),
                old_value=old_completeness,
                new_value=new_completeness,
                message=f"Completeness SLA changed from {old_completeness}% to {new_completeness}%",
            ))

        # Row count minimum: a higher minimum is more strict.
        old_rows = old_quality.row_count_min
        new_rows = new_quality.row_count_min
        if old_rows != new_rows:
            changes.append(SchemaChange(
                type=ChangeType.QUALITY_SLA_CHANGED,
                field=None,
                breaking_level=self._sla_level(old_rows, new_rows),
                old_value=old_rows,
                new_value=new_rows,
                message=f"Row count minimum changed from {old_rows} to {new_rows}",
            ))

        return changes
416
+
417
+
418
def diff_contracts(
    old_contract: DataContract,
    new_contract: DataContract,
) -> SchemaDiff:
    """Compare two contract versions using a fresh ``ContractDiffer``.

    Args:
        old_contract: Original contract
        new_contract: New contract

    Returns:
        SchemaDiff with all changes
    """
    return ContractDiffer().diff(old_contract, new_contract)