kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,969 @@
1
+ # src/kontra/state/backends/sqlserver.py
2
+ """
3
+ SQL Server state storage with normalized schema (v0.5).
4
+
5
+ Schema:
6
+ kontra_runs - Run-level metadata
7
+ kontra_rule_results - Per-rule results (references kontra_runs)
8
+ kontra_annotations - Append-only annotations (references runs/rules)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import os
15
+ from datetime import datetime, timezone
16
+ from typing import Any, Dict, List, Optional
17
+ from urllib.parse import urlparse, parse_qs
18
+
19
+ from .base import StateBackend
20
+ from kontra.state.types import (
21
+ Annotation,
22
+ RuleState,
23
+ StateSummary,
24
+ ValidationState,
25
+ )
26
+
27
+
28
class SQLServerStore(StateBackend):
    """
    SQL Server database state storage backend with normalized schema.

    Uses pyodbc for database access. Automatically creates
    the required tables if they don't exist.

    URI format: mssql://user:pass@host:port/database
                sqlserver://user:pass@host:port/database

    Also supports environment variables:
        MSSQL_HOST, MSSQL_PORT, MSSQL_USER, MSSQL_PASSWORD, MSSQL_DATABASE,
        MSSQL_DRIVER
    """

    # Table names
    RUNS_TABLE = "kontra_runs"
    RULE_RESULTS_TABLE = "kontra_rule_results"
    ANNOTATIONS_TABLE = "kontra_annotations"

    # DDL for creating tables (SQL Server syntax).
    #
    # The script is split on ';' and executed one statement at a time, so no
    # semicolon may appear inside a comment or literal below.
    #
    # kontra_annotations.rule_result_id is intentionally a plain column without
    # a foreign key: kontra_runs already cascades to kontra_annotations through
    # run_id, and a second cascading path via kontra_rule_results makes SQL
    # Server reject the CREATE TABLE with error 1785 (multiple cascade paths).
    # Annotation rows are still removed with their run through run_id.
    CREATE_TABLES_SQL = """
    IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='kontra_runs' AND xtype='U')
    CREATE TABLE kontra_runs (
        id INT IDENTITY(1,1) PRIMARY KEY,

        -- Identity
        contract_fingerprint NVARCHAR(255) NOT NULL,
        contract_name NVARCHAR(255) NOT NULL,
        dataset_fingerprint NVARCHAR(255),
        dataset_name NVARCHAR(500),

        -- Timing
        run_at DATETIMEOFFSET NOT NULL,
        duration_ms INT,

        -- Summary
        passed BIT NOT NULL,
        total_rows BIGINT,
        total_rules INT NOT NULL,
        passed_rules INT NOT NULL,
        failed_rules INT NOT NULL,

        -- By severity
        blocking_failures INT NOT NULL DEFAULT 0,
        warning_failures INT NOT NULL DEFAULT 0,
        info_failures INT NOT NULL DEFAULT 0,

        -- Execution metadata
        execution_stats NVARCHAR(MAX), -- JSON string

        -- Schema version
        schema_version NVARCHAR(50) NOT NULL DEFAULT '2.0',
        engine_version NVARCHAR(50)
    );

    IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name='idx_kontra_runs_contract_time')
    CREATE INDEX idx_kontra_runs_contract_time
    ON kontra_runs (contract_fingerprint, run_at DESC);

    IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name='idx_kontra_runs_passed')
    CREATE INDEX idx_kontra_runs_passed
    ON kontra_runs (contract_fingerprint, passed, run_at DESC);

    IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='kontra_rule_results' AND xtype='U')
    CREATE TABLE kontra_rule_results (
        id INT IDENTITY(1,1) PRIMARY KEY,
        run_id INT NOT NULL REFERENCES kontra_runs(id) ON DELETE CASCADE,

        -- Rule identity
        rule_id NVARCHAR(255) NOT NULL,
        rule_name NVARCHAR(100) NOT NULL,

        -- Result
        passed BIT NOT NULL,
        failed_count BIGINT NOT NULL DEFAULT 0,

        -- Metadata
        severity NVARCHAR(20) NOT NULL,
        message NVARCHAR(MAX),
        column_name NVARCHAR(255),
        execution_source NVARCHAR(50),

        -- Variable structure
        failure_mode NVARCHAR(100),
        details NVARCHAR(MAX), -- JSON string
        context NVARCHAR(MAX), -- JSON string
        samples NVARCHAR(MAX) -- JSON string
    );

    IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name='idx_kontra_rule_results_run')
    CREATE INDEX idx_kontra_rule_results_run
    ON kontra_rule_results (run_id);

    IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name='idx_kontra_rule_results_rule_id')
    CREATE INDEX idx_kontra_rule_results_rule_id
    ON kontra_rule_results (rule_id, run_id DESC);

    IF NOT EXISTS (SELECT * FROM sysobjects WHERE name='kontra_annotations' AND xtype='U')
    CREATE TABLE kontra_annotations (
        id INT IDENTITY(1,1) PRIMARY KEY,

        -- What this annotates
        run_id INT NOT NULL REFERENCES kontra_runs(id) ON DELETE CASCADE,
        rule_result_id INT, -- id of a kontra_rule_results row, no FK (error 1785)

        -- Who created it
        actor_type NVARCHAR(50) NOT NULL,
        actor_id NVARCHAR(255) NOT NULL,

        -- What it says
        annotation_type NVARCHAR(100) NOT NULL,
        summary NVARCHAR(MAX) NOT NULL,
        payload NVARCHAR(MAX), -- JSON string

        -- When
        created_at DATETIMEOFFSET NOT NULL DEFAULT SYSDATETIMEOFFSET()
    );

    IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name='idx_kontra_annotations_run')
    CREATE INDEX idx_kontra_annotations_run
    ON kontra_annotations (run_id);

    IF NOT EXISTS (SELECT * FROM sys.indexes WHERE name='idx_kontra_annotations_time')
    CREATE INDEX idx_kontra_annotations_time
    ON kontra_annotations (created_at DESC);
    """
154
+
155
+ def __init__(self, uri: str):
156
+ """
157
+ Initialize the SQL Server store.
158
+
159
+ Args:
160
+ uri: SQL Server connection URI
161
+
162
+ The URI can be a full connection string or just the scheme,
163
+ with connection details from environment variables.
164
+ """
165
+ self.uri = uri
166
+ self._conn_params = self._parse_connection_params(uri)
167
+ self._conn = None
168
+ self._tables_created = False
169
+
170
+ @staticmethod
171
+ def _parse_connection_params(uri: str) -> Dict[str, Any]:
172
+ """
173
+ Parse SQL Server connection parameters from URI and environment.
174
+
175
+ Priority: URI values > MSSQL_XXX env vars > defaults
176
+ """
177
+ parsed = urlparse(uri)
178
+
179
+ # Start with defaults
180
+ params: Dict[str, Any] = {
181
+ "server": "localhost",
182
+ "port": 1433,
183
+ "user": None,
184
+ "password": None,
185
+ "database": None,
186
+ "driver": "{ODBC Driver 17 for SQL Server}",
187
+ }
188
+
189
+ # Layer 1: Environment variables
190
+ if os.getenv("MSSQL_HOST"):
191
+ params["server"] = os.getenv("MSSQL_HOST")
192
+ if os.getenv("MSSQL_PORT"):
193
+ params["port"] = int(os.getenv("MSSQL_PORT"))
194
+ if os.getenv("MSSQL_USER"):
195
+ params["user"] = os.getenv("MSSQL_USER")
196
+ if os.getenv("MSSQL_PASSWORD"):
197
+ params["password"] = os.getenv("MSSQL_PASSWORD")
198
+ if os.getenv("MSSQL_DATABASE"):
199
+ params["database"] = os.getenv("MSSQL_DATABASE")
200
+ if os.getenv("MSSQL_DRIVER"):
201
+ params["driver"] = os.getenv("MSSQL_DRIVER")
202
+
203
+ # Layer 2: Explicit URI values (highest priority)
204
+ if parsed.hostname:
205
+ params["server"] = parsed.hostname
206
+ if parsed.port:
207
+ params["port"] = parsed.port
208
+ if parsed.username:
209
+ params["user"] = parsed.username
210
+ if parsed.password:
211
+ params["password"] = parsed.password
212
+ if parsed.path and parsed.path != "/":
213
+ params["database"] = parsed.path.strip("/").split("/")[0]
214
+
215
+ # Parse query parameters
216
+ query_params = parse_qs(parsed.query)
217
+ for key, values in query_params.items():
218
+ if values:
219
+ params[key] = values[0]
220
+
221
+ return params
222
+
223
+ def _get_conn(self):
224
+ """Get or create the database connection."""
225
+ if self._conn is not None:
226
+ return self._conn
227
+
228
+ try:
229
+ import pyodbc
230
+ except ImportError as e:
231
+ raise RuntimeError(
232
+ "SQL Server state backend requires 'pyodbc'. "
233
+ "Install with: pip install pyodbc"
234
+ ) from e
235
+
236
+ # Build connection string
237
+ conn_str_parts = [
238
+ f"DRIVER={self._conn_params['driver']}",
239
+ f"SERVER={self._conn_params['server']},{self._conn_params['port']}",
240
+ ]
241
+ if self._conn_params.get("database"):
242
+ conn_str_parts.append(f"DATABASE={self._conn_params['database']}")
243
+ if self._conn_params.get("user"):
244
+ conn_str_parts.append(f"UID={self._conn_params['user']}")
245
+ if self._conn_params.get("password"):
246
+ conn_str_parts.append(f"PWD={self._conn_params['password']}")
247
+
248
+ conn_str = ";".join(conn_str_parts)
249
+
250
+ try:
251
+ self._conn = pyodbc.connect(conn_str)
252
+ self._ensure_tables()
253
+ except Exception as e:
254
+ raise ConnectionError(
255
+ f"Failed to connect to SQL Server: {e}\n\n"
256
+ "Set environment variables:\n"
257
+ " export MSSQL_HOST=localhost\n"
258
+ " export MSSQL_PORT=1433\n"
259
+ " export MSSQL_USER=your_user\n"
260
+ " export MSSQL_PASSWORD=your_password\n"
261
+ " export MSSQL_DATABASE=your_database\n\n"
262
+ "Or use full URI:\n"
263
+ " mssql://user:pass@host:1433/database"
264
+ ) from e
265
+
266
+ return self._conn
267
+
268
+ def _ensure_tables(self) -> None:
269
+ """Create the state tables if they don't exist."""
270
+ if self._tables_created:
271
+ return
272
+
273
+ conn = self._conn
274
+ cursor = conn.cursor()
275
+ # Execute each statement separately (SQL Server doesn't like batches with CREATE)
276
+ for statement in self.CREATE_TABLES_SQL.split(";"):
277
+ statement = statement.strip()
278
+ if statement:
279
+ cursor.execute(statement)
280
+ conn.commit()
281
+ self._tables_created = True
282
+
283
def save(self, state: ValidationState) -> None:
    """Persist one validation run and its per-rule results (normalized).

    Inserts a row into the runs table, then one row per rule into the rule
    results table, and commits. On success ``state.id`` is set to the new
    run id.

    Raises:
        IOError: If any insert or the commit fails; the transaction is
            rolled back first.
    """
    db = self._get_conn()

    # Run-level insert; OUTPUT returns the generated identity value
    insert_run = f"""
    INSERT INTO {self.RUNS_TABLE} (
    contract_fingerprint,
    contract_name,
    dataset_fingerprint,
    dataset_name,
    run_at,
    duration_ms,
    passed,
    total_rows,
    total_rules,
    passed_rules,
    failed_rules,
    blocking_failures,
    warning_failures,
    info_failures,
    schema_version,
    engine_version
    ) OUTPUT INSERTED.id VALUES (
    ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
    )
    """

    # Per-rule insert
    insert_rule = f"""
    INSERT INTO {self.RULE_RESULTS_TABLE} (
    run_id,
    rule_id,
    rule_name,
    passed,
    failed_count,
    severity,
    message,
    column_name,
    execution_source,
    failure_mode,
    details,
    context,
    samples
    ) OUTPUT INSERTED.id VALUES (
    ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?
    )
    """

    try:
        cur = db.cursor()

        run_values = (
            state.contract_fingerprint,
            state.contract_name,
            state.dataset_fingerprint,
            state.dataset_uri,
            state.run_at,
            state.duration_ms,
            state.summary.passed,
            state.summary.row_count,
            state.summary.total_rules,
            state.summary.passed_rules,
            state.summary.failed_rules,
            state.summary.blocking_failures,
            state.summary.warning_failures,
            state.summary.info_failures,
            state.schema_version,
            state.engine_version,
        )
        cur.execute(insert_run, run_values)
        new_run_id = cur.fetchone()[0]

        for result in state.rules:
            # Empty/missing details are stored as NULL rather than "{}"
            details_json = json.dumps(result.details) if result.details else None
            rule_values = (
                new_run_id,
                result.rule_id,
                result.rule_name,
                result.passed,
                result.failed_count,
                result.severity,
                result.message,
                result.column,
                result.execution_source,
                result.failure_mode,
                details_json,
                None,  # context
                None,  # samples
            )
            cur.execute(insert_rule, rule_values)

        db.commit()

        # Expose the generated id to the caller
        state.id = new_run_id

    except Exception as e:
        db.rollback()
        raise IOError(f"Failed to save state to SQL Server: {e}") from e
382
+
383
def _build_state_from_rows(
    self,
    run_row: tuple,
    rule_rows: List[tuple],
) -> ValidationState:
    """Assemble a ValidationState from one run row and its rule rows.

    Args:
        run_row: 18 positional columns in the order selected by the run
            queries of this class (id ... engine_version).
        rule_rows: Rows of 14 positional columns in the order selected by
            the rule queries of this class (id ... samples).

    Returns:
        The reconstructed state. Unparseable ``details`` JSON becomes
        ``None``; a ``run_at`` that is not a ``datetime`` is replaced by the
        current UTC time.
    """
    # pyodbc rows are positional, so unpack in SELECT order
    (
        state_id, fingerprint, name, data_fingerprint,
        data_name, started_at, elapsed_ms, run_passed, row_total, rule_total,
        rules_ok, rules_failed, n_blocking, n_warning,
        n_info, _execution_stats, schema_ver, engine_ver
    ) = run_row

    totals = StateSummary(
        passed=bool(run_passed),
        total_rules=rule_total,
        passed_rules=rules_ok,
        failed_rules=rules_failed,
        row_count=row_total,
        blocking_failures=n_blocking,
        warning_failures=n_warning,
        info_failures=n_info,
    )

    rule_states = []
    for record in rule_rows:
        (
            result_id, _run_ref, rid, rname, ok,
            n_failed, sev, msg, col, source,
            mode, details_json, _context, _samples
        ) = record

        # details is stored as a JSON string; bad JSON degrades to None
        decoded = None
        if details_json:
            try:
                decoded = json.loads(details_json)
            except Exception:
                pass

        rule_states.append(
            RuleState(
                rule_id=rid,
                rule_name=rname,
                passed=bool(ok),
                failed_count=n_failed,
                execution_source=source or "unknown",
                severity=sev,
                failure_mode=mode,
                details=decoded,
                message=msg,
                column=col,
                id=result_id,
            )
        )

    if isinstance(started_at, datetime):
        effective_run_at = started_at
    else:
        effective_run_at = datetime.now(timezone.utc)

    return ValidationState(
        id=state_id,
        contract_fingerprint=fingerprint,
        dataset_fingerprint=data_fingerprint,
        contract_name=name,
        dataset_uri=data_name or "",
        run_at=effective_run_at,
        summary=totals,
        rules=rule_states,
        schema_version=schema_ver or "2.0",
        engine_version=engine_ver or "unknown",
        duration_ms=elapsed_ms,
    )
454
+
455
def get_latest(self, contract_fingerprint: str) -> Optional[ValidationState]:
    """Return the newest stored state for a contract.

    Returns:
        The state with the greatest ``run_at`` for ``contract_fingerprint``,
        or ``None`` when there is no such run or the query fails.
    """
    db = self._get_conn()

    newest_run_query = f"""
    SELECT TOP 1 id, contract_fingerprint, contract_name, dataset_fingerprint,
    dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
    passed_rules, failed_rules, blocking_failures, warning_failures,
    info_failures, execution_stats, schema_version, engine_version
    FROM {self.RUNS_TABLE}
    WHERE contract_fingerprint = ?
    ORDER BY run_at DESC
    """

    rules_query = f"""
    SELECT id, run_id, rule_id, rule_name, passed, failed_count,
    severity, message, column_name, execution_source,
    failure_mode, details, context, samples
    FROM {self.RULE_RESULTS_TABLE}
    WHERE run_id = ?
    ORDER BY id
    """

    try:
        cur = db.cursor()
        cur.execute(newest_run_query, (contract_fingerprint,))
        head = cur.fetchone()
        if head:
            # First column is the run id used to look up its rule rows
            cur.execute(rules_query, (head[0],))
            return self._build_state_from_rows(head, cur.fetchall())
        return None
    except Exception:
        return None
492
+
493
+ def get_history(
494
+ self,
495
+ contract_fingerprint: str,
496
+ limit: int = 10,
497
+ ) -> List[ValidationState]:
498
+ """Get recent history for a contract, newest first."""
499
+ conn = self._get_conn()
500
+
501
+ run_sql = f"""
502
+ SELECT TOP (?) id, contract_fingerprint, contract_name, dataset_fingerprint,
503
+ dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
504
+ passed_rules, failed_rules, blocking_failures, warning_failures,
505
+ info_failures, execution_stats, schema_version, engine_version
506
+ FROM {self.RUNS_TABLE}
507
+ WHERE contract_fingerprint = ?
508
+ ORDER BY run_at DESC
509
+ """
510
+
511
+ rule_sql = f"""
512
+ SELECT id, run_id, rule_id, rule_name, passed, failed_count,
513
+ severity, message, column_name, execution_source,
514
+ failure_mode, details, context, samples
515
+ FROM {self.RULE_RESULTS_TABLE}
516
+ WHERE run_id IN (?)
517
+ ORDER BY run_id, id
518
+ """
519
+
520
+ try:
521
+ cursor = conn.cursor()
522
+ cursor.execute(run_sql, (limit, contract_fingerprint))
523
+ run_rows = cursor.fetchall()
524
+ if not run_rows:
525
+ return []
526
+
527
+ # Get all rule results (one query per run for simplicity)
528
+ states = []
529
+ rule_sql_single = f"""
530
+ SELECT id, run_id, rule_id, rule_name, passed, failed_count,
531
+ severity, message, column_name, execution_source,
532
+ failure_mode, details, context, samples
533
+ FROM {self.RULE_RESULTS_TABLE}
534
+ WHERE run_id = ?
535
+ ORDER BY id
536
+ """
537
+ for run_row in run_rows:
538
+ run_id = run_row[0]
539
+ cursor.execute(rule_sql_single, (run_id,))
540
+ rule_rows = cursor.fetchall()
541
+ state = self._build_state_from_rows(run_row, rule_rows)
542
+ states.append(state)
543
+
544
+ return states
545
+ except Exception:
546
+ return []
547
+
548
def get_at(
    self,
    contract_fingerprint: str,
    timestamp: datetime,
) -> Optional[ValidationState]:
    """Return the newest state whose ``run_at`` is at or before ``timestamp``.

    Returns:
        The matching state, or ``None`` when no run qualifies or the query
        fails.
    """
    db = self._get_conn()

    run_query = f"""
    SELECT TOP 1 id, contract_fingerprint, contract_name, dataset_fingerprint,
    dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
    passed_rules, failed_rules, blocking_failures, warning_failures,
    info_failures, execution_stats, schema_version, engine_version
    FROM {self.RUNS_TABLE}
    WHERE contract_fingerprint = ? AND run_at <= ?
    ORDER BY run_at DESC
    """

    rules_query = f"""
    SELECT id, run_id, rule_id, rule_name, passed, failed_count,
    severity, message, column_name, execution_source,
    failure_mode, details, context, samples
    FROM {self.RULE_RESULTS_TABLE}
    WHERE run_id = ?
    ORDER BY id
    """

    try:
        cur = db.cursor()
        cur.execute(run_query, (contract_fingerprint, timestamp))
        match = cur.fetchone()
        if match:
            cur.execute(rules_query, (match[0],))
            return self._build_state_from_rows(match, cur.fetchall())
        return None
    except Exception:
        return None
589
+
590
def delete_old(
    self,
    contract_fingerprint: str,
    keep_count: int = 100,
) -> int:
    """Prune a contract's runs, keeping only the newest ``keep_count``.

    Returns:
        The cursor's row count for the DELETE, or 0 when the statement
        fails (the transaction is rolled back in that case).
    """
    db = self._get_conn()

    # Remove every run outside the newest keep_count; rule results are
    # declared ON DELETE CASCADE on run_id
    prune_query = f"""
    DELETE FROM {self.RUNS_TABLE}
    WHERE contract_fingerprint = ?
    AND id NOT IN (
    SELECT TOP (?) id FROM {self.RUNS_TABLE}
    WHERE contract_fingerprint = ?
    ORDER BY run_at DESC
    )
    """
    args = (contract_fingerprint, keep_count, contract_fingerprint)

    try:
        cur = db.cursor()
        cur.execute(prune_query, args)
        removed = cur.rowcount
        db.commit()
    except Exception:
        db.rollback()
        return 0
    return removed
618
+
619
def list_contracts(self) -> List[str]:
    """Return the distinct contract fingerprints that have stored runs.

    Returns:
        Fingerprints in ascending order, or an empty list when the query
        fails.
    """
    db = self._get_conn()

    query = f"""
    SELECT DISTINCT contract_fingerprint FROM {self.RUNS_TABLE}
    ORDER BY contract_fingerprint
    """

    try:
        cur = db.cursor()
        cur.execute(query)
        return [record[0] for record in cur.fetchall()]
    except Exception:
        return []
635
+
636
def clear(self, contract_fingerprint: Optional[str] = None) -> int:
    """
    Clear stored states.

    Args:
        contract_fingerprint: If provided, only clear this contract's states.
            If None, clear all states. An empty string counts as
            "provided" and therefore never clears everything.

    Returns:
        Number of run rows deleted (rule_results cascade), or 0 when the
        delete fails and is rolled back.
    """
    conn = self._get_conn()

    try:
        cursor = conn.cursor()
        # Compare against None explicitly: a falsy fingerprint such as ""
        # must not fall through to the delete-everything branch.
        if contract_fingerprint is not None:
            cursor.execute(
                f"DELETE FROM {self.RUNS_TABLE} WHERE contract_fingerprint = ?",
                (contract_fingerprint,)
            )
        else:
            cursor.execute(f"DELETE FROM {self.RUNS_TABLE}")
        deleted = cursor.rowcount
        conn.commit()
        return deleted
    except Exception:
        conn.rollback()
        return 0
664
+
665
+ # -------------------------------------------------------------------------
666
+ # Annotation Methods
667
+ # -------------------------------------------------------------------------
668
+
669
def save_annotation(self, annotation: Annotation) -> int:
    """Insert one annotation row (append-only) and return its new id.

    The payload is stored as a JSON string (NULL when empty); a missing
    ``created_at`` defaults to the current UTC time. On success
    ``annotation.id`` is set to the generated id.

    Raises:
        IOError: If the insert or commit fails; the transaction is rolled
            back first.
    """
    db = self._get_conn()

    insert_query = f"""
    INSERT INTO {self.ANNOTATIONS_TABLE} (
    run_id, rule_result_id, actor_type, actor_id,
    annotation_type, summary, payload, created_at
    ) OUTPUT INSERTED.id VALUES (
    ?, ?, ?, ?, ?, ?, ?, ?
    )
    """

    try:
        cur = db.cursor()
        payload_json = json.dumps(annotation.payload) if annotation.payload else None
        stamped_at = annotation.created_at or datetime.now(timezone.utc)
        values = (
            annotation.run_id,
            annotation.rule_result_id,
            annotation.actor_type,
            annotation.actor_id,
            annotation.annotation_type,
            annotation.summary,
            payload_json,
            stamped_at,
        )
        cur.execute(insert_query, values)
        new_id = cur.fetchone()[0]
        db.commit()

        annotation.id = new_id
        return new_id
    except Exception as e:
        db.rollback()
        raise IOError(f"Failed to save annotation: {e}") from e
702
+
703
def get_annotations(
    self,
    run_id: int,
    rule_result_id: Optional[int] = None,
) -> List[Annotation]:
    """Return annotations of a run, optionally limited to one rule result.

    Args:
        run_id: Run whose annotations are fetched.
        rule_result_id: When given, only annotations attached to this rule
            result are returned.

    Returns:
        Annotations ordered by ``created_at`` descending; an empty list
        when none exist or the query fails.
    """
    db = self._get_conn()

    if rule_result_id is None:
        query = f"""
        SELECT id, run_id, rule_result_id, actor_type, actor_id,
        annotation_type, summary, payload, created_at
        FROM {self.ANNOTATIONS_TABLE}
        WHERE run_id = ?
        ORDER BY created_at DESC
        """
        args = (run_id,)
    else:
        query = f"""
        SELECT id, run_id, rule_result_id, actor_type, actor_id,
        annotation_type, summary, payload, created_at
        FROM {self.ANNOTATIONS_TABLE}
        WHERE run_id = ? AND rule_result_id = ?
        ORDER BY created_at DESC
        """
        args = (run_id, rule_result_id)

    try:
        cur = db.cursor()
        cur.execute(query, args)

        found = []
        for record in cur.fetchall():
            (
                ann_id, ann_run_id, ann_rule_result_id, who_type, who_id,
                kind, text, raw_payload, stamped_at
            ) = record

            # payload is a JSON string; bad JSON degrades to None
            decoded = None
            if raw_payload:
                try:
                    decoded = json.loads(raw_payload)
                except Exception:
                    pass

            found.append(
                Annotation(
                    id=ann_id,
                    run_id=ann_run_id,
                    rule_result_id=ann_rule_result_id,
                    actor_type=who_type,
                    actor_id=who_id,
                    annotation_type=kind,
                    summary=text,
                    payload=decoded,
                    created_at=stamped_at if isinstance(stamped_at, datetime) else None,
                )
            )
        return found
    except Exception:
        return []
765
+
766
def get_run_with_annotations(
    self,
    contract_fingerprint: str,
    run_id: Optional[int] = None,
) -> Optional[ValidationState]:
    """Load one run together with its annotations.

    Args:
        contract_fingerprint: Contract the run must belong to.
        run_id: Specific run to load; when omitted the newest run of the
            contract is used.

    Returns:
        The state after ``_attach_annotations_to_state`` has been applied,
        or ``None`` when no run matches or any query fails.
    """
    db = self._get_conn()

    # Pick the run either by explicit id or as the newest for the contract
    if run_id is None:
        run_query = f"""
        SELECT TOP 1 id, contract_fingerprint, contract_name, dataset_fingerprint,
        dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
        passed_rules, failed_rules, blocking_failures, warning_failures,
        info_failures, execution_stats, schema_version, engine_version
        FROM {self.RUNS_TABLE}
        WHERE contract_fingerprint = ?
        ORDER BY run_at DESC
        """
        run_args = (contract_fingerprint,)
    else:
        run_query = f"""
        SELECT TOP 1 id, contract_fingerprint, contract_name, dataset_fingerprint,
        dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
        passed_rules, failed_rules, blocking_failures, warning_failures,
        info_failures, execution_stats, schema_version, engine_version
        FROM {self.RUNS_TABLE}
        WHERE id = ? AND contract_fingerprint = ?
        """
        run_args = (run_id, contract_fingerprint)

    rules_query = f"""
    SELECT id, run_id, rule_id, rule_name, passed, failed_count,
    severity, message, column_name, execution_source,
    failure_mode, details, context, samples
    FROM {self.RULE_RESULTS_TABLE}
    WHERE run_id = ?
    ORDER BY id
    """

    annotations_query = f"""
    SELECT id, run_id, rule_result_id, actor_type, actor_id,
    annotation_type, summary, payload, created_at
    FROM {self.ANNOTATIONS_TABLE}
    WHERE run_id = ?
    ORDER BY created_at DESC
    """

    try:
        cur = db.cursor()
        cur.execute(run_query, run_args)
        head = cur.fetchone()
        if not head:
            return None

        found_run_id = head[0]

        cur.execute(rules_query, (found_run_id,))
        rule_records = cur.fetchall()

        cur.execute(annotations_query, (found_run_id,))
        annotation_records = cur.fetchall()

        state = self._build_state_from_rows(head, rule_records)

        loaded = []
        for record in annotation_records:
            (
                ann_id, ann_run_id, ann_rule_result_id, who_type, who_id,
                kind, text, raw_payload, stamped_at
            ) = record

            # payload is a JSON string; bad JSON degrades to None
            decoded = None
            if raw_payload:
                try:
                    decoded = json.loads(raw_payload)
                except Exception:
                    pass

            loaded.append(Annotation(
                id=ann_id,
                run_id=ann_run_id,
                rule_result_id=ann_rule_result_id,
                actor_type=who_type,
                actor_id=who_id,
                annotation_type=kind,
                summary=text,
                payload=decoded,
                created_at=stamped_at if isinstance(stamped_at, datetime) else None,
            ))

        self._attach_annotations_to_state(state, loaded)
        return state
    except Exception:
        return None
865
+
866
def get_history_with_annotations(
    self,
    contract_fingerprint: str,
    limit: int = 10,
) -> List[ValidationState]:
    """Return recent runs (via ``get_history``) with annotations attached.

    Annotations whose ``rule_result_id`` is NULL go on the state itself;
    the rest go on the rule whose ``id`` matches. If the annotation query
    fails, the states are returned with empty annotation lists.
    """

    def _blank_out(targets):
        # Give every state and rule an empty annotation list
        for target in targets:
            target.annotations = []
            for rule_state in target.rules:
                rule_state.annotations = []
        return targets

    states = self.get_history(contract_fingerprint, limit=limit)
    if not states:
        return []

    db = self._get_conn()
    run_ids = [s.id for s in states if s.id is not None]

    if not run_ids:
        return _blank_out(states)

    # One "?" per run id for the IN clause (SQL Server style)
    placeholders = ",".join("?" * len(run_ids))
    annotations_query = f"""
    SELECT id, run_id, rule_result_id, actor_type, actor_id,
    annotation_type, summary, payload, created_at
    FROM {self.ANNOTATIONS_TABLE}
    WHERE run_id IN ({placeholders})
    ORDER BY created_at DESC
    """

    try:
        cur = db.cursor()
        cur.execute(annotations_query, run_ids)

        # run id -> (rule result id or None) -> annotations
        by_run: Dict[int, Dict[Optional[int], List[Annotation]]] = {}

        for record in cur.fetchall():
            (
                ann_id, ann_run_id, ann_rule_result_id, who_type, who_id,
                kind, text, raw_payload, stamped_at
            ) = record

            # payload is a JSON string; bad JSON degrades to None
            decoded = None
            if raw_payload:
                try:
                    decoded = json.loads(raw_payload)
                except Exception:
                    pass

            entry = Annotation(
                id=ann_id,
                run_id=ann_run_id,
                rule_result_id=ann_rule_result_id,
                actor_type=who_type,
                actor_id=who_id,
                annotation_type=kind,
                summary=text,
                payload=decoded,
                created_at=stamped_at if isinstance(stamped_at, datetime) else None,
            )
            by_run.setdefault(ann_run_id, {}).setdefault(ann_rule_result_id, []).append(entry)

        for state in states:
            if state.id is None or state.id not in by_run:
                state.annotations = []
                for rule in state.rules:
                    rule.annotations = []
                continue

            per_rule = by_run[state.id]
            state.annotations = per_rule.get(None, [])
            for rule in state.rules:
                rule.annotations = per_rule.get(rule.id, []) if rule.id is not None else []

        return states
    except Exception:
        return _blank_out(states)
956
+
957
+ def close(self) -> None:
958
+ """Close the database connection."""
959
+ if self._conn is not None:
960
+ self._conn.close()
961
+ self._conn = None
962
+
963
+ def __repr__(self) -> str:
964
+ server = self._conn_params.get("server", "?")
965
+ database = self._conn_params.get("database", "?")
966
+ return f"SQLServerStore(server={server}, database={database})"
967
+
968
+ def __del__(self):
969
+ self.close()