kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124):
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1010 @@
1
+ # src/kontra/state/backends/postgres.py
2
+ """
3
+ PostgreSQL state storage with normalized schema (v0.5).
4
+
5
+ Schema:
6
+ kontra_runs - Run-level metadata
7
+ kontra_rule_results - Per-rule results (references kontra_runs)
8
+ kontra_annotations - Append-only annotations (references runs/rules)
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import os
15
+ from datetime import datetime, timezone
16
+ from typing import Any, Dict, List, Optional
17
+ from urllib.parse import urlparse, parse_qs
18
+
19
+ from .base import StateBackend
20
+ from kontra.state.types import (
21
+ Annotation,
22
+ RuleState,
23
+ StateSummary,
24
+ ValidationState,
25
+ )
26
+
27
+
28
+ class PostgresStore(StateBackend):
29
+ """
30
+ PostgreSQL database state storage backend with normalized schema.
31
+
32
+ Uses psycopg3 (psycopg) for database access. Automatically creates
33
+ the required tables if they don't exist.
34
+
35
+ URI format: postgres://user:pass@host:port/database
36
+ postgresql://user:pass@host:port/database
37
+
38
+ Also supports standard PostgreSQL environment variables:
39
+ PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE
40
+ """
41
+
42
+ # Table names
43
+ RUNS_TABLE = "kontra_runs"
44
+ RULE_RESULTS_TABLE = "kontra_rule_results"
45
+ ANNOTATIONS_TABLE = "kontra_annotations"
46
+
47
+ # Legacy table for migration detection
48
+ LEGACY_TABLE = "kontra_state"
49
+
50
+ CREATE_TABLES_SQL = """
51
+ -- Run-level metadata
52
+ CREATE TABLE IF NOT EXISTS kontra_runs (
53
+ id SERIAL PRIMARY KEY,
54
+
55
+ -- Identity
56
+ contract_fingerprint TEXT NOT NULL,
57
+ contract_name TEXT NOT NULL,
58
+ dataset_fingerprint TEXT,
59
+ dataset_name TEXT,
60
+
61
+ -- Timing
62
+ run_at TIMESTAMPTZ NOT NULL,
63
+ duration_ms INT,
64
+
65
+ -- Summary
66
+ passed BOOLEAN NOT NULL,
67
+ total_rows BIGINT,
68
+ total_rules INT NOT NULL,
69
+ passed_rules INT NOT NULL,
70
+ failed_rules INT NOT NULL,
71
+
72
+ -- By severity
73
+ blocking_failures INT NOT NULL DEFAULT 0,
74
+ warning_failures INT NOT NULL DEFAULT 0,
75
+ info_failures INT NOT NULL DEFAULT 0,
76
+
77
+ -- Execution metadata
78
+ execution_stats JSONB,
79
+
80
+ -- Schema version
81
+ schema_version TEXT NOT NULL DEFAULT '2.0',
82
+ engine_version TEXT
83
+ );
84
+
85
+ CREATE INDEX IF NOT EXISTS idx_kontra_runs_contract_time
86
+ ON kontra_runs (contract_fingerprint, run_at DESC);
87
+
88
+ CREATE INDEX IF NOT EXISTS idx_kontra_runs_passed
89
+ ON kontra_runs (contract_fingerprint, passed, run_at DESC);
90
+
91
+ -- Per-rule results
92
+ CREATE TABLE IF NOT EXISTS kontra_rule_results (
93
+ id SERIAL PRIMARY KEY,
94
+ run_id INT NOT NULL REFERENCES kontra_runs(id) ON DELETE CASCADE,
95
+
96
+ -- Rule identity
97
+ rule_id TEXT NOT NULL,
98
+ rule_name TEXT NOT NULL,
99
+
100
+ -- Result
101
+ passed BOOLEAN NOT NULL,
102
+ failed_count BIGINT NOT NULL DEFAULT 0,
103
+
104
+ -- Metadata
105
+ severity TEXT NOT NULL,
106
+ message TEXT,
107
+ column_name TEXT,
108
+ execution_source TEXT,
109
+
110
+ -- Variable structure
111
+ failure_mode TEXT,
112
+ details JSONB,
113
+ context JSONB,
114
+ samples JSONB
115
+ );
116
+
117
+ CREATE INDEX IF NOT EXISTS idx_kontra_rule_results_run
118
+ ON kontra_rule_results (run_id);
119
+
120
+ CREATE INDEX IF NOT EXISTS idx_kontra_rule_results_rule_id
121
+ ON kontra_rule_results (rule_id, run_id DESC);
122
+
123
+ -- Annotations (append-only)
124
+ CREATE TABLE IF NOT EXISTS kontra_annotations (
125
+ id SERIAL PRIMARY KEY,
126
+
127
+ -- What this annotates
128
+ run_id INT NOT NULL REFERENCES kontra_runs(id) ON DELETE CASCADE,
129
+ rule_result_id INT REFERENCES kontra_rule_results(id) ON DELETE CASCADE,
130
+
131
+ -- Who created it
132
+ actor_type TEXT NOT NULL,
133
+ actor_id TEXT NOT NULL,
134
+
135
+ -- What it says
136
+ annotation_type TEXT NOT NULL,
137
+ summary TEXT NOT NULL,
138
+ payload JSONB,
139
+
140
+ -- When
141
+ created_at TIMESTAMPTZ NOT NULL DEFAULT now()
142
+ );
143
+
144
+ CREATE INDEX IF NOT EXISTS idx_kontra_annotations_run
145
+ ON kontra_annotations (run_id);
146
+
147
+ CREATE INDEX IF NOT EXISTS idx_kontra_annotations_rule
148
+ ON kontra_annotations (rule_result_id)
149
+ WHERE rule_result_id IS NOT NULL;
150
+
151
+ CREATE INDEX IF NOT EXISTS idx_kontra_annotations_time
152
+ ON kontra_annotations (created_at DESC);
153
+ """
154
+
155
def __init__(self, uri: str):
    """
    Initialize the PostgreSQL store.

    Args:
        uri: PostgreSQL connection URI.

    The URI can be a full connection string or just the scheme,
    with connection details from environment variables.

    No connection is opened here; the attributes below only record the
    parsed parameters and an empty connection slot.

    Raises:
        Whatever ``_parse_connection_params`` raises for a malformed URI
        or environment value.
    """
    # Connection state is set first: __del__ calls close(), which reads
    # self._conn, so the attribute must exist even if parsing raises below.
    self._conn = None
    self._tables_created = False

    self.uri = uri
    self._conn_params = self._parse_connection_params(uri)
169
+
170
@staticmethod
def _parse_connection_params(uri: str) -> Dict[str, Any]:
    """
    Parse PostgreSQL connection parameters from URI and environment.

    Priority: URI values > DATABASE_URL > PGXXX env vars > defaults

    Args:
        uri: Connection URI; components it omits fall back to the
            lower-priority sources.

    Returns:
        Dict with at least ``host``, ``port``, ``user``, ``password`` and
        ``dbname`` (the last two may be None), plus one entry per query
        parameter of ``uri`` (first value only).

    Raises:
        ValueError: If PGPORT is set but is not an integer, or if a URL
            carries an invalid port.
    """
    # Local import: the module-level import list does not include unquote.
    from urllib.parse import unquote

    def overlay(url, target: Dict[str, Any]) -> None:
        """Copy every component present in *url* onto *target*."""
        if url.hostname:
            target["host"] = url.hostname
        if url.port:
            target["port"] = url.port
        # Userinfo and path are percent-encoded in a URL (a password
        # containing "@" or ":" has to be); decode before handing to libpq.
        if url.username:
            target["user"] = unquote(url.username)
        if url.password:
            target["password"] = unquote(url.password)
        if url.path and url.path != "/":
            # Split on the literal "/" first so an encoded %2F stays part
            # of the database name.
            target["dbname"] = unquote(url.path.strip("/").split("/")[0])

    parsed = urlparse(uri)

    # Start with defaults
    params: Dict[str, Any] = {
        "host": "localhost",
        "port": 5432,
        "user": os.getenv("USER", "postgres"),
        "password": None,
        "dbname": None,
    }

    # Layer 1: Standard PGXXX environment variables (empty values ignored)
    for env_name, key in (
        ("PGHOST", "host"),
        ("PGUSER", "user"),
        ("PGPASSWORD", "password"),
        ("PGDATABASE", "dbname"),
    ):
        env_value = os.getenv(env_name)
        if env_value:
            params[key] = env_value

    pg_port = os.getenv("PGPORT")
    if pg_port:
        try:
            params["port"] = int(pg_port)
        except ValueError:
            raise ValueError(
                f"PGPORT must be an integer, got {pg_port!r}"
            ) from None

    # Layer 2: DATABASE_URL (common in PaaS)
    database_url = os.getenv("DATABASE_URL")
    if database_url:
        overlay(urlparse(database_url), params)

    # Layer 3: Explicit URI values (highest priority)
    overlay(parsed, params)

    # Query parameters are stored verbatim under their own names.
    for key, values in parse_qs(parsed.query).items():
        if values:
            params[key] = values[0]

    return params
234
+
235
def _get_conn(self):
    """
    Get or create the database connection.

    The connection is opened on first use, the schema is ensured via
    ``_ensure_tables``, and the same connection object is returned on
    every later call.

    Returns:
        The open psycopg connection.

    Raises:
        RuntimeError: If the ``psycopg`` package is not installed.
        ConnectionError: If connecting or creating the tables fails.
    """
    if self._conn is not None:
        return self._conn

    try:
        import psycopg
    except ImportError as e:
        raise RuntimeError(
            "PostgreSQL state backend requires 'psycopg'. "
            "Install with: pip install psycopg[binary]"
        ) from e

    # Pass parameters as keywords instead of concatenating a conninfo
    # string: the driver then quotes values itself, so passwords that
    # contain spaces, quotes or backslashes connect correctly.
    params = self._conn_params
    connect_kwargs: Dict[str, Any] = {
        "host": params["host"],
        "port": params["port"],
    }
    # Optional core fields, followed by libpq options that may have been
    # supplied as URI query parameters (e.g. ?sslmode=require). Anything
    # outside this list is not forwarded.
    optional_keys = (
        "user",
        "password",
        "dbname",
        "sslmode",
        "sslrootcert",
        "sslcert",
        "sslkey",
        "connect_timeout",
        "application_name",
        "options",
    )
    for key in optional_keys:
        if params.get(key):
            connect_kwargs[key] = params[key]

    conn = None
    try:
        conn = psycopg.connect(**connect_kwargs)
        self._conn = conn  # _ensure_tables() reads self._conn
        self._ensure_tables()
    except Exception as e:
        # Never leave a half-initialised connection cached: a later call
        # would return it without the tables having been created.
        self._conn = None
        if conn is not None:
            try:
                conn.close()
            except Exception:
                pass  # already failing; the original error is what matters
        raise ConnectionError(
            f"Failed to connect to PostgreSQL: {e}\n\n"
            "Set environment variables:\n"
            "  export PGHOST=localhost\n"
            "  export PGPORT=5432\n"
            "  export PGUSER=your_user\n"
            "  export PGPASSWORD=your_password\n"
            "  export PGDATABASE=your_database\n\n"
            "Or use full URI:\n"
            "  postgres://user:pass@host:5432/database"
        ) from e

    return self._conn
274
+
275
def _ensure_tables(self) -> None:
    """
    Run the schema DDL once per store instance.

    Executes ``CREATE_TABLES_SQL`` on the current connection, commits, and
    remembers that it did so; later calls are no-ops.
    """
    if not self._tables_created:
        connection = self._conn
        with connection.cursor() as cursor:
            cursor.execute(self.CREATE_TABLES_SQL)
        connection.commit()
        self._tables_created = True
285
+
286
def save(self, state: ValidationState) -> None:
    """
    Save a validation state to the database (normalized).

    Inserts one row into the runs table and one row per rule into the rule
    results table, all in a single transaction. After a successful commit
    the generated primary keys are written back to ``state.id`` and to each
    ``rule.id``.

    Args:
        state: The validation state to persist; mutated on success.

    Raises:
        IOError: If any insert or the commit fails (the transaction is
            rolled back and ``state`` is left untouched).
    """
    conn = self._get_conn()

    # Insert run
    run_sql = f"""
        INSERT INTO {self.RUNS_TABLE} (
            contract_fingerprint,
            contract_name,
            dataset_fingerprint,
            dataset_name,
            run_at,
            duration_ms,
            passed,
            total_rows,
            total_rules,
            passed_rules,
            failed_rules,
            blocking_failures,
            warning_failures,
            info_failures,
            schema_version,
            engine_version
        ) VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        ) RETURNING id
    """

    # Insert rule result
    rule_sql = f"""
        INSERT INTO {self.RULE_RESULTS_TABLE} (
            run_id,
            rule_id,
            rule_name,
            passed,
            failed_count,
            severity,
            message,
            column_name,
            execution_source,
            failure_mode,
            details,
            context,
            samples
        ) VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
        ) RETURNING id
    """

    try:
        with conn.cursor() as cur:
            # Insert run
            cur.execute(run_sql, (
                state.contract_fingerprint,
                state.contract_name,
                state.dataset_fingerprint,
                state.dataset_uri,
                state.run_at,
                state.duration_ms,
                state.summary.passed,
                state.summary.row_count,
                state.summary.total_rules,
                state.summary.passed_rules,
                state.summary.failed_rules,
                state.summary.blocking_failures,
                state.summary.warning_failures,
                state.summary.info_failures,
                state.schema_version,
                state.engine_version,
            ))
            run_id = cur.fetchone()[0]

            # Insert rule results, remembering each generated id. The ids
            # are only applied after commit so a failed transaction never
            # leaves stale ids on the in-memory objects.
            rule_ids = []
            for rule in state.rules:
                cur.execute(rule_sql, (
                    run_id,
                    rule.rule_id,
                    rule.rule_name,
                    rule.passed,
                    rule.failed_count,
                    rule.severity,
                    rule.message,
                    rule.column,
                    rule.execution_source,
                    rule.failure_mode,
                    json.dumps(rule.details) if rule.details else None,
                    None,  # context - not stored in RuleState currently
                    None,  # samples - not stored in state currently
                ))
                rule_ids.append(cur.fetchone()[0])

        conn.commit()

    except Exception as e:
        try:
            conn.rollback()
        except Exception:
            pass  # a failing rollback must not mask the original error
        raise IOError(f"Failed to save state to PostgreSQL: {e}") from e

    # Update state (and its rules) with the assigned IDs
    state.id = run_id
    for rule, rule_result_id in zip(state.rules, rule_ids):
        rule.id = rule_result_id
384
+
385
def _build_state_from_rows(
    self,
    run_row: tuple,
    rule_rows: List[tuple],
) -> ValidationState:
    """
    Assemble a ValidationState from raw query rows.

    Args:
        run_row: One 18-column row from the runs table, in the column
            order used by this class's SELECT statements.
        rule_rows: Zero or more 14-column rows from the rule results table.

    Returns:
        A ValidationState with its summary and RuleState list populated.
    """
    (
        state_id, fingerprint, name, ds_fingerprint,
        ds_name, started_at, elapsed_ms, run_passed, row_total, n_rules,
        n_passed, n_failed, n_blocking, n_warning,
        n_info, _execution_stats, schema_ver, engine_ver
    ) = run_row

    run_summary = StateSummary(
        passed=run_passed,
        total_rules=n_rules,
        passed_rules=n_passed,
        failed_rules=n_failed,
        row_count=row_total,
        blocking_failures=n_blocking,
        warning_failures=n_warning,
        info_failures=n_info,
    )

    # The run_id, context and samples columns are selected but not carried
    # into RuleState.
    rule_states = [
        RuleState(
            rule_id=rid,
            rule_name=rname,
            passed=ok,
            failed_count=n_bad,
            execution_source=source or "unknown",
            severity=sev,
            failure_mode=mode,
            details=detail,
            message=msg,
            column=col,
            id=result_id,
        )
        for (
            result_id, _run_id, rid, rname, ok,
            n_bad, sev, msg, col, source,
            mode, detail, _context, _samples
        ) in rule_rows
    ]

    return ValidationState(
        id=state_id,
        contract_fingerprint=fingerprint,
        dataset_fingerprint=ds_fingerprint,
        contract_name=name,
        dataset_uri=ds_name or "",
        run_at=started_at,
        summary=run_summary,
        rules=rule_states,
        schema_version=schema_ver or "2.0",
        engine_version=engine_ver or "unknown",
        duration_ms=elapsed_ms,
    )
448
+
449
def get_latest(self, contract_fingerprint: str) -> Optional[ValidationState]:
    """
    Get the most recent state for a contract.

    Args:
        contract_fingerprint: Fingerprint identifying the contract.

    Returns:
        The run with the greatest ``run_at`` for the contract, with its
        rule results attached, or None when no run exists or the lookup
        fails.
    """
    conn = self._get_conn()

    run_sql = f"""
        SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
               dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
               passed_rules, failed_rules, blocking_failures, warning_failures,
               info_failures, execution_stats, schema_version, engine_version
        FROM {self.RUNS_TABLE}
        WHERE contract_fingerprint = %s
        ORDER BY run_at DESC
        LIMIT 1
    """

    rule_sql = f"""
        SELECT id, run_id, rule_id, rule_name, passed, failed_count,
               severity, message, column_name, execution_source,
               failure_mode, details, context, samples
        FROM {self.RULE_RESULTS_TABLE}
        WHERE run_id = %s
        ORDER BY id
    """

    try:
        with conn.cursor() as cur:
            cur.execute(run_sql, (contract_fingerprint,))
            run_row = cur.fetchone()
            if not run_row:
                return None

            cur.execute(rule_sql, (run_row[0],))
            rule_rows = cur.fetchall()

        return self._build_state_from_rows(run_row, rule_rows)
    except Exception:
        # Best-effort read: a failure is reported as "no state". The
        # rollback matters because a failed statement leaves the
        # transaction aborted, and this store reuses a single connection,
        # so every later query would otherwise fail as well.
        try:
            conn.rollback()
        except Exception:
            pass  # connection unusable; nothing further to clean up
        return None
487
+
488
+ def get_history(
489
+ self,
490
+ contract_fingerprint: str,
491
+ limit: int = 10,
492
+ ) -> List[ValidationState]:
493
+ """Get recent history for a contract, newest first."""
494
+ conn = self._get_conn()
495
+
496
+ run_sql = f"""
497
+ SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
498
+ dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
499
+ passed_rules, failed_rules, blocking_failures, warning_failures,
500
+ info_failures, execution_stats, schema_version, engine_version
501
+ FROM {self.RUNS_TABLE}
502
+ WHERE contract_fingerprint = %s
503
+ ORDER BY run_at DESC
504
+ LIMIT %s
505
+ """
506
+
507
+ rule_sql = f"""
508
+ SELECT id, run_id, rule_id, rule_name, passed, failed_count,
509
+ severity, message, column_name, execution_source,
510
+ failure_mode, details, context, samples
511
+ FROM {self.RULE_RESULTS_TABLE}
512
+ WHERE run_id = ANY(%s)
513
+ ORDER BY run_id, id
514
+ """
515
+
516
+ try:
517
+ with conn.cursor() as cur:
518
+ cur.execute(run_sql, (contract_fingerprint, limit))
519
+ run_rows = cur.fetchall()
520
+ if not run_rows:
521
+ return []
522
+
523
+ # Get all rule results in one query
524
+ run_ids = [row[0] for row in run_rows]
525
+ cur.execute(rule_sql, (run_ids,))
526
+ all_rule_rows = cur.fetchall()
527
+
528
+ # Group rule rows by run_id
529
+ rules_by_run: Dict[int, List[tuple]] = {}
530
+ for rule_row in all_rule_rows:
531
+ run_id = rule_row[1]
532
+ rules_by_run.setdefault(run_id, []).append(rule_row)
533
+
534
+ # Build states
535
+ states = []
536
+ for run_row in run_rows:
537
+ run_id = run_row[0]
538
+ rule_rows = rules_by_run.get(run_id, [])
539
+ state = self._build_state_from_rows(run_row, rule_rows)
540
+ states.append(state)
541
+
542
+ return states
543
+ except Exception:
544
+ return []
545
+
546
def get_at(
    self,
    contract_fingerprint: str,
    timestamp: datetime,
) -> Optional[ValidationState]:
    """
    Get state at or before a specific timestamp.

    Args:
        contract_fingerprint: Fingerprint identifying the contract.
        timestamp: Upper bound (inclusive) for the run's ``run_at``.

    Returns:
        The latest run whose ``run_at`` is not after ``timestamp``, with
        its rule results attached, or None when there is no such run or
        the lookup fails.
    """
    conn = self._get_conn()

    run_sql = f"""
        SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
               dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
               passed_rules, failed_rules, blocking_failures, warning_failures,
               info_failures, execution_stats, schema_version, engine_version
        FROM {self.RUNS_TABLE}
        WHERE contract_fingerprint = %s AND run_at <= %s
        ORDER BY run_at DESC
        LIMIT 1
    """

    rule_sql = f"""
        SELECT id, run_id, rule_id, rule_name, passed, failed_count,
               severity, message, column_name, execution_source,
               failure_mode, details, context, samples
        FROM {self.RULE_RESULTS_TABLE}
        WHERE run_id = %s
        ORDER BY id
    """

    try:
        with conn.cursor() as cur:
            cur.execute(run_sql, (contract_fingerprint, timestamp))
            run_row = cur.fetchone()
            if not run_row:
                return None

            cur.execute(rule_sql, (run_row[0],))
            rule_rows = cur.fetchall()

        return self._build_state_from_rows(run_row, rule_rows)
    except Exception:
        # Best-effort read: a failure is reported as "no state". Roll back
        # so the shared connection is not left in an aborted transaction
        # that would make every later query fail.
        try:
            conn.rollback()
        except Exception:
            pass  # connection unusable; nothing further to clean up
        return None
588
+
589
def delete_old(
    self,
    contract_fingerprint: str,
    keep_count: int = 100,
) -> int:
    """
    Prune a contract's history down to its newest ``keep_count`` runs.

    Rule results are removed with their run via ON DELETE CASCADE.

    Returns:
        Number of run rows deleted, or 0 if the delete failed (in which
        case the transaction is rolled back).
    """
    connection = self._get_conn()

    # Everything for this contract that is NOT among the newest keep_count
    prune_sql = f"""
        DELETE FROM {self.RUNS_TABLE}
        WHERE contract_fingerprint = %s
        AND id NOT IN (
            SELECT id FROM {self.RUNS_TABLE}
            WHERE contract_fingerprint = %s
            ORDER BY run_at DESC
            LIMIT %s
        )
    """
    arguments = (contract_fingerprint, contract_fingerprint, keep_count)

    try:
        with connection.cursor() as cursor:
            cursor.execute(prune_sql, arguments)
            removed = cursor.rowcount
        connection.commit()
    except Exception:
        connection.rollback()
        return 0
    return removed
618
+
619
def list_contracts(self) -> List[str]:
    """
    List all contract fingerprints with stored state.

    Returns:
        Distinct fingerprints from the runs table in ascending order, or
        an empty list if the query fails.
    """
    conn = self._get_conn()

    sql = f"""
        SELECT DISTINCT contract_fingerprint FROM {self.RUNS_TABLE}
        ORDER BY contract_fingerprint
    """

    try:
        with conn.cursor() as cur:
            cur.execute(sql)
            return [row[0] for row in cur.fetchall()]
    except Exception:
        # Best-effort read. Roll back so the shared connection is not left
        # in an aborted transaction that would break every later query.
        try:
            conn.rollback()
        except Exception:
            pass  # connection unusable; nothing further to clean up
        return []
635
+
636
def clear(self, contract_fingerprint: Optional[str] = None) -> int:
    """
    Clear stored states.

    Args:
        contract_fingerprint: If provided, only clear this contract's states.
                              If None, clear all states.

    Returns:
        Number of run rows deleted (rule_results cascade), or 0 if the
        delete failed and was rolled back.
    """
    conn = self._get_conn()

    try:
        with conn.cursor() as cur:
            # Compare against None explicitly: a truthiness test would treat
            # an empty-string fingerprint as "no filter" and wipe every
            # contract's history.
            if contract_fingerprint is not None:
                cur.execute(
                    f"DELETE FROM {self.RUNS_TABLE} WHERE contract_fingerprint = %s",
                    (contract_fingerprint,)
                )
            else:
                cur.execute(f"DELETE FROM {self.RUNS_TABLE}")
            deleted = cur.rowcount
        conn.commit()
        return deleted
    except Exception:
        conn.rollback()
        return 0
664
+
665
+ # -------------------------------------------------------------------------
666
+ # Annotation Methods
667
+ # -------------------------------------------------------------------------
668
+
669
def save_annotation(self, annotation: Annotation) -> int:
    """
    Append one annotation row and return its generated id.

    The id is also written back to ``annotation.id``. A missing
    ``created_at`` is replaced by the current UTC time; a falsy payload is
    stored as NULL.

    Raises:
        IOError: If the insert or commit fails (after rolling back).
    """
    connection = self._get_conn()

    insert_sql = f"""
        INSERT INTO {self.ANNOTATIONS_TABLE} (
            run_id, rule_result_id, actor_type, actor_id,
            annotation_type, summary, payload, created_at
        ) VALUES (
            %s, %s, %s, %s, %s, %s, %s, %s
        ) RETURNING id
    """

    try:
        with connection.cursor() as cursor:
            payload_json = None
            if annotation.payload:
                payload_json = json.dumps(annotation.payload)
            stamp = annotation.created_at or datetime.now(timezone.utc)
            row_values = (
                annotation.run_id,
                annotation.rule_result_id,
                annotation.actor_type,
                annotation.actor_id,
                annotation.annotation_type,
                annotation.summary,
                payload_json,
                stamp,
            )
            cursor.execute(insert_sql, row_values)
            new_id = cursor.fetchone()[0]
        connection.commit()

        annotation.id = new_id
        return new_id
    except Exception as exc:
        connection.rollback()
        raise IOError(f"Failed to save annotation: {exc}") from exc
702
+
703
def get_annotations(
    self,
    run_id: int,
    rule_result_id: Optional[int] = None,
) -> List[Annotation]:
    """
    Get annotations for a run or specific rule result.

    Args:
        run_id: Run whose annotations are wanted.
        rule_result_id: When given, restrict to annotations attached to
            this rule result; when None, return every annotation of the run.

    Returns:
        Annotations ordered newest first; an empty list when there are
        none or the query fails.
    """
    conn = self._get_conn()

    sql = f"""
        SELECT id, run_id, rule_result_id, actor_type, actor_id,
               annotation_type, summary, payload, created_at
        FROM {self.ANNOTATIONS_TABLE}
        WHERE run_id = %s
    """
    params: tuple = (run_id,)
    if rule_result_id is not None:
        sql += " AND rule_result_id = %s"
        params = (run_id, rule_result_id)
    sql += " ORDER BY created_at DESC"

    try:
        with conn.cursor() as cur:
            cur.execute(sql, params)
            rows = cur.fetchall()

        annotations = []
        # Row fields get their own names so they do not shadow the
        # run_id / rule_result_id arguments.
        for (
            ann_id, ann_run_id, ann_rule_result_id, actor_type, actor_id,
            annotation_type, summary, payload, created_at
        ) in rows:
            annotations.append(Annotation(
                id=ann_id,
                run_id=ann_run_id,
                rule_result_id=ann_rule_result_id,
                actor_type=actor_type,
                actor_id=actor_id,
                annotation_type=annotation_type,
                summary=summary,
                payload=payload,
                created_at=created_at,
            ))
        return annotations
    except Exception:
        # Best-effort read. Roll back so the shared connection is not left
        # in an aborted transaction that would break every later query.
        try:
            conn.rollback()
        except Exception:
            pass  # connection unusable; nothing further to clean up
        return []
756
+
757
def get_annotations_for_contract(
    self,
    contract_fingerprint: str,
    rule_id: Optional[str] = None,
    annotation_type: Optional[str] = None,
    limit: int = 20,
) -> List[Annotation]:
    """
    Get annotations across all runs for a contract.

    Args:
        contract_fingerprint: Fingerprint identifying the contract.
        rule_id: When given, keep only annotations attached to a rule
            result with this rule id.
        annotation_type: When given, keep only annotations of this type.
        limit: Maximum number of annotations to return.

    Returns:
        Annotations ordered newest first, each carrying the ``rule_id`` of
        its rule result (None for run-level annotations); an empty list
        when there are none or the query fails.
    """
    conn = self._get_conn()

    # Runs are joined to filter by contract; rule results are LEFT JOINed
    # so run-level annotations (NULL rule_result_id) are still returned.
    sql = f"""
        SELECT
            a.id, a.run_id, a.rule_result_id, a.actor_type, a.actor_id,
            a.annotation_type, a.summary, a.payload, a.created_at,
            rr.rule_id
        FROM {self.ANNOTATIONS_TABLE} a
        JOIN {self.RUNS_TABLE} r ON a.run_id = r.id
        LEFT JOIN {self.RULE_RESULTS_TABLE} rr ON a.rule_result_id = rr.id
        WHERE r.contract_fingerprint = %s
    """
    params: List[Any] = [contract_fingerprint]

    if rule_id is not None:
        sql += " AND rr.rule_id = %s"
        params.append(rule_id)

    if annotation_type is not None:
        sql += " AND a.annotation_type = %s"
        params.append(annotation_type)

    sql += " ORDER BY a.created_at DESC LIMIT %s"
    params.append(limit)

    try:
        with conn.cursor() as cur:
            cur.execute(sql, tuple(params))
            rows = cur.fetchall()

        annotations = []
        for (
            ann_id, run_id, rule_result_id, actor_type, actor_id,
            ann_type, summary, payload, created_at, rule_id_val
        ) in rows:
            annotations.append(Annotation(
                id=ann_id,
                run_id=run_id,
                rule_result_id=rule_result_id,
                rule_id=rule_id_val,
                actor_type=actor_type,
                actor_id=actor_id,
                annotation_type=ann_type,
                summary=summary,
                payload=payload,
                created_at=created_at,
            ))
        return annotations
    except Exception:
        # Best-effort read. Roll back so the shared connection is not left
        # in an aborted transaction that would break every later query.
        try:
            conn.rollback()
        except Exception:
            pass  # connection unusable; nothing further to clean up
        return []
819
+
820
def get_run_with_annotations(
    self,
    contract_fingerprint: str,
    run_id: Optional[int] = None,
) -> Optional[ValidationState]:
    """
    Get a validation state with its annotations loaded.

    Args:
        contract_fingerprint: Fingerprint identifying the contract.
        run_id: Specific run to load; it must belong to the contract.
            When None, the contract's most recent run is loaded.

    Returns:
        The state after ``_attach_annotations_to_state`` has been applied
        to it, or None when the run does not exist or the lookup fails.
    """
    conn = self._get_conn()

    run_columns = """
        SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
               dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
               passed_rules, failed_rules, blocking_failures, warning_failures,
               info_failures, execution_stats, schema_version, engine_version
    """

    # Get the run
    if run_id is not None:
        run_sql = f"""
            {run_columns}
            FROM {self.RUNS_TABLE}
            WHERE id = %s AND contract_fingerprint = %s
        """
        run_params: tuple = (run_id, contract_fingerprint)
    else:
        run_sql = f"""
            {run_columns}
            FROM {self.RUNS_TABLE}
            WHERE contract_fingerprint = %s
            ORDER BY run_at DESC
            LIMIT 1
        """
        run_params = (contract_fingerprint,)

    rule_sql = f"""
        SELECT id, run_id, rule_id, rule_name, passed, failed_count,
               severity, message, column_name, execution_source,
               failure_mode, details, context, samples
        FROM {self.RULE_RESULTS_TABLE}
        WHERE run_id = %s
        ORDER BY id
    """

    ann_sql = f"""
        SELECT id, run_id, rule_result_id, actor_type, actor_id,
               annotation_type, summary, payload, created_at
        FROM {self.ANNOTATIONS_TABLE}
        WHERE run_id = %s
        ORDER BY created_at DESC
    """

    try:
        with conn.cursor() as cur:
            cur.execute(run_sql, run_params)
            run_row = cur.fetchone()
            if not run_row:
                return None

            actual_run_id = run_row[0]

            # Get rules
            cur.execute(rule_sql, (actual_run_id,))
            rule_rows = cur.fetchall()

            # Get annotations
            cur.execute(ann_sql, (actual_run_id,))
            ann_rows = cur.fetchall()

        # Build state
        state = self._build_state_from_rows(run_row, rule_rows)

        # Build annotations list
        annotations = []
        for (
            ann_id, run_id_val, rule_result_id, actor_type, actor_id,
            annotation_type, summary, payload, created_at
        ) in ann_rows:
            annotations.append(Annotation(
                id=ann_id,
                run_id=run_id_val,
                rule_result_id=rule_result_id,
                actor_type=actor_type,
                actor_id=actor_id,
                annotation_type=annotation_type,
                summary=summary,
                payload=payload,
                created_at=created_at,
            ))

        self._attach_annotations_to_state(state, annotations)
        return state
    except Exception:
        # Best-effort read: a failure is reported as "no state". Roll back
        # so the shared connection is not left in an aborted transaction
        # that would make every later query fail.
        try:
            conn.rollback()
        except Exception:
            pass  # connection unusable; nothing further to clean up
        return None
912
+
913
def get_history_with_annotations(
    self,
    contract_fingerprint: str,
    limit: int = 10,
) -> List[ValidationState]:
    """
    Get recent history with annotations loaded.

    History is loaded through ``get_history`` first; the annotations of
    all returned runs are then fetched in one query and distributed onto
    each state (run-level annotations) and each rule (rule-level ones).

    Args:
        contract_fingerprint: Fingerprint identifying the contract.
        limit: Maximum number of runs to return.

    Returns:
        The states from ``get_history``. If the annotation query fails,
        the states are still returned, with empty annotation lists.
    """

    def without_annotations(targets: List[ValidationState]) -> List[ValidationState]:
        """Give every state and rule an empty annotation list."""
        for target in targets:
            target.annotations = []
            for rule in target.rules:
                rule.annotations = []
        return targets

    states = self.get_history(contract_fingerprint, limit=limit)
    if not states:
        return []

    conn = self._get_conn()
    run_ids = [s.id for s in states if s.id is not None]

    if not run_ids:
        # No IDs to look up, so there is nothing to attach
        return without_annotations(states)

    ann_sql = f"""
        SELECT id, run_id, rule_result_id, actor_type, actor_id,
               annotation_type, summary, payload, created_at
        FROM {self.ANNOTATIONS_TABLE}
        WHERE run_id = ANY(%s)
        ORDER BY created_at DESC
    """

    try:
        with conn.cursor() as cur:
            cur.execute(ann_sql, (run_ids,))
            ann_rows = cur.fetchall()

        # run_id -> rule_result_id (None for run-level) -> annotations
        annotations_index: Dict[int, Dict[Optional[int], List[Annotation]]] = {}

        for (
            ann_id, run_id, rule_result_id, actor_type, actor_id,
            annotation_type, summary, payload, created_at
        ) in ann_rows:
            annotation = Annotation(
                id=ann_id,
                run_id=run_id,
                rule_result_id=rule_result_id,
                actor_type=actor_type,
                actor_id=actor_id,
                annotation_type=annotation_type,
                summary=summary,
                payload=payload,
                created_at=created_at,
            )
            annotations_index.setdefault(run_id, {}).setdefault(
                rule_result_id, []
            ).append(annotation)

        # Attach to states; runs or rules without annotations get []
        for state in states:
            run_anns = annotations_index.get(state.id, {})
            state.annotations = run_anns.get(None, [])
            for rule in state.rules:
                if rule.id is not None:
                    rule.annotations = run_anns.get(rule.id, [])
                else:
                    rule.annotations = []

        return states
    except Exception:
        # Roll back so the shared connection is not left in an aborted
        # transaction, then return states without annotations.
        try:
            conn.rollback()
        except Exception:
            pass  # connection unusable; nothing further to clean up
        return without_annotations(states)
997
+
998
def close(self) -> None:
    """
    Close the database connection.

    Safe to call repeatedly, and on an instance whose ``__init__`` did not
    finish (``__del__`` calls this unconditionally).
    """
    conn = getattr(self, "_conn", None)
    if conn is None:
        return
    # Drop the reference before closing so the store never keeps pointing
    # at a connection whose close() raised.
    self._conn = None
    conn.close()
1003
+
1004
def __repr__(self) -> str:
    """Return a short description with host and database, never the password."""
    # `or "?"` rather than a .get() default: the keys are always present
    # (dbname starts out as None), so a .get() default would never apply.
    host = self._conn_params.get("host") or "?"
    dbname = self._conn_params.get("dbname") or "?"
    return f"PostgresStore(host={host}, dbname={dbname})"
1008
+
1009
def __del__(self):
    """Close the connection when the store is garbage-collected."""
    # A finalizer can run at any time, including interpreter shutdown, and
    # has no caller to report to, so a failing close is dropped here.
    try:
        self.close()
    except Exception:
        pass