kontra 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1010 @@
|
|
|
1
|
+
# src/kontra/state/backends/postgres.py
|
|
2
|
+
"""
|
|
3
|
+
PostgreSQL state storage with normalized schema (v0.5).
|
|
4
|
+
|
|
5
|
+
Schema:
|
|
6
|
+
kontra_runs - Run-level metadata
|
|
7
|
+
kontra_rule_results - Per-rule results (references kontra_runs)
|
|
8
|
+
kontra_annotations - Append-only annotations (references runs/rules)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
from datetime import datetime, timezone
|
|
16
|
+
from typing import Any, Dict, List, Optional
|
|
17
|
+
from urllib.parse import urlparse, parse_qs
|
|
18
|
+
|
|
19
|
+
from .base import StateBackend
|
|
20
|
+
from kontra.state.types import (
|
|
21
|
+
Annotation,
|
|
22
|
+
RuleState,
|
|
23
|
+
StateSummary,
|
|
24
|
+
ValidationState,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class PostgresStore(StateBackend):
|
|
29
|
+
"""
|
|
30
|
+
PostgreSQL database state storage backend with normalized schema.
|
|
31
|
+
|
|
32
|
+
Uses psycopg3 (psycopg) for database access. Automatically creates
|
|
33
|
+
the required tables if they don't exist.
|
|
34
|
+
|
|
35
|
+
URI format: postgres://user:pass@host:port/database
|
|
36
|
+
postgresql://user:pass@host:port/database
|
|
37
|
+
|
|
38
|
+
Also supports standard PostgreSQL environment variables:
|
|
39
|
+
PGHOST, PGPORT, PGUSER, PGPASSWORD, PGDATABASE
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
# Table names
|
|
43
|
+
RUNS_TABLE = "kontra_runs"
|
|
44
|
+
RULE_RESULTS_TABLE = "kontra_rule_results"
|
|
45
|
+
ANNOTATIONS_TABLE = "kontra_annotations"
|
|
46
|
+
|
|
47
|
+
# Legacy table for migration detection
|
|
48
|
+
LEGACY_TABLE = "kontra_state"
|
|
49
|
+
|
|
50
|
+
CREATE_TABLES_SQL = """
|
|
51
|
+
-- Run-level metadata
|
|
52
|
+
CREATE TABLE IF NOT EXISTS kontra_runs (
|
|
53
|
+
id SERIAL PRIMARY KEY,
|
|
54
|
+
|
|
55
|
+
-- Identity
|
|
56
|
+
contract_fingerprint TEXT NOT NULL,
|
|
57
|
+
contract_name TEXT NOT NULL,
|
|
58
|
+
dataset_fingerprint TEXT,
|
|
59
|
+
dataset_name TEXT,
|
|
60
|
+
|
|
61
|
+
-- Timing
|
|
62
|
+
run_at TIMESTAMPTZ NOT NULL,
|
|
63
|
+
duration_ms INT,
|
|
64
|
+
|
|
65
|
+
-- Summary
|
|
66
|
+
passed BOOLEAN NOT NULL,
|
|
67
|
+
total_rows BIGINT,
|
|
68
|
+
total_rules INT NOT NULL,
|
|
69
|
+
passed_rules INT NOT NULL,
|
|
70
|
+
failed_rules INT NOT NULL,
|
|
71
|
+
|
|
72
|
+
-- By severity
|
|
73
|
+
blocking_failures INT NOT NULL DEFAULT 0,
|
|
74
|
+
warning_failures INT NOT NULL DEFAULT 0,
|
|
75
|
+
info_failures INT NOT NULL DEFAULT 0,
|
|
76
|
+
|
|
77
|
+
-- Execution metadata
|
|
78
|
+
execution_stats JSONB,
|
|
79
|
+
|
|
80
|
+
-- Schema version
|
|
81
|
+
schema_version TEXT NOT NULL DEFAULT '2.0',
|
|
82
|
+
engine_version TEXT
|
|
83
|
+
);
|
|
84
|
+
|
|
85
|
+
CREATE INDEX IF NOT EXISTS idx_kontra_runs_contract_time
|
|
86
|
+
ON kontra_runs (contract_fingerprint, run_at DESC);
|
|
87
|
+
|
|
88
|
+
CREATE INDEX IF NOT EXISTS idx_kontra_runs_passed
|
|
89
|
+
ON kontra_runs (contract_fingerprint, passed, run_at DESC);
|
|
90
|
+
|
|
91
|
+
-- Per-rule results
|
|
92
|
+
CREATE TABLE IF NOT EXISTS kontra_rule_results (
|
|
93
|
+
id SERIAL PRIMARY KEY,
|
|
94
|
+
run_id INT NOT NULL REFERENCES kontra_runs(id) ON DELETE CASCADE,
|
|
95
|
+
|
|
96
|
+
-- Rule identity
|
|
97
|
+
rule_id TEXT NOT NULL,
|
|
98
|
+
rule_name TEXT NOT NULL,
|
|
99
|
+
|
|
100
|
+
-- Result
|
|
101
|
+
passed BOOLEAN NOT NULL,
|
|
102
|
+
failed_count BIGINT NOT NULL DEFAULT 0,
|
|
103
|
+
|
|
104
|
+
-- Metadata
|
|
105
|
+
severity TEXT NOT NULL,
|
|
106
|
+
message TEXT,
|
|
107
|
+
column_name TEXT,
|
|
108
|
+
execution_source TEXT,
|
|
109
|
+
|
|
110
|
+
-- Variable structure
|
|
111
|
+
failure_mode TEXT,
|
|
112
|
+
details JSONB,
|
|
113
|
+
context JSONB,
|
|
114
|
+
samples JSONB
|
|
115
|
+
);
|
|
116
|
+
|
|
117
|
+
CREATE INDEX IF NOT EXISTS idx_kontra_rule_results_run
|
|
118
|
+
ON kontra_rule_results (run_id);
|
|
119
|
+
|
|
120
|
+
CREATE INDEX IF NOT EXISTS idx_kontra_rule_results_rule_id
|
|
121
|
+
ON kontra_rule_results (rule_id, run_id DESC);
|
|
122
|
+
|
|
123
|
+
-- Annotations (append-only)
|
|
124
|
+
CREATE TABLE IF NOT EXISTS kontra_annotations (
|
|
125
|
+
id SERIAL PRIMARY KEY,
|
|
126
|
+
|
|
127
|
+
-- What this annotates
|
|
128
|
+
run_id INT NOT NULL REFERENCES kontra_runs(id) ON DELETE CASCADE,
|
|
129
|
+
rule_result_id INT REFERENCES kontra_rule_results(id) ON DELETE CASCADE,
|
|
130
|
+
|
|
131
|
+
-- Who created it
|
|
132
|
+
actor_type TEXT NOT NULL,
|
|
133
|
+
actor_id TEXT NOT NULL,
|
|
134
|
+
|
|
135
|
+
-- What it says
|
|
136
|
+
annotation_type TEXT NOT NULL,
|
|
137
|
+
summary TEXT NOT NULL,
|
|
138
|
+
payload JSONB,
|
|
139
|
+
|
|
140
|
+
-- When
|
|
141
|
+
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
|
142
|
+
);
|
|
143
|
+
|
|
144
|
+
CREATE INDEX IF NOT EXISTS idx_kontra_annotations_run
|
|
145
|
+
ON kontra_annotations (run_id);
|
|
146
|
+
|
|
147
|
+
CREATE INDEX IF NOT EXISTS idx_kontra_annotations_rule
|
|
148
|
+
ON kontra_annotations (rule_result_id)
|
|
149
|
+
WHERE rule_result_id IS NOT NULL;
|
|
150
|
+
|
|
151
|
+
CREATE INDEX IF NOT EXISTS idx_kontra_annotations_time
|
|
152
|
+
ON kontra_annotations (created_at DESC);
|
|
153
|
+
"""
|
|
154
|
+
|
|
155
|
+
def __init__(self, uri: str):
|
|
156
|
+
"""
|
|
157
|
+
Initialize the PostgreSQL store.
|
|
158
|
+
|
|
159
|
+
Args:
|
|
160
|
+
uri: PostgreSQL connection URI
|
|
161
|
+
|
|
162
|
+
The URI can be a full connection string or just the scheme,
|
|
163
|
+
with connection details from environment variables.
|
|
164
|
+
"""
|
|
165
|
+
self.uri = uri
|
|
166
|
+
self._conn_params = self._parse_connection_params(uri)
|
|
167
|
+
self._conn = None
|
|
168
|
+
self._tables_created = False
|
|
169
|
+
|
|
170
|
+
@staticmethod
|
|
171
|
+
def _parse_connection_params(uri: str) -> Dict[str, Any]:
|
|
172
|
+
"""
|
|
173
|
+
Parse PostgreSQL connection parameters from URI and environment.
|
|
174
|
+
|
|
175
|
+
Priority: URI values > DATABASE_URL > PGXXX env vars > defaults
|
|
176
|
+
"""
|
|
177
|
+
parsed = urlparse(uri)
|
|
178
|
+
|
|
179
|
+
# Start with defaults
|
|
180
|
+
params: Dict[str, Any] = {
|
|
181
|
+
"host": "localhost",
|
|
182
|
+
"port": 5432,
|
|
183
|
+
"user": os.getenv("USER", "postgres"),
|
|
184
|
+
"password": None,
|
|
185
|
+
"dbname": None,
|
|
186
|
+
}
|
|
187
|
+
|
|
188
|
+
# Layer 1: Standard PGXXX environment variables
|
|
189
|
+
if os.getenv("PGHOST"):
|
|
190
|
+
params["host"] = os.getenv("PGHOST")
|
|
191
|
+
if os.getenv("PGPORT"):
|
|
192
|
+
params["port"] = int(os.getenv("PGPORT"))
|
|
193
|
+
if os.getenv("PGUSER"):
|
|
194
|
+
params["user"] = os.getenv("PGUSER")
|
|
195
|
+
if os.getenv("PGPASSWORD"):
|
|
196
|
+
params["password"] = os.getenv("PGPASSWORD")
|
|
197
|
+
if os.getenv("PGDATABASE"):
|
|
198
|
+
params["dbname"] = os.getenv("PGDATABASE")
|
|
199
|
+
|
|
200
|
+
# Layer 2: DATABASE_URL (common in PaaS)
|
|
201
|
+
database_url = os.getenv("DATABASE_URL")
|
|
202
|
+
if database_url:
|
|
203
|
+
db_parsed = urlparse(database_url)
|
|
204
|
+
if db_parsed.hostname:
|
|
205
|
+
params["host"] = db_parsed.hostname
|
|
206
|
+
if db_parsed.port:
|
|
207
|
+
params["port"] = db_parsed.port
|
|
208
|
+
if db_parsed.username:
|
|
209
|
+
params["user"] = db_parsed.username
|
|
210
|
+
if db_parsed.password:
|
|
211
|
+
params["password"] = db_parsed.password
|
|
212
|
+
if db_parsed.path and db_parsed.path != "/":
|
|
213
|
+
params["dbname"] = db_parsed.path.strip("/").split("/")[0]
|
|
214
|
+
|
|
215
|
+
# Layer 3: Explicit URI values (highest priority)
|
|
216
|
+
if parsed.hostname:
|
|
217
|
+
params["host"] = parsed.hostname
|
|
218
|
+
if parsed.port:
|
|
219
|
+
params["port"] = parsed.port
|
|
220
|
+
if parsed.username:
|
|
221
|
+
params["user"] = parsed.username
|
|
222
|
+
if parsed.password:
|
|
223
|
+
params["password"] = parsed.password
|
|
224
|
+
if parsed.path and parsed.path != "/":
|
|
225
|
+
params["dbname"] = parsed.path.strip("/").split("/")[0]
|
|
226
|
+
|
|
227
|
+
# Parse query parameters
|
|
228
|
+
query_params = parse_qs(parsed.query)
|
|
229
|
+
for key, values in query_params.items():
|
|
230
|
+
if values:
|
|
231
|
+
params[key] = values[0]
|
|
232
|
+
|
|
233
|
+
return params
|
|
234
|
+
|
|
235
|
+
def _get_conn(self):
|
|
236
|
+
"""Get or create the database connection."""
|
|
237
|
+
if self._conn is not None:
|
|
238
|
+
return self._conn
|
|
239
|
+
|
|
240
|
+
try:
|
|
241
|
+
import psycopg
|
|
242
|
+
except ImportError as e:
|
|
243
|
+
raise RuntimeError(
|
|
244
|
+
"PostgreSQL state backend requires 'psycopg'. "
|
|
245
|
+
"Install with: pip install psycopg[binary]"
|
|
246
|
+
) from e
|
|
247
|
+
|
|
248
|
+
# Build connection string
|
|
249
|
+
conn_str = f"host={self._conn_params['host']} port={self._conn_params['port']}"
|
|
250
|
+
if self._conn_params.get("user"):
|
|
251
|
+
conn_str += f" user={self._conn_params['user']}"
|
|
252
|
+
if self._conn_params.get("password"):
|
|
253
|
+
conn_str += f" password={self._conn_params['password']}"
|
|
254
|
+
if self._conn_params.get("dbname"):
|
|
255
|
+
conn_str += f" dbname={self._conn_params['dbname']}"
|
|
256
|
+
|
|
257
|
+
try:
|
|
258
|
+
self._conn = psycopg.connect(conn_str)
|
|
259
|
+
self._ensure_tables()
|
|
260
|
+
except Exception as e:
|
|
261
|
+
raise ConnectionError(
|
|
262
|
+
f"Failed to connect to PostgreSQL: {e}\n\n"
|
|
263
|
+
"Set environment variables:\n"
|
|
264
|
+
" export PGHOST=localhost\n"
|
|
265
|
+
" export PGPORT=5432\n"
|
|
266
|
+
" export PGUSER=your_user\n"
|
|
267
|
+
" export PGPASSWORD=your_password\n"
|
|
268
|
+
" export PGDATABASE=your_database\n\n"
|
|
269
|
+
"Or use full URI:\n"
|
|
270
|
+
" postgres://user:pass@host:5432/database"
|
|
271
|
+
) from e
|
|
272
|
+
|
|
273
|
+
return self._conn
|
|
274
|
+
|
|
275
|
+
def _ensure_tables(self) -> None:
|
|
276
|
+
"""Create the state tables if they don't exist."""
|
|
277
|
+
if self._tables_created:
|
|
278
|
+
return
|
|
279
|
+
|
|
280
|
+
conn = self._conn
|
|
281
|
+
with conn.cursor() as cur:
|
|
282
|
+
cur.execute(self.CREATE_TABLES_SQL)
|
|
283
|
+
conn.commit()
|
|
284
|
+
self._tables_created = True
|
|
285
|
+
|
|
286
|
+
def save(self, state: ValidationState) -> None:
|
|
287
|
+
"""Save a validation state to the database (normalized)."""
|
|
288
|
+
conn = self._get_conn()
|
|
289
|
+
|
|
290
|
+
# Insert run
|
|
291
|
+
run_sql = f"""
|
|
292
|
+
INSERT INTO {self.RUNS_TABLE} (
|
|
293
|
+
contract_fingerprint,
|
|
294
|
+
contract_name,
|
|
295
|
+
dataset_fingerprint,
|
|
296
|
+
dataset_name,
|
|
297
|
+
run_at,
|
|
298
|
+
duration_ms,
|
|
299
|
+
passed,
|
|
300
|
+
total_rows,
|
|
301
|
+
total_rules,
|
|
302
|
+
passed_rules,
|
|
303
|
+
failed_rules,
|
|
304
|
+
blocking_failures,
|
|
305
|
+
warning_failures,
|
|
306
|
+
info_failures,
|
|
307
|
+
schema_version,
|
|
308
|
+
engine_version
|
|
309
|
+
) VALUES (
|
|
310
|
+
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
|
|
311
|
+
) RETURNING id
|
|
312
|
+
"""
|
|
313
|
+
|
|
314
|
+
# Insert rule result
|
|
315
|
+
rule_sql = f"""
|
|
316
|
+
INSERT INTO {self.RULE_RESULTS_TABLE} (
|
|
317
|
+
run_id,
|
|
318
|
+
rule_id,
|
|
319
|
+
rule_name,
|
|
320
|
+
passed,
|
|
321
|
+
failed_count,
|
|
322
|
+
severity,
|
|
323
|
+
message,
|
|
324
|
+
column_name,
|
|
325
|
+
execution_source,
|
|
326
|
+
failure_mode,
|
|
327
|
+
details,
|
|
328
|
+
context,
|
|
329
|
+
samples
|
|
330
|
+
) VALUES (
|
|
331
|
+
%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s
|
|
332
|
+
) RETURNING id
|
|
333
|
+
"""
|
|
334
|
+
|
|
335
|
+
try:
|
|
336
|
+
with conn.cursor() as cur:
|
|
337
|
+
# Insert run
|
|
338
|
+
cur.execute(run_sql, (
|
|
339
|
+
state.contract_fingerprint,
|
|
340
|
+
state.contract_name,
|
|
341
|
+
state.dataset_fingerprint,
|
|
342
|
+
state.dataset_uri,
|
|
343
|
+
state.run_at,
|
|
344
|
+
state.duration_ms,
|
|
345
|
+
state.summary.passed,
|
|
346
|
+
state.summary.row_count,
|
|
347
|
+
state.summary.total_rules,
|
|
348
|
+
state.summary.passed_rules,
|
|
349
|
+
state.summary.failed_rules,
|
|
350
|
+
state.summary.blocking_failures,
|
|
351
|
+
state.summary.warning_failures,
|
|
352
|
+
state.summary.info_failures,
|
|
353
|
+
state.schema_version,
|
|
354
|
+
state.engine_version,
|
|
355
|
+
))
|
|
356
|
+
run_id = cur.fetchone()[0]
|
|
357
|
+
|
|
358
|
+
# Insert rule results
|
|
359
|
+
for rule in state.rules:
|
|
360
|
+
cur.execute(rule_sql, (
|
|
361
|
+
run_id,
|
|
362
|
+
rule.rule_id,
|
|
363
|
+
rule.rule_name,
|
|
364
|
+
rule.passed,
|
|
365
|
+
rule.failed_count,
|
|
366
|
+
rule.severity,
|
|
367
|
+
rule.message,
|
|
368
|
+
rule.column,
|
|
369
|
+
rule.execution_source,
|
|
370
|
+
rule.failure_mode,
|
|
371
|
+
json.dumps(rule.details) if rule.details else None,
|
|
372
|
+
None, # context - not stored in RuleState currently
|
|
373
|
+
None, # samples - not stored in state currently
|
|
374
|
+
))
|
|
375
|
+
|
|
376
|
+
conn.commit()
|
|
377
|
+
|
|
378
|
+
# Update state with assigned ID
|
|
379
|
+
state.id = run_id
|
|
380
|
+
|
|
381
|
+
except Exception as e:
|
|
382
|
+
conn.rollback()
|
|
383
|
+
raise IOError(f"Failed to save state to PostgreSQL: {e}") from e
|
|
384
|
+
|
|
385
|
+
def _build_state_from_rows(
    self,
    run_row: tuple,
    rule_rows: List[tuple],
) -> ValidationState:
    """
    Assemble a ValidationState from one run row and its rule rows.

    Args:
        run_row: 18-column tuple in the column order selected by the
            run queries of this class.
        rule_rows: 14-column tuples in the column order selected by the
            rule-result queries of this class.

    Returns:
        The reconstructed ValidationState, rules in the given row order.
    """
    # Positional unpack of the run row; execution_stats is read but unused.
    (
        run_id, contract_fingerprint, contract_name, dataset_fingerprint,
        dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
        passed_rules, failed_rules, blocking_failures, warning_failures,
        info_failures, execution_stats, schema_version, engine_version
    ) = run_row

    summary = StateSummary(
        passed=passed,
        total_rules=total_rules,
        passed_rules=passed_rules,
        failed_rules=failed_rules,
        row_count=total_rows,
        blocking_failures=blocking_failures,
        warning_failures=warning_failures,
        info_failures=info_failures,
    )

    # One RuleState per rule row; run_id, context and samples are ignored.
    rules = [
        RuleState(
            rule_id=rule_id,
            rule_name=rule_name,
            passed=rule_passed,
            failed_count=failed_count,
            execution_source=execution_source or "unknown",
            severity=severity,
            failure_mode=failure_mode,
            details=details,
            message=message,
            column=column_name,
            id=rule_result_id,
        )
        for (
            rule_result_id, _owner_run_id, rule_id, rule_name, rule_passed,
            failed_count, severity, message, column_name, execution_source,
            failure_mode, details, _context, _samples
        ) in rule_rows
    ]

    return ValidationState(
        id=run_id,
        contract_fingerprint=contract_fingerprint,
        dataset_fingerprint=dataset_fingerprint,
        contract_name=contract_name,
        dataset_uri=dataset_name or "",
        run_at=run_at,
        summary=summary,
        rules=rules,
        schema_version=schema_version or "2.0",
        engine_version=engine_version or "unknown",
        duration_ms=duration_ms,
    )
|
|
448
|
+
|
|
449
|
+
def get_latest(self, contract_fingerprint: str) -> Optional[ValidationState]:
|
|
450
|
+
"""Get the most recent state for a contract."""
|
|
451
|
+
conn = self._get_conn()
|
|
452
|
+
|
|
453
|
+
run_sql = f"""
|
|
454
|
+
SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
|
|
455
|
+
dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
|
|
456
|
+
passed_rules, failed_rules, blocking_failures, warning_failures,
|
|
457
|
+
info_failures, execution_stats, schema_version, engine_version
|
|
458
|
+
FROM {self.RUNS_TABLE}
|
|
459
|
+
WHERE contract_fingerprint = %s
|
|
460
|
+
ORDER BY run_at DESC
|
|
461
|
+
LIMIT 1
|
|
462
|
+
"""
|
|
463
|
+
|
|
464
|
+
rule_sql = f"""
|
|
465
|
+
SELECT id, run_id, rule_id, rule_name, passed, failed_count,
|
|
466
|
+
severity, message, column_name, execution_source,
|
|
467
|
+
failure_mode, details, context, samples
|
|
468
|
+
FROM {self.RULE_RESULTS_TABLE}
|
|
469
|
+
WHERE run_id = %s
|
|
470
|
+
ORDER BY id
|
|
471
|
+
"""
|
|
472
|
+
|
|
473
|
+
try:
|
|
474
|
+
with conn.cursor() as cur:
|
|
475
|
+
cur.execute(run_sql, (contract_fingerprint,))
|
|
476
|
+
run_row = cur.fetchone()
|
|
477
|
+
if not run_row:
|
|
478
|
+
return None
|
|
479
|
+
|
|
480
|
+
run_id = run_row[0]
|
|
481
|
+
cur.execute(rule_sql, (run_id,))
|
|
482
|
+
rule_rows = cur.fetchall()
|
|
483
|
+
|
|
484
|
+
return self._build_state_from_rows(run_row, rule_rows)
|
|
485
|
+
except Exception:
|
|
486
|
+
return None
|
|
487
|
+
|
|
488
|
+
def get_history(
|
|
489
|
+
self,
|
|
490
|
+
contract_fingerprint: str,
|
|
491
|
+
limit: int = 10,
|
|
492
|
+
) -> List[ValidationState]:
|
|
493
|
+
"""Get recent history for a contract, newest first."""
|
|
494
|
+
conn = self._get_conn()
|
|
495
|
+
|
|
496
|
+
run_sql = f"""
|
|
497
|
+
SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
|
|
498
|
+
dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
|
|
499
|
+
passed_rules, failed_rules, blocking_failures, warning_failures,
|
|
500
|
+
info_failures, execution_stats, schema_version, engine_version
|
|
501
|
+
FROM {self.RUNS_TABLE}
|
|
502
|
+
WHERE contract_fingerprint = %s
|
|
503
|
+
ORDER BY run_at DESC
|
|
504
|
+
LIMIT %s
|
|
505
|
+
"""
|
|
506
|
+
|
|
507
|
+
rule_sql = f"""
|
|
508
|
+
SELECT id, run_id, rule_id, rule_name, passed, failed_count,
|
|
509
|
+
severity, message, column_name, execution_source,
|
|
510
|
+
failure_mode, details, context, samples
|
|
511
|
+
FROM {self.RULE_RESULTS_TABLE}
|
|
512
|
+
WHERE run_id = ANY(%s)
|
|
513
|
+
ORDER BY run_id, id
|
|
514
|
+
"""
|
|
515
|
+
|
|
516
|
+
try:
|
|
517
|
+
with conn.cursor() as cur:
|
|
518
|
+
cur.execute(run_sql, (contract_fingerprint, limit))
|
|
519
|
+
run_rows = cur.fetchall()
|
|
520
|
+
if not run_rows:
|
|
521
|
+
return []
|
|
522
|
+
|
|
523
|
+
# Get all rule results in one query
|
|
524
|
+
run_ids = [row[0] for row in run_rows]
|
|
525
|
+
cur.execute(rule_sql, (run_ids,))
|
|
526
|
+
all_rule_rows = cur.fetchall()
|
|
527
|
+
|
|
528
|
+
# Group rule rows by run_id
|
|
529
|
+
rules_by_run: Dict[int, List[tuple]] = {}
|
|
530
|
+
for rule_row in all_rule_rows:
|
|
531
|
+
run_id = rule_row[1]
|
|
532
|
+
rules_by_run.setdefault(run_id, []).append(rule_row)
|
|
533
|
+
|
|
534
|
+
# Build states
|
|
535
|
+
states = []
|
|
536
|
+
for run_row in run_rows:
|
|
537
|
+
run_id = run_row[0]
|
|
538
|
+
rule_rows = rules_by_run.get(run_id, [])
|
|
539
|
+
state = self._build_state_from_rows(run_row, rule_rows)
|
|
540
|
+
states.append(state)
|
|
541
|
+
|
|
542
|
+
return states
|
|
543
|
+
except Exception:
|
|
544
|
+
return []
|
|
545
|
+
|
|
546
|
+
def get_at(
|
|
547
|
+
self,
|
|
548
|
+
contract_fingerprint: str,
|
|
549
|
+
timestamp: datetime,
|
|
550
|
+
) -> Optional[ValidationState]:
|
|
551
|
+
"""Get state at or before a specific timestamp."""
|
|
552
|
+
conn = self._get_conn()
|
|
553
|
+
|
|
554
|
+
run_sql = f"""
|
|
555
|
+
SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
|
|
556
|
+
dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
|
|
557
|
+
passed_rules, failed_rules, blocking_failures, warning_failures,
|
|
558
|
+
info_failures, execution_stats, schema_version, engine_version
|
|
559
|
+
FROM {self.RUNS_TABLE}
|
|
560
|
+
WHERE contract_fingerprint = %s AND run_at <= %s
|
|
561
|
+
ORDER BY run_at DESC
|
|
562
|
+
LIMIT 1
|
|
563
|
+
"""
|
|
564
|
+
|
|
565
|
+
rule_sql = f"""
|
|
566
|
+
SELECT id, run_id, rule_id, rule_name, passed, failed_count,
|
|
567
|
+
severity, message, column_name, execution_source,
|
|
568
|
+
failure_mode, details, context, samples
|
|
569
|
+
FROM {self.RULE_RESULTS_TABLE}
|
|
570
|
+
WHERE run_id = %s
|
|
571
|
+
ORDER BY id
|
|
572
|
+
"""
|
|
573
|
+
|
|
574
|
+
try:
|
|
575
|
+
with conn.cursor() as cur:
|
|
576
|
+
cur.execute(run_sql, (contract_fingerprint, timestamp))
|
|
577
|
+
run_row = cur.fetchone()
|
|
578
|
+
if not run_row:
|
|
579
|
+
return None
|
|
580
|
+
|
|
581
|
+
run_id = run_row[0]
|
|
582
|
+
cur.execute(rule_sql, (run_id,))
|
|
583
|
+
rule_rows = cur.fetchall()
|
|
584
|
+
|
|
585
|
+
return self._build_state_from_rows(run_row, rule_rows)
|
|
586
|
+
except Exception:
|
|
587
|
+
return None
|
|
588
|
+
|
|
589
|
+
def delete_old(
|
|
590
|
+
self,
|
|
591
|
+
contract_fingerprint: str,
|
|
592
|
+
keep_count: int = 100,
|
|
593
|
+
) -> int:
|
|
594
|
+
"""Delete old states, keeping the most recent ones."""
|
|
595
|
+
conn = self._get_conn()
|
|
596
|
+
|
|
597
|
+
# Delete runs not in the top keep_count (cascade deletes rule_results)
|
|
598
|
+
sql_delete = f"""
|
|
599
|
+
DELETE FROM {self.RUNS_TABLE}
|
|
600
|
+
WHERE contract_fingerprint = %s
|
|
601
|
+
AND id NOT IN (
|
|
602
|
+
SELECT id FROM {self.RUNS_TABLE}
|
|
603
|
+
WHERE contract_fingerprint = %s
|
|
604
|
+
ORDER BY run_at DESC
|
|
605
|
+
LIMIT %s
|
|
606
|
+
)
|
|
607
|
+
"""
|
|
608
|
+
|
|
609
|
+
try:
|
|
610
|
+
with conn.cursor() as cur:
|
|
611
|
+
cur.execute(sql_delete, (contract_fingerprint, contract_fingerprint, keep_count))
|
|
612
|
+
deleted = cur.rowcount
|
|
613
|
+
conn.commit()
|
|
614
|
+
return deleted
|
|
615
|
+
except Exception:
|
|
616
|
+
conn.rollback()
|
|
617
|
+
return 0
|
|
618
|
+
|
|
619
|
+
def list_contracts(self) -> List[str]:
|
|
620
|
+
"""List all contract fingerprints with stored state."""
|
|
621
|
+
conn = self._get_conn()
|
|
622
|
+
|
|
623
|
+
sql = f"""
|
|
624
|
+
SELECT DISTINCT contract_fingerprint FROM {self.RUNS_TABLE}
|
|
625
|
+
ORDER BY contract_fingerprint
|
|
626
|
+
"""
|
|
627
|
+
|
|
628
|
+
try:
|
|
629
|
+
with conn.cursor() as cur:
|
|
630
|
+
cur.execute(sql)
|
|
631
|
+
rows = cur.fetchall()
|
|
632
|
+
return [row[0] for row in rows]
|
|
633
|
+
except Exception:
|
|
634
|
+
return []
|
|
635
|
+
|
|
636
|
+
def clear(self, contract_fingerprint: Optional[str] = None) -> int:
|
|
637
|
+
"""
|
|
638
|
+
Clear stored states.
|
|
639
|
+
|
|
640
|
+
Args:
|
|
641
|
+
contract_fingerprint: If provided, only clear this contract's states.
|
|
642
|
+
If None, clear all states.
|
|
643
|
+
|
|
644
|
+
Returns:
|
|
645
|
+
Number of run rows deleted (rule_results cascade).
|
|
646
|
+
"""
|
|
647
|
+
conn = self._get_conn()
|
|
648
|
+
|
|
649
|
+
try:
|
|
650
|
+
with conn.cursor() as cur:
|
|
651
|
+
if contract_fingerprint:
|
|
652
|
+
cur.execute(
|
|
653
|
+
f"DELETE FROM {self.RUNS_TABLE} WHERE contract_fingerprint = %s",
|
|
654
|
+
(contract_fingerprint,)
|
|
655
|
+
)
|
|
656
|
+
else:
|
|
657
|
+
cur.execute(f"DELETE FROM {self.RUNS_TABLE}")
|
|
658
|
+
deleted = cur.rowcount
|
|
659
|
+
conn.commit()
|
|
660
|
+
return deleted
|
|
661
|
+
except Exception:
|
|
662
|
+
conn.rollback()
|
|
663
|
+
return 0
|
|
664
|
+
|
|
665
|
+
# -------------------------------------------------------------------------
|
|
666
|
+
# Annotation Methods
|
|
667
|
+
# -------------------------------------------------------------------------
|
|
668
|
+
|
|
669
|
+
def save_annotation(self, annotation: Annotation) -> int:
|
|
670
|
+
"""Save an annotation (append-only)."""
|
|
671
|
+
conn = self._get_conn()
|
|
672
|
+
|
|
673
|
+
sql = f"""
|
|
674
|
+
INSERT INTO {self.ANNOTATIONS_TABLE} (
|
|
675
|
+
run_id, rule_result_id, actor_type, actor_id,
|
|
676
|
+
annotation_type, summary, payload, created_at
|
|
677
|
+
) VALUES (
|
|
678
|
+
%s, %s, %s, %s, %s, %s, %s, %s
|
|
679
|
+
) RETURNING id
|
|
680
|
+
"""
|
|
681
|
+
|
|
682
|
+
try:
|
|
683
|
+
with conn.cursor() as cur:
|
|
684
|
+
cur.execute(sql, (
|
|
685
|
+
annotation.run_id,
|
|
686
|
+
annotation.rule_result_id,
|
|
687
|
+
annotation.actor_type,
|
|
688
|
+
annotation.actor_id,
|
|
689
|
+
annotation.annotation_type,
|
|
690
|
+
annotation.summary,
|
|
691
|
+
json.dumps(annotation.payload) if annotation.payload else None,
|
|
692
|
+
annotation.created_at or datetime.now(timezone.utc),
|
|
693
|
+
))
|
|
694
|
+
annotation_id = cur.fetchone()[0]
|
|
695
|
+
conn.commit()
|
|
696
|
+
|
|
697
|
+
annotation.id = annotation_id
|
|
698
|
+
return annotation_id
|
|
699
|
+
except Exception as e:
|
|
700
|
+
conn.rollback()
|
|
701
|
+
raise IOError(f"Failed to save annotation: {e}") from e
|
|
702
|
+
|
|
703
|
+
def get_annotations(
|
|
704
|
+
self,
|
|
705
|
+
run_id: int,
|
|
706
|
+
rule_result_id: Optional[int] = None,
|
|
707
|
+
) -> List[Annotation]:
|
|
708
|
+
"""Get annotations for a run or specific rule result."""
|
|
709
|
+
conn = self._get_conn()
|
|
710
|
+
|
|
711
|
+
if rule_result_id is not None:
|
|
712
|
+
sql = f"""
|
|
713
|
+
SELECT id, run_id, rule_result_id, actor_type, actor_id,
|
|
714
|
+
annotation_type, summary, payload, created_at
|
|
715
|
+
FROM {self.ANNOTATIONS_TABLE}
|
|
716
|
+
WHERE run_id = %s AND rule_result_id = %s
|
|
717
|
+
ORDER BY created_at DESC
|
|
718
|
+
"""
|
|
719
|
+
params = (run_id, rule_result_id)
|
|
720
|
+
else:
|
|
721
|
+
sql = f"""
|
|
722
|
+
SELECT id, run_id, rule_result_id, actor_type, actor_id,
|
|
723
|
+
annotation_type, summary, payload, created_at
|
|
724
|
+
FROM {self.ANNOTATIONS_TABLE}
|
|
725
|
+
WHERE run_id = %s
|
|
726
|
+
ORDER BY created_at DESC
|
|
727
|
+
"""
|
|
728
|
+
params = (run_id,)
|
|
729
|
+
|
|
730
|
+
try:
|
|
731
|
+
with conn.cursor() as cur:
|
|
732
|
+
cur.execute(sql, params)
|
|
733
|
+
rows = cur.fetchall()
|
|
734
|
+
|
|
735
|
+
annotations = []
|
|
736
|
+
for row in rows:
|
|
737
|
+
(
|
|
738
|
+
ann_id, run_id, rule_result_id, actor_type, actor_id,
|
|
739
|
+
annotation_type, summary, payload, created_at
|
|
740
|
+
) = row
|
|
741
|
+
annotation = Annotation(
|
|
742
|
+
id=ann_id,
|
|
743
|
+
run_id=run_id,
|
|
744
|
+
rule_result_id=rule_result_id,
|
|
745
|
+
actor_type=actor_type,
|
|
746
|
+
actor_id=actor_id,
|
|
747
|
+
annotation_type=annotation_type,
|
|
748
|
+
summary=summary,
|
|
749
|
+
payload=payload,
|
|
750
|
+
created_at=created_at,
|
|
751
|
+
)
|
|
752
|
+
annotations.append(annotation)
|
|
753
|
+
return annotations
|
|
754
|
+
except Exception:
|
|
755
|
+
return []
|
|
756
|
+
|
|
757
|
+
def get_annotations_for_contract(
    self,
    contract_fingerprint: str,
    rule_id: Optional[str] = None,
    annotation_type: Optional[str] = None,
    limit: int = 20,
) -> List[Annotation]:
    """
    Get annotations across all runs of a contract.

    Args:
        contract_fingerprint: Fingerprint of the contract whose runs are
            searched.
        rule_id: When given, keep only annotations whose rule result has
            this rule ID.
        annotation_type: When given, keep only annotations of this type.
        limit: Maximum number of annotations to return.

    Returns:
        Annotations ordered by ``created_at``, newest first, each carrying
        the ``rule_id`` of its rule result (None when the annotation has
        no matching rule result). This is a best-effort read: any failure
        yields an empty list instead of raising.
    """
    conn = self._get_conn()

    # Runs are joined to filter by contract; rule results are LEFT JOINed
    # so annotations without a rule result still appear, with a NULL rule_id.
    conditions = ["r.contract_fingerprint = %s"]
    params: List[Any] = [contract_fingerprint]

    if rule_id is not None:
        conditions.append("rr.rule_id = %s")
        params.append(rule_id)

    if annotation_type is not None:
        conditions.append("a.annotation_type = %s")
        params.append(annotation_type)

    # LIMIT is bound last, matching its position in the statement.
    params.append(limit)
    where = " AND ".join(conditions)

    sql = f"""
        SELECT
            a.id, a.run_id, a.rule_result_id, a.actor_type, a.actor_id,
            a.annotation_type, a.summary, a.payload, a.created_at,
            rr.rule_id
        FROM {self.ANNOTATIONS_TABLE} a
        JOIN {self.RUNS_TABLE} r ON a.run_id = r.id
        LEFT JOIN {self.RULE_RESULTS_TABLE} rr ON a.rule_result_id = rr.id
        WHERE {where}
        ORDER BY a.created_at DESC
        LIMIT %s
    """

    try:
        with conn.cursor() as cur:
            cur.execute(sql, tuple(params))
            rows = cur.fetchall()

        # Row values get their own names so they never shadow the arguments.
        return [
            Annotation(
                id=ann_id,
                run_id=row_run_id,
                rule_result_id=row_rule_result_id,
                rule_id=row_rule_id,
                actor_type=actor_type,
                actor_id=actor_id,
                annotation_type=row_annotation_type,
                summary=summary,
                payload=payload,
                created_at=created_at,
            )
            for (
                ann_id, row_run_id, row_rule_result_id, actor_type, actor_id,
                row_annotation_type, summary, payload, created_at, row_rule_id,
            ) in rows
        ]
    except Exception:
        # A failed statement leaves a PostgreSQL transaction aborted until
        # it is rolled back; without this, every later query on the same
        # connection would fail too.
        try:
            conn.rollback()
        except Exception:
            # The connection itself may be unusable; keep the read best-effort.
            pass
        return []
|
|
819
|
+
|
|
820
|
+
def get_run_with_annotations(
    self,
    contract_fingerprint: str,
    run_id: Optional[int] = None,
) -> Optional[ValidationState]:
    """
    Get a validation state with its annotations loaded.

    Args:
        contract_fingerprint: Fingerprint of the contract the run belongs to.
        run_id: ID of the run to load. When None, the most recent run of
            the contract (by ``run_at``) is loaded.

    Returns:
        The state built from the run and its rule results, with the run's
        annotations attached, or None when no matching run exists. This is
        a best-effort read: any failure also yields None instead of raising.
    """
    conn = self._get_conn()

    # Either pin the exact run (still scoped to the contract) or take the
    # latest run of the contract.
    if run_id is not None:
        run_filter = "WHERE id = %s AND contract_fingerprint = %s"
        run_params: tuple = (run_id, contract_fingerprint)
    else:
        run_filter = (
            "WHERE contract_fingerprint = %s "
            "ORDER BY run_at DESC "
            "LIMIT 1"
        )
        run_params = (contract_fingerprint,)

    run_sql = f"""
        SELECT id, contract_fingerprint, contract_name, dataset_fingerprint,
               dataset_name, run_at, duration_ms, passed, total_rows, total_rules,
               passed_rules, failed_rules, blocking_failures, warning_failures,
               info_failures, execution_stats, schema_version, engine_version
        FROM {self.RUNS_TABLE}
        {run_filter}
    """

    rule_sql = f"""
        SELECT id, run_id, rule_id, rule_name, passed, failed_count,
               severity, message, column_name, execution_source,
               failure_mode, details, context, samples
        FROM {self.RULE_RESULTS_TABLE}
        WHERE run_id = %s
        ORDER BY id
    """

    ann_sql = f"""
        SELECT id, run_id, rule_result_id, actor_type, actor_id,
               annotation_type, summary, payload, created_at
        FROM {self.ANNOTATIONS_TABLE}
        WHERE run_id = %s
        ORDER BY created_at DESC
    """

    try:
        with conn.cursor() as cur:
            cur.execute(run_sql, run_params)
            run_row = cur.fetchone()
            if not run_row:
                return None

            # The first selected column is the run's primary key.
            actual_run_id = run_row[0]

            cur.execute(rule_sql, (actual_run_id,))
            rule_rows = cur.fetchall()

            cur.execute(ann_sql, (actual_run_id,))
            ann_rows = cur.fetchall()

        state = self._build_state_from_rows(run_row, rule_rows)

        annotations = [
            Annotation(
                id=ann_id,
                run_id=row_run_id,
                rule_result_id=row_rule_result_id,
                actor_type=actor_type,
                actor_id=actor_id,
                annotation_type=annotation_type,
                summary=summary,
                payload=payload,
                created_at=created_at,
            )
            for (
                ann_id, row_run_id, row_rule_result_id, actor_type, actor_id,
                annotation_type, summary, payload, created_at,
            ) in ann_rows
        ]

        self._attach_annotations_to_state(state, annotations)
        return state
    except Exception:
        # A failed statement leaves a PostgreSQL transaction aborted until
        # it is rolled back; without this, every later query on the same
        # connection would fail too.
        try:
            conn.rollback()
        except Exception:
            # The connection itself may be unusable; keep the read best-effort.
            pass
        return None
|
|
912
|
+
|
|
913
|
+
def get_history_with_annotations(
    self,
    contract_fingerprint: str,
    limit: int = 10,
) -> List[ValidationState]:
    """
    Get recent history with annotations loaded.

    The history is loaded first via ``get_history`` and the annotations
    for all returned runs are then fetched in a single query.

    Args:
        contract_fingerprint: Fingerprint of the contract whose history
            is wanted.
        limit: Maximum number of states, passed through to ``get_history``.

    Returns:
        The states from ``get_history``. Each state's ``annotations`` holds
        its run-level annotations (those without a rule result) and each
        rule's ``annotations`` holds the ones attached to that rule result.
        If the annotation query fails, the states are still returned, with
        empty annotation lists everywhere.
    """
    states = self.get_history(contract_fingerprint, limit=limit)
    if not states:
        return []

    def _clear_annotations() -> None:
        # Give every state and rule an empty annotation list.
        for state in states:
            state.annotations = []
            for rule in state.rules:
                rule.annotations = []

    conn = self._get_conn()
    run_ids = [s.id for s in states if s.id is not None]

    if not run_ids:
        # Nothing to look up without run IDs.
        _clear_annotations()
        return states

    ann_sql = f"""
        SELECT id, run_id, rule_result_id, actor_type, actor_id,
               annotation_type, summary, payload, created_at
        FROM {self.ANNOTATIONS_TABLE}
        WHERE run_id = ANY(%s)
        ORDER BY created_at DESC
    """

    try:
        with conn.cursor() as cur:
            # The list is passed as one parameter for ANY(...).
            cur.execute(ann_sql, (run_ids,))
            ann_rows = cur.fetchall()

        # Index: run_id -> rule_result_id (None for run-level) -> annotations.
        annotations_index: Dict[int, Dict[Optional[int], List[Annotation]]] = {}

        for (
            ann_id, row_run_id, row_rule_result_id, actor_type, actor_id,
            annotation_type, summary, payload, created_at,
        ) in ann_rows:
            annotation = Annotation(
                id=ann_id,
                run_id=row_run_id,
                rule_result_id=row_rule_result_id,
                actor_type=actor_type,
                actor_id=actor_id,
                annotation_type=annotation_type,
                summary=summary,
                payload=payload,
                created_at=created_at,
            )
            per_run = annotations_index.setdefault(row_run_id, {})
            per_run.setdefault(row_rule_result_id, []).append(annotation)

        for state in states:
            # States without an ID, or without any annotation rows, get
            # empty lists throughout.
            run_anns = (
                annotations_index.get(state.id, {})
                if state.id is not None
                else {}
            )
            state.annotations = run_anns.get(None, [])
            for rule in state.rules:
                if rule.id is not None:
                    rule.annotations = run_anns.get(rule.id, [])
                else:
                    rule.annotations = []

        return states
    except Exception:
        # A failed statement leaves a PostgreSQL transaction aborted until
        # it is rolled back; without this, every later query on the same
        # connection would fail too.
        try:
            conn.rollback()
        except Exception:
            # The connection itself may be unusable; keep the read best-effort.
            pass
        # On error, return the states without annotations.
        _clear_annotations()
        return states
|
|
997
|
+
|
|
998
|
+
def close(self) -> None:
    """
    Close the database connection, if one is open.

    Safe to call repeatedly, and on an instance whose ``_conn`` attribute
    was never set. The stored handle is cleared even when the driver's
    ``close()`` raises, so a broken connection is never kept for reuse;
    the exception itself still propagates to the caller.
    """
    conn = getattr(self, "_conn", None)
    if conn is None:
        return
    try:
        conn.close()
    finally:
        self._conn = None
|
|
1003
|
+
|
|
1004
|
+
def __repr__(self) -> str:
    """
    Return a short debug representation showing host and database name.

    Only the ``host`` and ``dbname`` entries of the connection parameters
    are shown; a missing entry is rendered as ``?``. The class name is
    taken from the instance so subclasses report their own name, and an
    instance without ``_conn_params`` is still printable.
    """
    conn_params = getattr(self, "_conn_params", None) or {}
    host = conn_params.get("host", "?")
    dbname = conn_params.get("dbname", "?")
    return f"{type(self).__name__}(host={host}, dbname={dbname})"
|
|
1008
|
+
|
|
1009
|
+
def __del__(self):
    """
    Finalizer: close the connection as a last resort.

    Errors are swallowed on purpose. A finalizer can run on an instance
    whose initialisation never completed, or during interpreter shutdown,
    and an exception escaping here cannot be handled by any caller.
    """
    try:
        self.close()
    except Exception:
        # Best-effort cleanup only; callers needing a guaranteed close
        # should call close() explicitly.
        pass
|