kontra 0.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kontra/__init__.py +1871 -0
- kontra/api/__init__.py +22 -0
- kontra/api/compare.py +340 -0
- kontra/api/decorators.py +153 -0
- kontra/api/results.py +2121 -0
- kontra/api/rules.py +681 -0
- kontra/cli/__init__.py +0 -0
- kontra/cli/commands/__init__.py +1 -0
- kontra/cli/commands/config.py +153 -0
- kontra/cli/commands/diff.py +450 -0
- kontra/cli/commands/history.py +196 -0
- kontra/cli/commands/profile.py +289 -0
- kontra/cli/commands/validate.py +468 -0
- kontra/cli/constants.py +6 -0
- kontra/cli/main.py +48 -0
- kontra/cli/renderers.py +304 -0
- kontra/cli/utils.py +28 -0
- kontra/config/__init__.py +34 -0
- kontra/config/loader.py +127 -0
- kontra/config/models.py +49 -0
- kontra/config/settings.py +797 -0
- kontra/connectors/__init__.py +0 -0
- kontra/connectors/db_utils.py +251 -0
- kontra/connectors/detection.py +323 -0
- kontra/connectors/handle.py +368 -0
- kontra/connectors/postgres.py +127 -0
- kontra/connectors/sqlserver.py +226 -0
- kontra/engine/__init__.py +0 -0
- kontra/engine/backends/duckdb_session.py +227 -0
- kontra/engine/backends/duckdb_utils.py +18 -0
- kontra/engine/backends/polars_backend.py +47 -0
- kontra/engine/engine.py +1205 -0
- kontra/engine/executors/__init__.py +15 -0
- kontra/engine/executors/base.py +50 -0
- kontra/engine/executors/database_base.py +528 -0
- kontra/engine/executors/duckdb_sql.py +607 -0
- kontra/engine/executors/postgres_sql.py +162 -0
- kontra/engine/executors/registry.py +69 -0
- kontra/engine/executors/sqlserver_sql.py +163 -0
- kontra/engine/materializers/__init__.py +14 -0
- kontra/engine/materializers/base.py +42 -0
- kontra/engine/materializers/duckdb.py +110 -0
- kontra/engine/materializers/factory.py +22 -0
- kontra/engine/materializers/polars_connector.py +131 -0
- kontra/engine/materializers/postgres.py +157 -0
- kontra/engine/materializers/registry.py +138 -0
- kontra/engine/materializers/sqlserver.py +160 -0
- kontra/engine/result.py +15 -0
- kontra/engine/sql_utils.py +611 -0
- kontra/engine/sql_validator.py +609 -0
- kontra/engine/stats.py +194 -0
- kontra/engine/types.py +138 -0
- kontra/errors.py +533 -0
- kontra/logging.py +85 -0
- kontra/preplan/__init__.py +5 -0
- kontra/preplan/planner.py +253 -0
- kontra/preplan/postgres.py +179 -0
- kontra/preplan/sqlserver.py +191 -0
- kontra/preplan/types.py +24 -0
- kontra/probes/__init__.py +20 -0
- kontra/probes/compare.py +400 -0
- kontra/probes/relationship.py +283 -0
- kontra/reporters/__init__.py +0 -0
- kontra/reporters/json_reporter.py +190 -0
- kontra/reporters/rich_reporter.py +11 -0
- kontra/rules/__init__.py +35 -0
- kontra/rules/base.py +186 -0
- kontra/rules/builtin/__init__.py +40 -0
- kontra/rules/builtin/allowed_values.py +156 -0
- kontra/rules/builtin/compare.py +188 -0
- kontra/rules/builtin/conditional_not_null.py +213 -0
- kontra/rules/builtin/conditional_range.py +310 -0
- kontra/rules/builtin/contains.py +138 -0
- kontra/rules/builtin/custom_sql_check.py +182 -0
- kontra/rules/builtin/disallowed_values.py +140 -0
- kontra/rules/builtin/dtype.py +203 -0
- kontra/rules/builtin/ends_with.py +129 -0
- kontra/rules/builtin/freshness.py +240 -0
- kontra/rules/builtin/length.py +193 -0
- kontra/rules/builtin/max_rows.py +35 -0
- kontra/rules/builtin/min_rows.py +46 -0
- kontra/rules/builtin/not_null.py +121 -0
- kontra/rules/builtin/range.py +222 -0
- kontra/rules/builtin/regex.py +143 -0
- kontra/rules/builtin/starts_with.py +129 -0
- kontra/rules/builtin/unique.py +124 -0
- kontra/rules/condition_parser.py +203 -0
- kontra/rules/execution_plan.py +455 -0
- kontra/rules/factory.py +103 -0
- kontra/rules/predicates.py +25 -0
- kontra/rules/registry.py +24 -0
- kontra/rules/static_predicates.py +120 -0
- kontra/scout/__init__.py +9 -0
- kontra/scout/backends/__init__.py +17 -0
- kontra/scout/backends/base.py +111 -0
- kontra/scout/backends/duckdb_backend.py +359 -0
- kontra/scout/backends/postgres_backend.py +519 -0
- kontra/scout/backends/sqlserver_backend.py +577 -0
- kontra/scout/dtype_mapping.py +150 -0
- kontra/scout/patterns.py +69 -0
- kontra/scout/profiler.py +801 -0
- kontra/scout/reporters/__init__.py +39 -0
- kontra/scout/reporters/json_reporter.py +165 -0
- kontra/scout/reporters/markdown_reporter.py +152 -0
- kontra/scout/reporters/rich_reporter.py +144 -0
- kontra/scout/store.py +208 -0
- kontra/scout/suggest.py +200 -0
- kontra/scout/types.py +652 -0
- kontra/state/__init__.py +29 -0
- kontra/state/backends/__init__.py +79 -0
- kontra/state/backends/base.py +348 -0
- kontra/state/backends/local.py +480 -0
- kontra/state/backends/postgres.py +1010 -0
- kontra/state/backends/s3.py +543 -0
- kontra/state/backends/sqlserver.py +969 -0
- kontra/state/fingerprint.py +166 -0
- kontra/state/types.py +1061 -0
- kontra/version.py +1 -0
- kontra-0.5.2.dist-info/METADATA +122 -0
- kontra-0.5.2.dist-info/RECORD +124 -0
- kontra-0.5.2.dist-info/WHEEL +5 -0
- kontra-0.5.2.dist-info/entry_points.txt +2 -0
- kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
- kontra-0.5.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,480 @@
|
|
|
1
|
+
# src/kontra/state/backends/local.py
|
|
2
|
+
"""
|
|
3
|
+
Local filesystem state storage with normalized format (v0.5).
|
|
4
|
+
|
|
5
|
+
Directory structure:
|
|
6
|
+
.kontra/state/
|
|
7
|
+
└── <contract_fingerprint>/
|
|
8
|
+
└── runs/
|
|
9
|
+
├── <run_id>.json # run metadata + rule results
|
|
10
|
+
└── <run_id>.ann.jsonl # annotations (append-only)
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import os
|
|
17
|
+
import random
|
|
18
|
+
import string
|
|
19
|
+
from datetime import datetime, timezone
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Dict, List, Optional
|
|
22
|
+
|
|
23
|
+
from .base import StateBackend
|
|
24
|
+
from kontra.state.types import Annotation, ValidationState
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class LocalStore(StateBackend):
|
|
28
|
+
"""
|
|
29
|
+
Filesystem-based state storage with normalized format.
|
|
30
|
+
|
|
31
|
+
Default storage location is .kontra/state/ in the current working
|
|
32
|
+
directory. Can be customized via the base_path parameter.
|
|
33
|
+
|
|
34
|
+
Run IDs are timestamp-based: YYYY-MM-DDTHH-MM-SS_<random>
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
def __init__(self, base_path: Optional[str] = None):
|
|
38
|
+
"""
|
|
39
|
+
Initialize the local store.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
base_path: Base directory for state storage.
|
|
43
|
+
Defaults to .kontra/state/ in cwd.
|
|
44
|
+
"""
|
|
45
|
+
if base_path:
|
|
46
|
+
self.base_path = Path(base_path)
|
|
47
|
+
else:
|
|
48
|
+
self.base_path = Path.cwd() / ".kontra" / "state"
|
|
49
|
+
|
|
50
|
+
def _contract_dir(self, contract_fingerprint: str) -> Path:
|
|
51
|
+
"""Get the directory for a contract's states."""
|
|
52
|
+
return self.base_path / contract_fingerprint
|
|
53
|
+
|
|
54
|
+
def _runs_dir(self, contract_fingerprint: str) -> Path:
|
|
55
|
+
"""Get the runs directory for a contract."""
|
|
56
|
+
return self._contract_dir(contract_fingerprint) / "runs"
|
|
57
|
+
|
|
58
|
+
def _generate_run_id(self, run_at: datetime) -> str:
|
|
59
|
+
"""Generate a unique run ID from timestamp."""
|
|
60
|
+
# Format: YYYY-MM-DDTHH-MM-SS_<random>
|
|
61
|
+
# The timestamp prefix makes them sortable
|
|
62
|
+
ts = run_at.strftime("%Y-%m-%dT%H-%M-%S")
|
|
63
|
+
suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
|
|
64
|
+
return f"{ts}_{suffix}"
|
|
65
|
+
|
|
66
|
+
def _parse_run_id_timestamp(self, run_id: str) -> Optional[datetime]:
|
|
67
|
+
"""Parse timestamp from run ID."""
|
|
68
|
+
try:
|
|
69
|
+
# Split on underscore to get timestamp part
|
|
70
|
+
ts_part = run_id.split("_")[0]
|
|
71
|
+
return datetime.strptime(ts_part, "%Y-%m-%dT%H-%M-%S").replace(tzinfo=timezone.utc)
|
|
72
|
+
except Exception:
|
|
73
|
+
return None
|
|
74
|
+
|
|
75
|
+
def _run_file(self, contract_fingerprint: str, run_id: str) -> Path:
|
|
76
|
+
"""Get the path for a run's state file."""
|
|
77
|
+
return self._runs_dir(contract_fingerprint) / f"{run_id}.json"
|
|
78
|
+
|
|
79
|
+
def _annotations_file(self, contract_fingerprint: str, run_id: str) -> Path:
|
|
80
|
+
"""Get the path for a run's annotations file."""
|
|
81
|
+
return self._runs_dir(contract_fingerprint) / f"{run_id}.ann.jsonl"
|
|
82
|
+
|
|
83
|
+
def save(self, state: ValidationState) -> None:
    """
    Persist a validation state as a new run file.

    A fresh run ID is generated from ``state.run_at`` on every call and
    recorded in the payload under the ``_run_id`` key. The JSON is written
    to a sibling ``.tmp`` file first and then moved into place, so a run
    file only appears under its final name once it is fully written.

    Args:
        state: The validation state to store.

    Raises:
        Exception: Any error raised while writing or moving the file is
            re-raised after the temporary file has been removed.
    """
    runs_dir = self._runs_dir(state.contract_fingerprint)
    runs_dir.mkdir(parents=True, exist_ok=True)

    # Every save creates a new run; the ID is derived from the run time.
    run_id = self._generate_run_id(state.run_at)

    # Store run_id in the state dict so it can be recovered on load.
    state_dict = state.to_dict()
    state_dict["_run_id"] = run_id

    filepath = self._run_file(state.contract_fingerprint, run_id)

    temp_path = filepath.with_suffix(".tmp")
    try:
        temp_path.write_text(
            json.dumps(state_dict, indent=2, default=str),
            encoding="utf-8",
        )
        # replace() overwrites an existing target on every platform,
        # whereas rename() raises on Windows when the target exists.
        temp_path.replace(filepath)
    except Exception:
        # Do not leave a half-written temp file behind.
        if temp_path.exists():
            temp_path.unlink()
        raise
|
|
109
|
+
|
|
110
|
+
def _load_state(self, filepath: Path) -> Optional[ValidationState]:
    """
    Load a validation state from a run file.

    The ``_run_id`` key written by ``save`` is removed from the payload
    before it is handed to ``ValidationState.from_dict``. When a run ID is
    present, ``state.id`` is set to a positive 31-bit integer derived from
    it.

    Args:
        filepath: Path of the run's JSON file.

    Returns:
        The loaded state, or None if the file cannot be read, parsed, or
        converted into a state.
    """
    # Local import: only needed for the synthetic ID below.
    import zlib

    try:
        content = filepath.read_text(encoding="utf-8")
        data = json.loads(content)

        # Extract run_id so it is not passed on to from_dict.
        run_id = data.pop("_run_id", None)

        state = ValidationState.from_dict(data)

        if run_id:
            # The built-in hash() of a string is salted per process, so it
            # would yield a different ID on every run of the program.
            # CRC-32 gives the same integer for the same run ID every time.
            state.id = zlib.crc32(str(run_id).encode("utf-8")) & 0x7FFFFFFF

        return state
    except Exception:
        # Best effort: an unreadable or malformed file counts as "no state".
        return None
|
|
129
|
+
|
|
130
|
+
def get_latest(self, contract_fingerprint: str) -> Optional[ValidationState]:
|
|
131
|
+
"""Get the most recent state for a contract."""
|
|
132
|
+
history = self.get_history(contract_fingerprint, limit=1)
|
|
133
|
+
return history[0] if history else None
|
|
134
|
+
|
|
135
|
+
def get_history(
|
|
136
|
+
self,
|
|
137
|
+
contract_fingerprint: str,
|
|
138
|
+
limit: int = 10,
|
|
139
|
+
) -> List[ValidationState]:
|
|
140
|
+
"""Get recent history for a contract, newest first."""
|
|
141
|
+
runs_dir = self._runs_dir(contract_fingerprint)
|
|
142
|
+
|
|
143
|
+
if not runs_dir.exists():
|
|
144
|
+
return []
|
|
145
|
+
|
|
146
|
+
# List all JSON files (excluding .ann.jsonl)
|
|
147
|
+
state_files = sorted(
|
|
148
|
+
[f for f in runs_dir.glob("*.json") if not f.name.endswith(".ann.jsonl")],
|
|
149
|
+
key=lambda p: p.name,
|
|
150
|
+
reverse=True, # Newest first (timestamp prefix sorts correctly)
|
|
151
|
+
)
|
|
152
|
+
|
|
153
|
+
states = []
|
|
154
|
+
for filepath in state_files[:limit]:
|
|
155
|
+
state = self._load_state(filepath)
|
|
156
|
+
if state:
|
|
157
|
+
states.append(state)
|
|
158
|
+
|
|
159
|
+
return states
|
|
160
|
+
|
|
161
|
+
def delete_old(
|
|
162
|
+
self,
|
|
163
|
+
contract_fingerprint: str,
|
|
164
|
+
keep_count: int = 100,
|
|
165
|
+
) -> int:
|
|
166
|
+
"""Delete old states, keeping the most recent ones."""
|
|
167
|
+
runs_dir = self._runs_dir(contract_fingerprint)
|
|
168
|
+
|
|
169
|
+
if not runs_dir.exists():
|
|
170
|
+
return 0
|
|
171
|
+
|
|
172
|
+
# List all JSON files, sorted newest first
|
|
173
|
+
state_files = sorted(
|
|
174
|
+
[f for f in runs_dir.glob("*.json") if not f.name.endswith(".ann.jsonl")],
|
|
175
|
+
key=lambda p: p.name,
|
|
176
|
+
reverse=True,
|
|
177
|
+
)
|
|
178
|
+
|
|
179
|
+
# Delete files beyond keep_count
|
|
180
|
+
deleted = 0
|
|
181
|
+
for filepath in state_files[keep_count:]:
|
|
182
|
+
try:
|
|
183
|
+
# Delete state file
|
|
184
|
+
filepath.unlink()
|
|
185
|
+
deleted += 1
|
|
186
|
+
|
|
187
|
+
# Also delete corresponding annotations file if exists
|
|
188
|
+
run_id = filepath.stem
|
|
189
|
+
ann_file = self._annotations_file(contract_fingerprint, run_id)
|
|
190
|
+
if ann_file.exists():
|
|
191
|
+
ann_file.unlink()
|
|
192
|
+
except Exception:
|
|
193
|
+
continue
|
|
194
|
+
|
|
195
|
+
return deleted
|
|
196
|
+
|
|
197
|
+
def list_contracts(self) -> List[str]:
|
|
198
|
+
"""List all contract fingerprints with stored state."""
|
|
199
|
+
if not self.base_path.exists():
|
|
200
|
+
return []
|
|
201
|
+
|
|
202
|
+
contracts = []
|
|
203
|
+
for item in self.base_path.iterdir():
|
|
204
|
+
if item.is_dir() and len(item.name) == 16: # Fingerprint length
|
|
205
|
+
contracts.append(item.name)
|
|
206
|
+
|
|
207
|
+
return sorted(contracts)
|
|
208
|
+
|
|
209
|
+
def clear(self, contract_fingerprint: Optional[str] = None) -> int:
|
|
210
|
+
"""
|
|
211
|
+
Clear stored states.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
contract_fingerprint: If provided, only clear this contract's states.
|
|
215
|
+
If None, clear all states.
|
|
216
|
+
|
|
217
|
+
Returns:
|
|
218
|
+
Number of state files deleted.
|
|
219
|
+
"""
|
|
220
|
+
deleted = 0
|
|
221
|
+
|
|
222
|
+
if contract_fingerprint:
|
|
223
|
+
runs_dir = self._runs_dir(contract_fingerprint)
|
|
224
|
+
if runs_dir.exists():
|
|
225
|
+
for filepath in runs_dir.glob("*.json"):
|
|
226
|
+
filepath.unlink()
|
|
227
|
+
deleted += 1
|
|
228
|
+
for filepath in runs_dir.glob("*.jsonl"):
|
|
229
|
+
filepath.unlink()
|
|
230
|
+
# Remove empty directories
|
|
231
|
+
try:
|
|
232
|
+
runs_dir.rmdir()
|
|
233
|
+
self._contract_dir(contract_fingerprint).rmdir()
|
|
234
|
+
except OSError:
|
|
235
|
+
pass
|
|
236
|
+
else:
|
|
237
|
+
# Clear all
|
|
238
|
+
if self.base_path.exists():
|
|
239
|
+
for contract_dir in self.base_path.iterdir():
|
|
240
|
+
if contract_dir.is_dir():
|
|
241
|
+
runs_dir = contract_dir / "runs"
|
|
242
|
+
if runs_dir.exists():
|
|
243
|
+
for filepath in runs_dir.glob("*.json"):
|
|
244
|
+
filepath.unlink()
|
|
245
|
+
deleted += 1
|
|
246
|
+
for filepath in runs_dir.glob("*.jsonl"):
|
|
247
|
+
filepath.unlink()
|
|
248
|
+
try:
|
|
249
|
+
runs_dir.rmdir()
|
|
250
|
+
except OSError:
|
|
251
|
+
pass
|
|
252
|
+
try:
|
|
253
|
+
contract_dir.rmdir()
|
|
254
|
+
except OSError:
|
|
255
|
+
pass
|
|
256
|
+
|
|
257
|
+
return deleted
|
|
258
|
+
|
|
259
|
+
# -------------------------------------------------------------------------
|
|
260
|
+
# Annotation Methods
|
|
261
|
+
# -------------------------------------------------------------------------
|
|
262
|
+
|
|
263
|
+
def save_annotation(self, annotation: Annotation) -> int:
|
|
264
|
+
"""
|
|
265
|
+
Save an annotation (append-only).
|
|
266
|
+
|
|
267
|
+
For file-based backends, we need the run_id string, not the integer ID.
|
|
268
|
+
Annotations are stored in JSONL format alongside the run file.
|
|
269
|
+
"""
|
|
270
|
+
# We need to find the run file to get the run_id string
|
|
271
|
+
# This is a limitation of file-based backends - we need the fingerprint
|
|
272
|
+
|
|
273
|
+
# For now, raise NotImplementedError - annotations require the contract_fingerprint
|
|
274
|
+
# which isn't stored in the annotation. Callers should use save_annotation_for_run.
|
|
275
|
+
raise NotImplementedError(
|
|
276
|
+
"LocalStore.save_annotation requires contract fingerprint. "
|
|
277
|
+
"Use save_annotation_for_run instead."
|
|
278
|
+
)
|
|
279
|
+
|
|
280
|
+
def save_annotation_for_run(
|
|
281
|
+
self,
|
|
282
|
+
contract_fingerprint: str,
|
|
283
|
+
run_id_str: str,
|
|
284
|
+
annotation: Annotation,
|
|
285
|
+
) -> int:
|
|
286
|
+
"""
|
|
287
|
+
Save an annotation for a specific run.
|
|
288
|
+
|
|
289
|
+
Args:
|
|
290
|
+
contract_fingerprint: The contract fingerprint
|
|
291
|
+
run_id_str: The string run ID (e.g., "2024-01-15T09-30-00_abc123")
|
|
292
|
+
annotation: The annotation to save
|
|
293
|
+
|
|
294
|
+
Returns:
|
|
295
|
+
A synthetic annotation ID (line number)
|
|
296
|
+
"""
|
|
297
|
+
ann_file = self._annotations_file(contract_fingerprint, run_id_str)
|
|
298
|
+
ann_file.parent.mkdir(parents=True, exist_ok=True)
|
|
299
|
+
|
|
300
|
+
# Generate a synthetic ID based on existing line count
|
|
301
|
+
line_count = 0
|
|
302
|
+
if ann_file.exists():
|
|
303
|
+
with open(ann_file, encoding="utf-8") as f:
|
|
304
|
+
line_count = sum(1 for _ in f)
|
|
305
|
+
annotation.id = line_count + 1
|
|
306
|
+
|
|
307
|
+
# Append to JSONL file
|
|
308
|
+
with open(ann_file, "a", encoding="utf-8") as f:
|
|
309
|
+
f.write(annotation.to_json() + "\n")
|
|
310
|
+
|
|
311
|
+
return annotation.id
|
|
312
|
+
|
|
313
|
+
def get_annotations(
|
|
314
|
+
self,
|
|
315
|
+
run_id: int,
|
|
316
|
+
rule_result_id: Optional[int] = None,
|
|
317
|
+
) -> List[Annotation]:
|
|
318
|
+
"""
|
|
319
|
+
Get annotations for a run.
|
|
320
|
+
|
|
321
|
+
Note: For file-based backends, run_id is a hash of the run_id string.
|
|
322
|
+
This method may not work directly. Use get_run_with_annotations instead.
|
|
323
|
+
"""
|
|
324
|
+
# File-based backends need the fingerprint to locate annotations
|
|
325
|
+
return []
|
|
326
|
+
|
|
327
|
+
def get_annotations_for_contract(
|
|
328
|
+
self,
|
|
329
|
+
contract_fingerprint: str,
|
|
330
|
+
rule_id: Optional[str] = None,
|
|
331
|
+
annotation_type: Optional[str] = None,
|
|
332
|
+
limit: int = 20,
|
|
333
|
+
) -> List[Annotation]:
|
|
334
|
+
"""Get annotations across all runs for a contract."""
|
|
335
|
+
runs_dir = self._runs_dir(contract_fingerprint)
|
|
336
|
+
if not runs_dir.exists():
|
|
337
|
+
return []
|
|
338
|
+
|
|
339
|
+
# Collect all annotations from all .ann.jsonl files
|
|
340
|
+
all_annotations: List[Annotation] = []
|
|
341
|
+
|
|
342
|
+
for ann_file in runs_dir.glob("*.ann.jsonl"):
|
|
343
|
+
with open(ann_file, encoding="utf-8") as f:
|
|
344
|
+
for line in f:
|
|
345
|
+
line = line.strip()
|
|
346
|
+
if not line:
|
|
347
|
+
continue
|
|
348
|
+
try:
|
|
349
|
+
ann = Annotation.from_json(line)
|
|
350
|
+
|
|
351
|
+
# Filter by rule_id if specified
|
|
352
|
+
if rule_id is not None and ann.rule_id != rule_id:
|
|
353
|
+
continue
|
|
354
|
+
|
|
355
|
+
# Filter by annotation_type if specified
|
|
356
|
+
if annotation_type is not None and ann.annotation_type != annotation_type:
|
|
357
|
+
continue
|
|
358
|
+
|
|
359
|
+
all_annotations.append(ann)
|
|
360
|
+
except Exception:
|
|
361
|
+
# Skip malformed annotations
|
|
362
|
+
continue
|
|
363
|
+
|
|
364
|
+
# Sort by created_at descending (newest first)
|
|
365
|
+
all_annotations.sort(
|
|
366
|
+
key=lambda a: a.created_at or datetime.min.replace(tzinfo=timezone.utc),
|
|
367
|
+
reverse=True,
|
|
368
|
+
)
|
|
369
|
+
|
|
370
|
+
# Apply limit
|
|
371
|
+
return all_annotations[:limit]
|
|
372
|
+
|
|
373
|
+
def get_run_with_annotations(
|
|
374
|
+
self,
|
|
375
|
+
contract_fingerprint: str,
|
|
376
|
+
run_id: Optional[int] = None,
|
|
377
|
+
) -> Optional[ValidationState]:
|
|
378
|
+
"""Get a validation state with its annotations loaded."""
|
|
379
|
+
# Get the state
|
|
380
|
+
if run_id is None:
|
|
381
|
+
state = self.get_latest(contract_fingerprint)
|
|
382
|
+
else:
|
|
383
|
+
# Search for state with matching ID hash
|
|
384
|
+
states = self.get_history(contract_fingerprint, limit=100)
|
|
385
|
+
state = None
|
|
386
|
+
for s in states:
|
|
387
|
+
if s.id == run_id:
|
|
388
|
+
state = s
|
|
389
|
+
break
|
|
390
|
+
|
|
391
|
+
if not state:
|
|
392
|
+
return None
|
|
393
|
+
|
|
394
|
+
# Load annotations
|
|
395
|
+
runs_dir = self._runs_dir(contract_fingerprint)
|
|
396
|
+
if not runs_dir.exists():
|
|
397
|
+
state.annotations = []
|
|
398
|
+
for rule in state.rules:
|
|
399
|
+
rule.annotations = []
|
|
400
|
+
return state
|
|
401
|
+
|
|
402
|
+
# Find the corresponding run file to get run_id string
|
|
403
|
+
run_id_str = None
|
|
404
|
+
for filepath in runs_dir.glob("*.json"):
|
|
405
|
+
if filepath.name.endswith(".ann.jsonl"):
|
|
406
|
+
continue
|
|
407
|
+
loaded = self._load_state(filepath)
|
|
408
|
+
if loaded and loaded.id == state.id:
|
|
409
|
+
run_id_str = filepath.stem
|
|
410
|
+
break
|
|
411
|
+
|
|
412
|
+
if not run_id_str:
|
|
413
|
+
state.annotations = []
|
|
414
|
+
for rule in state.rules:
|
|
415
|
+
rule.annotations = []
|
|
416
|
+
return state
|
|
417
|
+
|
|
418
|
+
# Load annotations from JSONL
|
|
419
|
+
ann_file = self._annotations_file(contract_fingerprint, run_id_str)
|
|
420
|
+
annotations = []
|
|
421
|
+
if ann_file.exists():
|
|
422
|
+
with open(ann_file, encoding="utf-8") as f:
|
|
423
|
+
for line in f:
|
|
424
|
+
line = line.strip()
|
|
425
|
+
if line:
|
|
426
|
+
annotations.append(Annotation.from_json(line))
|
|
427
|
+
|
|
428
|
+
self._attach_annotations_to_state(state, annotations)
|
|
429
|
+
return state
|
|
430
|
+
|
|
431
|
+
def get_history_with_annotations(
|
|
432
|
+
self,
|
|
433
|
+
contract_fingerprint: str,
|
|
434
|
+
limit: int = 10,
|
|
435
|
+
) -> List[ValidationState]:
|
|
436
|
+
"""Get recent history with annotations loaded."""
|
|
437
|
+
states = self.get_history(contract_fingerprint, limit=limit)
|
|
438
|
+
|
|
439
|
+
runs_dir = self._runs_dir(contract_fingerprint)
|
|
440
|
+
if not runs_dir.exists():
|
|
441
|
+
for state in states:
|
|
442
|
+
state.annotations = []
|
|
443
|
+
for rule in state.rules:
|
|
444
|
+
rule.annotations = []
|
|
445
|
+
return states
|
|
446
|
+
|
|
447
|
+
# Build ID to run_id_str mapping
|
|
448
|
+
id_to_run_id: Dict[int, str] = {}
|
|
449
|
+
for filepath in runs_dir.glob("*.json"):
|
|
450
|
+
if filepath.name.endswith(".ann.jsonl"):
|
|
451
|
+
continue
|
|
452
|
+
loaded = self._load_state(filepath)
|
|
453
|
+
if loaded and loaded.id:
|
|
454
|
+
id_to_run_id[loaded.id] = filepath.stem
|
|
455
|
+
|
|
456
|
+
# Load annotations for each state
|
|
457
|
+
for state in states:
|
|
458
|
+
if state.id is None or state.id not in id_to_run_id:
|
|
459
|
+
state.annotations = []
|
|
460
|
+
for rule in state.rules:
|
|
461
|
+
rule.annotations = []
|
|
462
|
+
continue
|
|
463
|
+
|
|
464
|
+
run_id_str = id_to_run_id[state.id]
|
|
465
|
+
ann_file = self._annotations_file(contract_fingerprint, run_id_str)
|
|
466
|
+
|
|
467
|
+
annotations = []
|
|
468
|
+
if ann_file.exists():
|
|
469
|
+
with open(ann_file, encoding="utf-8") as f:
|
|
470
|
+
for line in f:
|
|
471
|
+
line = line.strip()
|
|
472
|
+
if line:
|
|
473
|
+
annotations.append(Annotation.from_json(line))
|
|
474
|
+
|
|
475
|
+
self._attach_annotations_to_state(state, annotations)
|
|
476
|
+
|
|
477
|
+
return states
|
|
478
|
+
|
|
479
|
+
def __repr__(self) -> str:
|
|
480
|
+
return f"LocalStore(base_path={self.base_path})"
|