kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,543 @@
1
+ # src/kontra/state/backends/s3.py
2
+ """
3
+ S3-compatible state storage with normalized format (v0.5).
4
+
5
+ Directory structure:
6
+ s3://bucket/prefix/
7
+ └── state/
8
+ └── <contract_fingerprint>/
9
+ └── runs/
10
+ ├── <run_id>.json # run metadata + rule results
11
+ └── <run_id>.ann.jsonl # annotations (append-only)
12
+
13
+ Works with:
14
+ - AWS S3
15
+ - MinIO
16
+ - Any S3-compatible storage
17
+ """
18
+
19
from __future__ import annotations

import json
import os
import random
import string
import zlib
from datetime import datetime, timezone
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse

from kontra.state.types import Annotation, ValidationState

from .base import StateBackend
31
+
32
+
33
class S3Store(StateBackend):
    """
    S3-compatible object storage backend with normalized format.

    Uses fsspec/s3fs for S3 access. Supports AWS S3, MinIO, and other
    S3-compatible storage systems.

    Layout (see module docstring): one JSON object per run plus per-run
    annotation objects, all stored under
    ``<bucket>/<prefix>/state/<contract_fingerprint>/runs/``.

    URI format: s3://bucket/prefix
    """
43
+ def __init__(self, uri: str):
44
+ """
45
+ Initialize the S3 store.
46
+
47
+ Args:
48
+ uri: S3 URI in format s3://bucket/prefix
49
+
50
+ Environment variables:
51
+ AWS_ACCESS_KEY_ID: Access key
52
+ AWS_SECRET_ACCESS_KEY: Secret key
53
+ AWS_ENDPOINT_URL: Custom endpoint (for MinIO)
54
+ AWS_REGION: AWS region
55
+ """
56
+ self.uri = uri
57
+ parsed = urlparse(uri)
58
+ self.bucket = parsed.netloc
59
+ self.prefix = parsed.path.strip("/")
60
+ if self.prefix:
61
+ self.prefix = f"{self.prefix}/state"
62
+ else:
63
+ self.prefix = "state"
64
+
65
+ self._fs = None # Lazy initialization
66
+
67
+ def _get_fs(self):
68
+ """Get or create the S3 filesystem."""
69
+ if self._fs is not None:
70
+ return self._fs
71
+
72
+ try:
73
+ import fsspec
74
+ except ImportError as e:
75
+ raise RuntimeError(
76
+ "S3 state backend requires 's3fs'. Install with: pip install s3fs"
77
+ ) from e
78
+
79
+ storage_options = self._storage_options()
80
+ self._fs = fsspec.filesystem("s3", **storage_options)
81
+ return self._fs
82
+
83
+ @staticmethod
84
+ def _storage_options() -> Dict[str, Any]:
85
+ """Build fsspec storage options from environment."""
86
+ opts: Dict[str, Any] = {"anon": False}
87
+
88
+ key = os.getenv("AWS_ACCESS_KEY_ID")
89
+ secret = os.getenv("AWS_SECRET_ACCESS_KEY")
90
+ if key and secret:
91
+ opts["key"] = key
92
+ opts["secret"] = secret
93
+
94
+ endpoint = os.getenv("AWS_ENDPOINT_URL")
95
+ if endpoint:
96
+ opts["client_kwargs"] = {"endpoint_url": endpoint}
97
+ opts["config_kwargs"] = {"s3": {"addressing_style": "path"}}
98
+ opts["use_ssl"] = endpoint.startswith("https")
99
+
100
+ region = os.getenv("AWS_REGION")
101
+ if region:
102
+ opts.setdefault("client_kwargs", {})
103
+ opts["client_kwargs"]["region_name"] = region
104
+
105
+ return opts
106
+
107
+ def _runs_prefix(self, contract_fingerprint: str) -> str:
108
+ """Get the S3 prefix for a contract's runs."""
109
+ return f"{self.bucket}/{self.prefix}/{contract_fingerprint}/runs"
110
+
111
+ def _generate_run_id(self, run_at: datetime) -> str:
112
+ """Generate a unique run ID from timestamp."""
113
+ ts = run_at.strftime("%Y-%m-%dT%H-%M-%S")
114
+ suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
115
+ return f"{ts}_{suffix}"
116
+
117
+ def _run_key(self, contract_fingerprint: str, run_id: str) -> str:
118
+ """Get the S3 key for a run's state file."""
119
+ return f"{self._runs_prefix(contract_fingerprint)}/{run_id}.json"
120
+
121
+ def _annotations_key(self, contract_fingerprint: str, run_id: str) -> str:
122
+ """Get the S3 key prefix for a run's annotations (legacy JSONL)."""
123
+ return f"{self._runs_prefix(contract_fingerprint)}/{run_id}.ann.jsonl"
124
+
125
+ def _annotation_key(
126
+ self, contract_fingerprint: str, run_id: str, annotation_id: int
127
+ ) -> str:
128
+ """Get the S3 key for a single annotation file."""
129
+ return f"{self._runs_prefix(contract_fingerprint)}/{run_id}.ann.{annotation_id:06d}.json"
130
+
131
+ def _annotations_prefix(self, contract_fingerprint: str, run_id: str) -> str:
132
+ """Get the S3 prefix for a run's annotation files."""
133
+ return f"{self._runs_prefix(contract_fingerprint)}/{run_id}.ann."
134
+
135
    def _load_annotations(
        self, fs, contract_fingerprint: str, run_id_str: str
    ) -> List[Annotation]:
        """
        Load annotations for a run (supports both legacy JSONL and new per-file format).

        All reads are best-effort: a missing or unreadable object is treated
        as "no annotations there" rather than an error, so a partially
        written run never breaks history display.

        Args:
            fs: The fsspec filesystem
            contract_fingerprint: The contract fingerprint
            run_id_str: The string run ID

        Returns:
            List of annotations (legacy JSONL entries first, then per-file
            entries sorted by key, i.e. by zero-padded annotation ID)
        """
        annotations: List[Annotation] = []

        # Load from legacy JSONL format (one JSON document per line).
        legacy_key = self._annotations_key(contract_fingerprint, run_id_str)
        try:
            with fs.open(f"s3://{legacy_key}", "r") as f:
                for line in f:
                    line = line.strip()
                    if line:
                        annotations.append(Annotation.from_json(line))
        except Exception:
            # Most commonly FileNotFoundError: no legacy file for this run.
            pass

        # Load from new per-file format ({run_id}.ann.NNNNNN.json).
        prefix = self._annotations_prefix(contract_fingerprint, run_id_str)
        try:
            ann_files = fs.glob(f"s3://{prefix}*.json")
            # Sorting by key orders annotations by their zero-padded ID.
            for ann_file in sorted(ann_files):
                try:
                    with fs.open(f"s3://{ann_file}", "r") as f:
                        content = f.read().strip()
                        if content:
                            annotations.append(Annotation.from_json(content))
                except Exception:
                    # Skip individual unreadable/corrupt annotation objects.
                    pass
        except Exception:
            pass

        return annotations
178
+
179
+ def save(self, state: ValidationState) -> None:
180
+ """Save a validation state to S3."""
181
+ fs = self._get_fs()
182
+
183
+ # Generate run ID
184
+ run_id = self._generate_run_id(state.run_at)
185
+
186
+ # Store run_id in the state dict
187
+ state_dict = state.to_dict()
188
+ state_dict["_run_id"] = run_id
189
+
190
+ key = self._run_key(state.contract_fingerprint, run_id)
191
+
192
+ try:
193
+ with fs.open(f"s3://{key}", "w") as f:
194
+ f.write(json.dumps(state_dict, indent=2, default=str))
195
+ except Exception as e:
196
+ raise IOError(f"Failed to save state to S3: {e}") from e
197
+
198
+ def _load_state(self, filepath: str) -> Optional[ValidationState]:
199
+ """Load a state from an S3 path."""
200
+ fs = self._get_fs()
201
+ try:
202
+ with fs.open(f"s3://{filepath}", "r") as f:
203
+ content = f.read()
204
+ data = json.loads(content)
205
+
206
+ # Extract run_id for later use
207
+ run_id = data.pop("_run_id", None)
208
+
209
+ state = ValidationState.from_dict(data)
210
+
211
+ # Store run_id as a synthetic ID (hash)
212
+ if run_id:
213
+ state.id = hash(run_id) & 0x7FFFFFFF
214
+
215
+ return state
216
+ except Exception:
217
+ return None
218
+
219
+ def get_latest(self, contract_fingerprint: str) -> Optional[ValidationState]:
220
+ """Get the most recent state for a contract."""
221
+ history = self.get_history(contract_fingerprint, limit=1)
222
+ return history[0] if history else None
223
+
224
+ def get_history(
225
+ self,
226
+ contract_fingerprint: str,
227
+ limit: int = 10,
228
+ ) -> List[ValidationState]:
229
+ """Get recent history for a contract, newest first."""
230
+ fs = self._get_fs()
231
+ prefix = self._runs_prefix(contract_fingerprint)
232
+
233
+ try:
234
+ # List all JSON files (excluding annotation files)
235
+ all_files = fs.glob(f"s3://{prefix}/*.json")
236
+ files = [
237
+ f for f in all_files
238
+ if not f.endswith(".ann.jsonl") and ".ann." not in f.rsplit("/", 1)[-1]
239
+ ]
240
+ except Exception:
241
+ return []
242
+
243
+ if not files:
244
+ return []
245
+
246
+ # Sort by filename (timestamp prefix), newest first
247
+ files = sorted(files, reverse=True)
248
+
249
+ states = []
250
+ for filepath in files[:limit]:
251
+ state = self._load_state(filepath)
252
+ if state:
253
+ states.append(state)
254
+
255
+ return states
256
+
257
    def delete_old(
        self,
        contract_fingerprint: str,
        keep_count: int = 100,
    ) -> int:
        """
        Delete old states, keeping the most recent ones.

        Annotation objects belonging to each deleted run (legacy JSONL and
        per-file format) are removed as well, but only state files count
        toward the returned total. All deletions are best-effort.

        Args:
            contract_fingerprint: The contract whose runs to prune.
            keep_count: How many of the newest runs to keep.

        Returns:
            Number of state files deleted.
        """
        fs = self._get_fs()
        prefix = self._runs_prefix(contract_fingerprint)

        try:
            all_files = fs.glob(f"s3://{prefix}/*.json")
            # Exclude annotation objects: they share the prefix but carry
            # ".ann." in their basename.
            files = [
                f for f in all_files
                if not f.endswith(".ann.jsonl") and ".ann." not in f.rsplit("/", 1)[-1]
            ]
        except Exception:
            return 0

        if not files:
            return 0

        # Sort newest first (run IDs begin with a zero-padded timestamp).
        files = sorted(files, reverse=True)

        # Delete files beyond keep_count
        deleted = 0
        for filepath in files[keep_count:]:
            try:
                # Delete state file
                fs.rm(f"s3://{filepath}")
                deleted += 1

                # Delete corresponding annotations (both legacy JSONL and new per-file).
                # The string run ID is the state file's basename without ".json".
                run_id = filepath.rsplit("/", 1)[-1].replace(".json", "")

                # Legacy JSONL
                ann_key = self._annotations_key(contract_fingerprint, run_id)
                try:
                    fs.rm(f"s3://{ann_key}")
                except Exception:
                    # Usually: no legacy annotation file exists for this run.
                    pass

                # New per-file annotations
                ann_prefix = self._annotations_prefix(contract_fingerprint, run_id)
                try:
                    ann_files = fs.glob(f"s3://{ann_prefix}*.json")
                    for ann_file in ann_files:
                        try:
                            fs.rm(f"s3://{ann_file}")
                        except Exception:
                            pass
                except Exception:
                    pass
            except Exception:
                # State file removal failed; leave its annotations in place
                # and move on to the next candidate.
                continue

        return deleted
314
+
315
+ def list_contracts(self) -> List[str]:
316
+ """List all contract fingerprints with stored state."""
317
+ fs = self._get_fs()
318
+ prefix = f"{self.bucket}/{self.prefix}"
319
+
320
+ try:
321
+ # List directories under the state prefix
322
+ items = fs.ls(f"s3://{prefix}/", detail=False)
323
+ except Exception:
324
+ return []
325
+
326
+ contracts = []
327
+ for item in items:
328
+ # Extract the fingerprint (last part of the path)
329
+ parts = item.rstrip("/").split("/")
330
+ if parts:
331
+ name = parts[-1]
332
+ # Fingerprints are 16 hex characters
333
+ if len(name) == 16 and all(c in "0123456789abcdef" for c in name):
334
+ contracts.append(name)
335
+
336
+ return sorted(contracts)
337
+
338
+ def clear(self, contract_fingerprint: Optional[str] = None) -> int:
339
+ """
340
+ Clear stored states.
341
+
342
+ Args:
343
+ contract_fingerprint: If provided, only clear this contract's states.
344
+ If None, clear all states.
345
+
346
+ Returns:
347
+ Number of state files deleted.
348
+ """
349
+ fs = self._get_fs()
350
+ deleted = 0
351
+
352
+ if contract_fingerprint:
353
+ prefix = self._runs_prefix(contract_fingerprint)
354
+ try:
355
+ # Delete all files (json and jsonl)
356
+ for pattern in ["*.json", "*.jsonl"]:
357
+ files = fs.glob(f"s3://{prefix}/{pattern}")
358
+ for filepath in files:
359
+ fs.rm(f"s3://{filepath}")
360
+ if filepath.endswith(".json") and not filepath.endswith(".ann.jsonl"):
361
+ deleted += 1
362
+ except Exception:
363
+ pass
364
+ else:
365
+ # Clear all contracts
366
+ for fp in self.list_contracts():
367
+ deleted += self.clear(fp)
368
+
369
+ return deleted
370
+
371
+ # -------------------------------------------------------------------------
372
+ # Annotation Methods
373
+ # -------------------------------------------------------------------------
374
+
375
+ def save_annotation(self, annotation: Annotation) -> int:
376
+ """
377
+ Save an annotation (append-only).
378
+
379
+ For S3 backends, we need the contract fingerprint and run_id string.
380
+ """
381
+ raise NotImplementedError(
382
+ "S3Store.save_annotation requires contract fingerprint. "
383
+ "Use save_annotation_for_run instead."
384
+ )
385
+
386
    def save_annotation_for_run(
        self,
        contract_fingerprint: str,
        run_id_str: str,
        annotation: Annotation,
    ) -> int:
        """
        Save an annotation for a specific run.

        Each annotation is stored as a separate file to avoid appending to a
        shared object. File pattern: {run_id}.ann.{annotation_id:06d}.json

        NOTE(review): the write of one annotation is a single object put, but
        the ID is assigned by counting existing files first — two concurrent
        writers could still pick the same ID. Acceptable for this backend's
        usage, but not fully race-free.

        Args:
            contract_fingerprint: The contract fingerprint
            run_id_str: The string run ID
            annotation: The annotation to save

        Returns:
            The annotation ID

        Raises:
            IOError: if the annotation object cannot be written.
        """
        fs = self._get_fs()
        prefix = self._annotations_prefix(contract_fingerprint, run_id_str)

        # Count existing annotation files to generate next ID
        existing_count = 0
        try:
            # Glob for annotation files (new format)
            ann_files = fs.glob(f"s3://{prefix}*.json")
            existing_count = len(ann_files)

            # Also check legacy JSONL for backwards compatibility
            legacy_key = self._annotations_key(contract_fingerprint, run_id_str)
            try:
                with fs.open(f"s3://{legacy_key}", "r") as f:
                    # Each JSONL line is one legacy annotation.
                    existing_count += sum(1 for _ in f)
            except Exception:
                # No legacy file (or unreadable) — treat as zero entries.
                pass
        except Exception:
            pass

        # IDs are 1-based and continue after any legacy entries.
        annotation.id = existing_count + 1

        # Write annotation as a separate object (single put; see NOTE above
        # about the ID-assignment race).
        ann_key = self._annotation_key(
            contract_fingerprint, run_id_str, annotation.id
        )
        try:
            with fs.open(f"s3://{ann_key}", "w") as f:
                f.write(annotation.to_json())
            return annotation.id
        except Exception as e:
            raise IOError(f"Failed to save annotation to S3: {e}") from e
438
+
439
    def get_annotations(
        self,
        run_id: int,
        rule_result_id: Optional[int] = None,
    ) -> List[Annotation]:
        """
        Get annotations for a run.

        Always returns an empty list on this backend: a bare synthetic
        integer run ID is not enough to locate annotation objects, whose
        keys are built from the contract fingerprint and string run ID.
        Use get_run_with_annotations / get_history_with_annotations instead.
        """
        return []
446
+
447
    def get_run_with_annotations(
        self,
        contract_fingerprint: str,
        run_id: Optional[int] = None,
    ) -> Optional[ValidationState]:
        """
        Get a validation state with its annotations loaded.

        Args:
            contract_fingerprint: The contract fingerprint.
            run_id: Synthetic integer ID of the run to fetch; None means
                the latest run. Only the most recent 100 runs are searched.

        Returns:
            The state with `annotations` populated (empty lists if the
            underlying run file cannot be re-identified), or None if no
            matching run exists.
        """
        # Get the state
        if run_id is None:
            state = self.get_latest(contract_fingerprint)
        else:
            states = self.get_history(contract_fingerprint, limit=100)
            state = None
            for s in states:
                if s.id == run_id:
                    state = s
                    break

        if not state:
            return None

        fs = self._get_fs()
        prefix = self._runs_prefix(contract_fingerprint)

        # Re-derive the string run ID by re-loading run files until one
        # yields the same synthetic integer ID (the ID is computed from the
        # filename-embedded run ID, not stored separately).
        run_id_str = None
        try:
            all_files = fs.glob(f"s3://{prefix}/*.json")
            files = [f for f in all_files if not f.endswith(".ann.jsonl")]

            for filepath in files:
                loaded = self._load_state(filepath)
                if loaded and loaded.id == state.id:
                    run_id_str = filepath.rsplit("/", 1)[-1].replace(".json", "")
                    break
        except Exception:
            pass

        if not run_id_str:
            # Couldn't map the state back to its object key: return it with
            # explicitly empty annotation lists rather than failing.
            state.annotations = []
            for rule in state.rules:
                rule.annotations = []
            return state

        # Load annotations (supports both legacy JSONL and new per-file format)
        annotations = self._load_annotations(
            fs, contract_fingerprint, run_id_str
        )

        # _attach_annotations_to_state is provided by the StateBackend base.
        self._attach_annotations_to_state(state, annotations)
        return state
497
+
498
    def get_history_with_annotations(
        self,
        contract_fingerprint: str,
        limit: int = 10,
    ) -> List[ValidationState]:
        """
        Get recent history with annotations loaded, newest first.

        Args:
            contract_fingerprint: The contract fingerprint.
            limit: Maximum number of states to return.

        Returns:
            Recent states with `annotations` populated; states whose run
            file cannot be re-identified get empty annotation lists.
        """
        states = self.get_history(contract_fingerprint, limit=limit)

        fs = self._get_fs()
        prefix = self._runs_prefix(contract_fingerprint)

        # Build a synthetic-ID -> string-run-ID mapping by re-loading the run
        # files (the integer ID is derived from the filename's run ID).
        id_to_run_id: Dict[int, str] = {}
        try:
            all_files = fs.glob(f"s3://{prefix}/*.json")
            files = [f for f in all_files if not f.endswith(".ann.jsonl")]

            for filepath in files:
                loaded = self._load_state(filepath)
                if loaded and loaded.id:
                    run_id_str = filepath.rsplit("/", 1)[-1].replace(".json", "")
                    id_to_run_id[loaded.id] = run_id_str
        except Exception:
            pass

        # Load annotations for each state
        for state in states:
            if state.id is None or state.id not in id_to_run_id:
                # Run object could not be mapped back: attach explicitly
                # empty annotation lists instead of leaving them unset.
                state.annotations = []
                for rule in state.rules:
                    rule.annotations = []
                continue

            run_id_str = id_to_run_id[state.id]

            # Load annotations (supports both legacy JSONL and new per-file format)
            annotations = self._load_annotations(
                fs, contract_fingerprint, run_id_str
            )

            # _attach_annotations_to_state is provided by the StateBackend base.
            self._attach_annotations_to_state(state, annotations)

        return states
541
+
542
+ def __repr__(self) -> str:
543
+ return f"S3Store(uri={self.uri})"