kontra 0.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. kontra/__init__.py +1871 -0
  2. kontra/api/__init__.py +22 -0
  3. kontra/api/compare.py +340 -0
  4. kontra/api/decorators.py +153 -0
  5. kontra/api/results.py +2121 -0
  6. kontra/api/rules.py +681 -0
  7. kontra/cli/__init__.py +0 -0
  8. kontra/cli/commands/__init__.py +1 -0
  9. kontra/cli/commands/config.py +153 -0
  10. kontra/cli/commands/diff.py +450 -0
  11. kontra/cli/commands/history.py +196 -0
  12. kontra/cli/commands/profile.py +289 -0
  13. kontra/cli/commands/validate.py +468 -0
  14. kontra/cli/constants.py +6 -0
  15. kontra/cli/main.py +48 -0
  16. kontra/cli/renderers.py +304 -0
  17. kontra/cli/utils.py +28 -0
  18. kontra/config/__init__.py +34 -0
  19. kontra/config/loader.py +127 -0
  20. kontra/config/models.py +49 -0
  21. kontra/config/settings.py +797 -0
  22. kontra/connectors/__init__.py +0 -0
  23. kontra/connectors/db_utils.py +251 -0
  24. kontra/connectors/detection.py +323 -0
  25. kontra/connectors/handle.py +368 -0
  26. kontra/connectors/postgres.py +127 -0
  27. kontra/connectors/sqlserver.py +226 -0
  28. kontra/engine/__init__.py +0 -0
  29. kontra/engine/backends/duckdb_session.py +227 -0
  30. kontra/engine/backends/duckdb_utils.py +18 -0
  31. kontra/engine/backends/polars_backend.py +47 -0
  32. kontra/engine/engine.py +1205 -0
  33. kontra/engine/executors/__init__.py +15 -0
  34. kontra/engine/executors/base.py +50 -0
  35. kontra/engine/executors/database_base.py +528 -0
  36. kontra/engine/executors/duckdb_sql.py +607 -0
  37. kontra/engine/executors/postgres_sql.py +162 -0
  38. kontra/engine/executors/registry.py +69 -0
  39. kontra/engine/executors/sqlserver_sql.py +163 -0
  40. kontra/engine/materializers/__init__.py +14 -0
  41. kontra/engine/materializers/base.py +42 -0
  42. kontra/engine/materializers/duckdb.py +110 -0
  43. kontra/engine/materializers/factory.py +22 -0
  44. kontra/engine/materializers/polars_connector.py +131 -0
  45. kontra/engine/materializers/postgres.py +157 -0
  46. kontra/engine/materializers/registry.py +138 -0
  47. kontra/engine/materializers/sqlserver.py +160 -0
  48. kontra/engine/result.py +15 -0
  49. kontra/engine/sql_utils.py +611 -0
  50. kontra/engine/sql_validator.py +609 -0
  51. kontra/engine/stats.py +194 -0
  52. kontra/engine/types.py +138 -0
  53. kontra/errors.py +533 -0
  54. kontra/logging.py +85 -0
  55. kontra/preplan/__init__.py +5 -0
  56. kontra/preplan/planner.py +253 -0
  57. kontra/preplan/postgres.py +179 -0
  58. kontra/preplan/sqlserver.py +191 -0
  59. kontra/preplan/types.py +24 -0
  60. kontra/probes/__init__.py +20 -0
  61. kontra/probes/compare.py +400 -0
  62. kontra/probes/relationship.py +283 -0
  63. kontra/reporters/__init__.py +0 -0
  64. kontra/reporters/json_reporter.py +190 -0
  65. kontra/reporters/rich_reporter.py +11 -0
  66. kontra/rules/__init__.py +35 -0
  67. kontra/rules/base.py +186 -0
  68. kontra/rules/builtin/__init__.py +40 -0
  69. kontra/rules/builtin/allowed_values.py +156 -0
  70. kontra/rules/builtin/compare.py +188 -0
  71. kontra/rules/builtin/conditional_not_null.py +213 -0
  72. kontra/rules/builtin/conditional_range.py +310 -0
  73. kontra/rules/builtin/contains.py +138 -0
  74. kontra/rules/builtin/custom_sql_check.py +182 -0
  75. kontra/rules/builtin/disallowed_values.py +140 -0
  76. kontra/rules/builtin/dtype.py +203 -0
  77. kontra/rules/builtin/ends_with.py +129 -0
  78. kontra/rules/builtin/freshness.py +240 -0
  79. kontra/rules/builtin/length.py +193 -0
  80. kontra/rules/builtin/max_rows.py +35 -0
  81. kontra/rules/builtin/min_rows.py +46 -0
  82. kontra/rules/builtin/not_null.py +121 -0
  83. kontra/rules/builtin/range.py +222 -0
  84. kontra/rules/builtin/regex.py +143 -0
  85. kontra/rules/builtin/starts_with.py +129 -0
  86. kontra/rules/builtin/unique.py +124 -0
  87. kontra/rules/condition_parser.py +203 -0
  88. kontra/rules/execution_plan.py +455 -0
  89. kontra/rules/factory.py +103 -0
  90. kontra/rules/predicates.py +25 -0
  91. kontra/rules/registry.py +24 -0
  92. kontra/rules/static_predicates.py +120 -0
  93. kontra/scout/__init__.py +9 -0
  94. kontra/scout/backends/__init__.py +17 -0
  95. kontra/scout/backends/base.py +111 -0
  96. kontra/scout/backends/duckdb_backend.py +359 -0
  97. kontra/scout/backends/postgres_backend.py +519 -0
  98. kontra/scout/backends/sqlserver_backend.py +577 -0
  99. kontra/scout/dtype_mapping.py +150 -0
  100. kontra/scout/patterns.py +69 -0
  101. kontra/scout/profiler.py +801 -0
  102. kontra/scout/reporters/__init__.py +39 -0
  103. kontra/scout/reporters/json_reporter.py +165 -0
  104. kontra/scout/reporters/markdown_reporter.py +152 -0
  105. kontra/scout/reporters/rich_reporter.py +144 -0
  106. kontra/scout/store.py +208 -0
  107. kontra/scout/suggest.py +200 -0
  108. kontra/scout/types.py +652 -0
  109. kontra/state/__init__.py +29 -0
  110. kontra/state/backends/__init__.py +79 -0
  111. kontra/state/backends/base.py +348 -0
  112. kontra/state/backends/local.py +480 -0
  113. kontra/state/backends/postgres.py +1010 -0
  114. kontra/state/backends/s3.py +543 -0
  115. kontra/state/backends/sqlserver.py +969 -0
  116. kontra/state/fingerprint.py +166 -0
  117. kontra/state/types.py +1061 -0
  118. kontra/version.py +1 -0
  119. kontra-0.5.2.dist-info/METADATA +122 -0
  120. kontra-0.5.2.dist-info/RECORD +124 -0
  121. kontra-0.5.2.dist-info/WHEEL +5 -0
  122. kontra-0.5.2.dist-info/entry_points.txt +2 -0
  123. kontra-0.5.2.dist-info/licenses/LICENSE +17 -0
  124. kontra-0.5.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,480 @@
1
+ # src/kontra/state/backends/local.py
2
+ """
3
+ Local filesystem state storage with normalized format (v0.5).
4
+
5
+ Directory structure:
6
+ .kontra/state/
7
+ └── <contract_fingerprint>/
8
+ └── runs/
9
+ ├── <run_id>.json # run metadata + rule results
10
+ └── <run_id>.ann.jsonl # annotations (append-only)
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import os
17
+ import random
18
+ import string
19
+ from datetime import datetime, timezone
20
+ from pathlib import Path
21
+ from typing import Dict, List, Optional
22
+
23
+ from .base import StateBackend
24
+ from kontra.state.types import Annotation, ValidationState
25
+
26
+
27
class LocalStore(StateBackend):
    """
    Filesystem-based state storage with normalized format.

    Default storage location is .kontra/state/ in the current working
    directory. Can be customized via the base_path parameter.

    Run IDs are timestamp-based: YYYY-MM-DDTHH-MM-SS_<random>

    Each run is written to ``runs/<run_id>.json`` below the contract's
    directory; annotations for that run are appended to
    ``runs/<run_id>.ann.jsonl``.
    """

    # Directory names of exactly this length are reported as fingerprints
    # by list_contracts().
    _FINGERPRINT_LENGTH = 16

    # strftime/strptime pattern of the timestamp prefix of a run ID.
    _RUN_ID_TS_FORMAT = "%Y-%m-%dT%H-%M-%S"

    def __init__(self, base_path: Optional[str] = None):
        """
        Initialize the local store.

        Args:
            base_path: Base directory for state storage.
                Defaults to .kontra/state/ in cwd.
        """
        if base_path:
            self.base_path = Path(base_path)
        else:
            self.base_path = Path.cwd() / ".kontra" / "state"

    # -------------------------------------------------------------------------
    # Path / ID helpers
    # -------------------------------------------------------------------------

    def _contract_dir(self, contract_fingerprint: str) -> Path:
        """Get the directory for a contract's states."""
        return self.base_path / contract_fingerprint

    def _runs_dir(self, contract_fingerprint: str) -> Path:
        """Get the runs directory for a contract."""
        return self._contract_dir(contract_fingerprint) / "runs"

    def _run_file(self, contract_fingerprint: str, run_id: str) -> Path:
        """Get the path for a run's state file."""
        return self._runs_dir(contract_fingerprint) / f"{run_id}.json"

    def _annotations_file(self, contract_fingerprint: str, run_id: str) -> Path:
        """Get the path for a run's annotations file."""
        return self._runs_dir(contract_fingerprint) / f"{run_id}.ann.jsonl"

    def _generate_run_id(self, run_at: datetime) -> str:
        """
        Generate a unique run ID from a timestamp.

        Format: YYYY-MM-DDTHH-MM-SS_<random>. The timestamp prefix makes the
        IDs sortable by name; the 6-character suffix separates runs that
        start within the same second.
        """
        # _parse_run_id_timestamp() labels the prefix as UTC, so convert
        # aware datetimes to UTC first. Naive datetimes are used as given.
        if run_at.tzinfo is not None:
            run_at = run_at.astimezone(timezone.utc)
        ts = run_at.strftime(self._RUN_ID_TS_FORMAT)
        suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=6))
        return f"{ts}_{suffix}"

    def _parse_run_id_timestamp(self, run_id: str) -> Optional[datetime]:
        """
        Parse the timestamp prefix of a run ID.

        Returns:
            A UTC-aware datetime, or None if the prefix cannot be parsed.
        """
        try:
            # Everything before the first underscore is the timestamp part
            ts_part = run_id.split("_")[0]
            parsed = datetime.strptime(ts_part, self._RUN_ID_TS_FORMAT)
        except (AttributeError, TypeError, ValueError):
            return None
        return parsed.replace(tzinfo=timezone.utc)

    @staticmethod
    def _synthetic_id(run_id: str) -> int:
        """
        Map a run ID string to a positive 31-bit integer ID.

        Built-in hash() is salted per process for strings, so it cannot be
        used for an ID that has to match across interpreter runs. CRC32 is
        deterministic.
        """
        import zlib  # local import: only needed here

        return zlib.crc32(run_id.encode("utf-8")) & 0x7FFFFFFF

    @staticmethod
    def _state_files(runs_dir: Path) -> List[Path]:
        """
        List the run state files in runs_dir, newest first.

        The "*.json" pattern cannot match "<run_id>.ann.jsonl", so no extra
        filtering is required. Sorting by name is sorting by time because
        run IDs start with a timestamp.
        """
        return sorted(runs_dir.glob("*.json"), key=lambda p: p.name, reverse=True)

    @staticmethod
    def _rmdir_if_empty(path: Path) -> None:
        """Remove a directory, ignoring the error if it cannot be removed."""
        try:
            path.rmdir()
        except OSError:
            pass

    # -------------------------------------------------------------------------
    # State Methods
    # -------------------------------------------------------------------------

    def save(self, state: ValidationState) -> None:
        """
        Save a validation state to the filesystem.

        A new run ID is generated for every call and stored in the file
        under the "_run_id" key.
        """
        runs_dir = self._runs_dir(state.contract_fingerprint)
        runs_dir.mkdir(parents=True, exist_ok=True)

        run_id = self._generate_run_id(state.run_at)

        # Store run_id in the state dict
        state_dict = state.to_dict()
        state_dict["_run_id"] = run_id

        filepath = self._run_file(state.contract_fingerprint, run_id)

        # Write to a temp file first, then move it into place, so readers
        # never observe a partially written run file.
        temp_path = filepath.with_suffix(".tmp")
        try:
            temp_path.write_text(
                json.dumps(state_dict, indent=2, default=str),
                encoding="utf-8",
            )
            # replace() also overwrites an existing target on Windows,
            # where rename() would raise.
            temp_path.replace(filepath)
        except Exception:
            # Best-effort cleanup; never let it hide the original error.
            try:
                if temp_path.exists():
                    temp_path.unlink()
            except OSError:
                pass
            raise

    def _load_state(self, filepath: Path) -> Optional[ValidationState]:
        """
        Load a state from a file path.

        Returns:
            The state, or None if the file cannot be read or parsed.
        """
        try:
            data = json.loads(filepath.read_text(encoding="utf-8"))

            # "_run_id" is storage metadata, not part of the state itself
            run_id = data.pop("_run_id", None)

            state = ValidationState.from_dict(data)

            if run_id:
                # Stable integer ID for compatibility with integer-keyed
                # backends
                state.id = self._synthetic_id(run_id)

            return state
        except Exception:
            # Deliberately broad: an unreadable or corrupt run file is
            # treated as missing instead of failing the whole listing.
            return None

    def _iter_runs(self, contract_fingerprint: str):
        """
        Yield (filepath, state) for every readable run, newest first.

        Files that fail to load are skipped.
        """
        runs_dir = self._runs_dir(contract_fingerprint)
        if not runs_dir.exists():
            return
        for filepath in self._state_files(runs_dir):
            state = self._load_state(filepath)
            if state is not None:
                yield filepath, state

    def _recent_runs(self, contract_fingerprint: str, limit: Optional[int]) -> List[tuple]:
        """
        Collect up to `limit` readable (filepath, state) pairs, newest first.

        A limit of None means no limit; a limit <= 0 yields an empty list.
        """
        if limit is not None and limit <= 0:
            return []
        runs: List[tuple] = []
        for pair in self._iter_runs(contract_fingerprint):
            runs.append(pair)
            if limit is not None and len(runs) >= limit:
                break
        return runs

    def get_latest(self, contract_fingerprint: str) -> Optional[ValidationState]:
        """Get the most recent readable state for a contract."""
        history = self.get_history(contract_fingerprint, limit=1)
        return history[0] if history else None

    def get_history(
        self,
        contract_fingerprint: str,
        limit: int = 10,
    ) -> List[ValidationState]:
        """
        Get recent history for a contract, newest first.

        Unreadable run files are skipped and do not count towards `limit`.
        """
        return [state for _, state in self._recent_runs(contract_fingerprint, limit)]

    def delete_old(
        self,
        contract_fingerprint: str,
        keep_count: int = 100,
    ) -> int:
        """
        Delete old states, keeping the most recent ones.

        Returns:
            Number of state files deleted.
        """
        runs_dir = self._runs_dir(contract_fingerprint)
        if not runs_dir.exists():
            return 0

        deleted = 0
        for filepath in self._state_files(runs_dir)[keep_count:]:
            try:
                filepath.unlink()
            except OSError:
                continue
            deleted += 1

            # Also delete the corresponding annotations file, if any.
            # A missing file raises FileNotFoundError, an OSError subclass.
            try:
                self._annotations_file(contract_fingerprint, filepath.stem).unlink()
            except OSError:
                pass

        return deleted

    def list_contracts(self) -> List[str]:
        """List all contract fingerprints with stored state."""
        if not self.base_path.exists():
            return []

        return sorted(
            item.name
            for item in self.base_path.iterdir()
            if item.is_dir() and len(item.name) == self._FINGERPRINT_LENGTH
        )

    def _clear_runs_dir(self, runs_dir: Path) -> int:
        """
        Delete all state and annotation files in runs_dir.

        The directory itself is removed afterwards if it ended up empty.

        Returns:
            Number of state (.json) files deleted.
        """
        deleted = 0
        # Snapshot the listings so deletion does not race the directory scan
        for filepath in list(runs_dir.glob("*.json")):
            filepath.unlink()
            deleted += 1
        for filepath in list(runs_dir.glob("*.jsonl")):
            filepath.unlink()
        self._rmdir_if_empty(runs_dir)
        return deleted

    def clear(self, contract_fingerprint: Optional[str] = None) -> int:
        """
        Clear stored states.

        Args:
            contract_fingerprint: If provided, only clear this contract's states.
                If None, clear all states.

        Returns:
            Number of state files deleted.
        """
        if contract_fingerprint:
            runs_dir = self._runs_dir(contract_fingerprint)
            if not runs_dir.exists():
                return 0
            deleted = self._clear_runs_dir(runs_dir)
            self._rmdir_if_empty(self._contract_dir(contract_fingerprint))
            return deleted

        # Clear all
        if not self.base_path.exists():
            return 0

        deleted = 0
        for contract_dir in list(self.base_path.iterdir()):
            if not contract_dir.is_dir():
                continue
            runs_dir = contract_dir / "runs"
            if runs_dir.exists():
                deleted += self._clear_runs_dir(runs_dir)
            self._rmdir_if_empty(contract_dir)

        return deleted

    # -------------------------------------------------------------------------
    # Annotation Methods
    # -------------------------------------------------------------------------

    @staticmethod
    def _read_annotations(ann_file: Path) -> List[Annotation]:
        """
        Read all annotations from a JSONL file.

        Returns an empty list if the file does not exist. Blank and
        malformed lines are skipped.
        """
        annotations: List[Annotation] = []
        try:
            with open(ann_file, encoding="utf-8") as f:
                for line in f:
                    line = line.strip()
                    if not line:
                        continue
                    try:
                        annotations.append(Annotation.from_json(line))
                    except Exception:
                        # Skip malformed annotations
                        continue
        except FileNotFoundError:
            pass
        return annotations

    def save_annotation(self, annotation: Annotation) -> int:
        """
        Save an annotation (append-only).

        Not supported by this backend: locating the annotations file needs
        the contract fingerprint and the run ID string, which are not
        passed to this method.

        Raises:
            NotImplementedError: Always. Use save_annotation_for_run.
        """
        raise NotImplementedError(
            "LocalStore.save_annotation requires contract fingerprint. "
            "Use save_annotation_for_run instead."
        )

    def save_annotation_for_run(
        self,
        contract_fingerprint: str,
        run_id_str: str,
        annotation: Annotation,
    ) -> int:
        """
        Save an annotation for a specific run.

        Args:
            contract_fingerprint: The contract fingerprint
            run_id_str: The string run ID (e.g., "2024-01-15T09-30-00_abc123")
            annotation: The annotation to save

        Returns:
            A synthetic annotation ID (line number)
        """
        ann_file = self._annotations_file(contract_fingerprint, run_id_str)
        ann_file.parent.mkdir(parents=True, exist_ok=True)

        # The synthetic ID is the 1-based line the annotation will occupy
        line_count = 0
        if ann_file.exists():
            with open(ann_file, encoding="utf-8") as f:
                line_count = sum(1 for _ in f)
        annotation.id = line_count + 1

        # Append to JSONL file
        with open(ann_file, "a", encoding="utf-8") as f:
            f.write(annotation.to_json() + "\n")

        return annotation.id

    def get_annotations(
        self,
        run_id: int,
        rule_result_id: Optional[int] = None,
    ) -> List[Annotation]:
        """
        Get annotations for a run.

        Note: This backend always returns an empty list here, because the
        contract fingerprint is needed to locate a run's annotations file.
        Use get_run_with_annotations instead.
        """
        return []

    def get_annotations_for_contract(
        self,
        contract_fingerprint: str,
        rule_id: Optional[str] = None,
        annotation_type: Optional[str] = None,
        limit: int = 20,
    ) -> List[Annotation]:
        """
        Get annotations across all runs for a contract.

        Args:
            contract_fingerprint: The contract fingerprint
            rule_id: If given, keep only annotations with this rule_id
            annotation_type: If given, keep only annotations of this type
            limit: Maximum number of annotations to return

        Returns:
            Matching annotations, newest first by created_at.
        """
        runs_dir = self._runs_dir(contract_fingerprint)
        if not runs_dir.exists():
            return []

        matches: List[Annotation] = []
        for ann_file in runs_dir.glob("*.ann.jsonl"):
            for ann in self._read_annotations(ann_file):
                if rule_id is not None and ann.rule_id != rule_id:
                    continue
                if annotation_type is not None and ann.annotation_type != annotation_type:
                    continue
                matches.append(ann)

        # Newest first; annotations without created_at sort last
        matches.sort(
            key=lambda a: a.created_at or datetime.min.replace(tzinfo=timezone.utc),
            reverse=True,
        )

        return matches[:limit]

    def get_run_with_annotations(
        self,
        contract_fingerprint: str,
        run_id: Optional[int] = None,
    ) -> Optional[ValidationState]:
        """
        Get a validation state with its annotations loaded.

        Args:
            contract_fingerprint: The contract fingerprint
            run_id: Synthetic integer ID of the run (the `id` set on states
                loaded by this store). If None, the most recent readable
                run is used.

        Returns:
            The state with annotations attached, or None if no run matches.
        """
        for filepath, state in self._iter_runs(contract_fingerprint):
            if run_id is not None and state.id != run_id:
                continue
            # The file stem is the run ID string, which also names the
            # annotations file.
            annotations = self._read_annotations(
                self._annotations_file(contract_fingerprint, filepath.stem)
            )
            self._attach_annotations_to_state(state, annotations)
            return state

        return None

    def get_history_with_annotations(
        self,
        contract_fingerprint: str,
        limit: int = 10,
    ) -> List[ValidationState]:
        """Get recent history, newest first, with annotations loaded."""
        states: List[ValidationState] = []
        for filepath, state in self._recent_runs(contract_fingerprint, limit):
            annotations = self._read_annotations(
                self._annotations_file(contract_fingerprint, filepath.stem)
            )
            self._attach_annotations_to_state(state, annotations)
            states.append(state)

        return states

    def __repr__(self) -> str:
        return f"LocalStore(base_path={self.base_path})"