invar-tools 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- invar/__init__.py +1 -0
- invar/core/contracts.py +80 -10
- invar/core/entry_points.py +367 -0
- invar/core/extraction.py +5 -6
- invar/core/format_specs.py +195 -0
- invar/core/format_strategies.py +197 -0
- invar/core/formatter.py +32 -10
- invar/core/hypothesis_strategies.py +50 -10
- invar/core/inspect.py +1 -1
- invar/core/lambda_helpers.py +3 -2
- invar/core/models.py +30 -18
- invar/core/must_use.py +2 -1
- invar/core/parser.py +13 -6
- invar/core/postcondition_scope.py +128 -0
- invar/core/property_gen.py +86 -42
- invar/core/purity.py +13 -7
- invar/core/purity_heuristics.py +5 -9
- invar/core/references.py +8 -6
- invar/core/review_trigger.py +370 -0
- invar/core/rule_meta.py +69 -2
- invar/core/rules.py +91 -28
- invar/core/shell_analysis.py +247 -0
- invar/core/shell_architecture.py +171 -0
- invar/core/strategies.py +7 -14
- invar/core/suggestions.py +92 -0
- invar/core/sync_helpers.py +238 -0
- invar/core/tautology.py +103 -37
- invar/core/template_parser.py +467 -0
- invar/core/timeout_inference.py +4 -7
- invar/core/utils.py +63 -18
- invar/core/verification_routing.py +155 -0
- invar/mcp/server.py +113 -13
- invar/shell/commands/__init__.py +11 -0
- invar/shell/{cli.py → commands/guard.py} +152 -44
- invar/shell/{init_cmd.py → commands/init.py} +200 -28
- invar/shell/commands/merge.py +256 -0
- invar/shell/commands/mutate.py +184 -0
- invar/shell/{perception.py → commands/perception.py} +2 -0
- invar/shell/commands/sync_self.py +113 -0
- invar/shell/commands/template_sync.py +366 -0
- invar/shell/{test_cmd.py → commands/test.py} +3 -1
- invar/shell/commands/update.py +48 -0
- invar/shell/config.py +247 -10
- invar/shell/coverage.py +351 -0
- invar/shell/fs.py +5 -2
- invar/shell/git.py +2 -0
- invar/shell/guard_helpers.py +116 -20
- invar/shell/guard_output.py +106 -24
- invar/shell/mcp_config.py +3 -0
- invar/shell/mutation.py +314 -0
- invar/shell/property_tests.py +75 -24
- invar/shell/prove/__init__.py +9 -0
- invar/shell/prove/accept.py +113 -0
- invar/shell/{prove.py → prove/crosshair.py} +69 -30
- invar/shell/prove/hypothesis.py +293 -0
- invar/shell/subprocess_env.py +393 -0
- invar/shell/template_engine.py +345 -0
- invar/shell/templates.py +53 -0
- invar/shell/testing.py +77 -37
- invar/templates/CLAUDE.md.template +86 -9
- invar/templates/aider.conf.yml.template +16 -14
- invar/templates/commands/audit.md +138 -0
- invar/templates/commands/guard.md +77 -0
- invar/templates/config/CLAUDE.md.jinja +206 -0
- invar/templates/config/context.md.jinja +92 -0
- invar/templates/config/pre-commit.yaml.jinja +44 -0
- invar/templates/context.md.template +33 -0
- invar/templates/cursorrules.template +25 -13
- invar/templates/examples/README.md +2 -0
- invar/templates/examples/conftest.py +3 -0
- invar/templates/examples/contracts.py +4 -2
- invar/templates/examples/core_shell.py +10 -4
- invar/templates/examples/workflow.md +81 -0
- invar/templates/manifest.toml +137 -0
- invar/templates/protocol/INVAR.md +210 -0
- invar/templates/skills/develop/SKILL.md.jinja +318 -0
- invar/templates/skills/investigate/SKILL.md.jinja +106 -0
- invar/templates/skills/propose/SKILL.md.jinja +104 -0
- invar/templates/skills/review/SKILL.md.jinja +125 -0
- invar_tools-1.3.0.dist-info/METADATA +377 -0
- invar_tools-1.3.0.dist-info/RECORD +95 -0
- invar_tools-1.3.0.dist-info/entry_points.txt +2 -0
- invar_tools-1.3.0.dist-info/licenses/LICENSE +190 -0
- invar_tools-1.3.0.dist-info/licenses/LICENSE-GPL +674 -0
- invar_tools-1.3.0.dist-info/licenses/NOTICE +63 -0
- invar/contracts.py +0 -152
- invar/decorators.py +0 -94
- invar/invariant.py +0 -57
- invar/resource.py +0 -99
- invar/shell/prove_fallback.py +0 -183
- invar/shell/update_cmd.py +0 -191
- invar/templates/INVAR.md +0 -134
- invar_tools-1.0.0.dist-info/METADATA +0 -321
- invar_tools-1.0.0.dist-info/RECORD +0 -64
- invar_tools-1.0.0.dist-info/entry_points.txt +0 -2
- invar_tools-1.0.0.dist-info/licenses/LICENSE +0 -21
- /invar/shell/{prove_cache.py → prove/cache.py} +0 -0
- {invar_tools-1.0.0.dist-info → invar_tools-1.3.0.dist-info}/WHEEL +0 -0
|
@@ -18,23 +18,22 @@ from typing import TYPE_CHECKING
|
|
|
18
18
|
from returns.result import Failure, Result, Success
|
|
19
19
|
from rich.console import Console
|
|
20
20
|
|
|
21
|
-
|
|
22
|
-
from invar.shell.prove_cache import ProveCache # noqa: TC001 - runtime usage
|
|
21
|
+
from invar.shell.prove.cache import ProveCache # noqa: TC001 - runtime usage
|
|
23
22
|
|
|
24
|
-
# DX-12: Hypothesis fallback
|
|
25
|
-
from invar.shell.
|
|
23
|
+
# DX-12: Hypothesis fallback
|
|
24
|
+
from invar.shell.prove.hypothesis import (
|
|
26
25
|
run_hypothesis_fallback as run_hypothesis_fallback,
|
|
27
26
|
)
|
|
28
|
-
from invar.shell.
|
|
27
|
+
from invar.shell.prove.hypothesis import (
|
|
29
28
|
run_prove_with_fallback as run_prove_with_fallback,
|
|
30
29
|
)
|
|
30
|
+
from invar.shell.subprocess_env import build_subprocess_env # DX-52
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
33
|
from typing import Any
|
|
34
34
|
|
|
35
35
|
console = Console()
|
|
36
36
|
|
|
37
|
-
|
|
38
37
|
# ============================================================
|
|
39
38
|
# CrossHair Status Codes
|
|
40
39
|
# ============================================================
|
|
@@ -56,18 +55,10 @@ class CrossHairStatus:
|
|
|
56
55
|
# ============================================================
|
|
57
56
|
|
|
58
57
|
|
|
58
|
+
# @shell_orchestration: Contract detection for CrossHair prove module
|
|
59
|
+
# @shell_complexity: AST traversal for contract detection
|
|
59
60
|
def has_verifiable_contracts(source: str) -> bool:
|
|
60
|
-
"""
|
|
61
|
-
Check if source has verifiable contracts.
|
|
62
|
-
|
|
63
|
-
DX-13: Hybrid detection - fast string check + AST validation.
|
|
64
|
-
|
|
65
|
-
Args:
|
|
66
|
-
source: Python source code
|
|
67
|
-
|
|
68
|
-
Returns:
|
|
69
|
-
True if file has @pre/@post contracts worth verifying
|
|
70
|
-
"""
|
|
61
|
+
"""Check if source has @pre/@post contracts (DX-13: fast string + AST check)."""
|
|
71
62
|
# Fast path: no contract keywords at all
|
|
72
63
|
if "@pre" not in source and "@post" not in source:
|
|
73
64
|
return False
|
|
@@ -105,9 +96,12 @@ def has_verifiable_contracts(source: str) -> bool:
|
|
|
105
96
|
# ============================================================
|
|
106
97
|
|
|
107
98
|
|
|
99
|
+
# @shell_complexity: CrossHair subprocess with error classification
|
|
108
100
|
def _verify_single_file(
|
|
109
101
|
file_path: str,
|
|
110
102
|
max_iterations: int = 5,
|
|
103
|
+
timeout: int = 300,
|
|
104
|
+
per_condition_timeout: int = 30,
|
|
111
105
|
) -> dict[str, Any]:
|
|
112
106
|
"""
|
|
113
107
|
Verify a single file with CrossHair.
|
|
@@ -117,6 +111,8 @@ def _verify_single_file(
|
|
|
117
111
|
Args:
|
|
118
112
|
file_path: Path to Python file
|
|
119
113
|
max_iterations: Maximum uninteresting iterations (default: 5)
|
|
114
|
+
timeout: Max time per file in seconds (default: 300)
|
|
115
|
+
per_condition_timeout: Max time per contract in seconds (default: 30)
|
|
120
116
|
|
|
121
117
|
Returns:
|
|
122
118
|
Verification result dict
|
|
@@ -132,15 +128,18 @@ def _verify_single_file(
|
|
|
132
128
|
"check",
|
|
133
129
|
file_path,
|
|
134
130
|
f"--max_uninteresting_iterations={max_iterations}",
|
|
131
|
+
f"--per_condition_timeout={per_condition_timeout}",
|
|
135
132
|
"--analysis_kind=deal",
|
|
136
133
|
]
|
|
137
134
|
|
|
138
135
|
try:
|
|
136
|
+
# DX-52: Inject project venv site-packages for uvx compatibility
|
|
139
137
|
result = subprocess.run(
|
|
140
138
|
cmd,
|
|
141
139
|
capture_output=True,
|
|
142
140
|
text=True,
|
|
143
|
-
timeout=
|
|
141
|
+
timeout=timeout,
|
|
142
|
+
env=build_subprocess_env(),
|
|
144
143
|
)
|
|
145
144
|
|
|
146
145
|
elapsed_ms = int((time.time() - start_time) * 1000)
|
|
@@ -153,24 +152,51 @@ def _verify_single_file(
|
|
|
153
152
|
"stdout": result.stdout,
|
|
154
153
|
}
|
|
155
154
|
else:
|
|
155
|
+
# Check if this is an execution error vs actual counterexample
|
|
156
|
+
# CrossHair reports TypeError/AttributeError when it can't
|
|
157
|
+
# symbolically execute C extensions like ast.parse()
|
|
158
|
+
# Check both stdout and stderr for error patterns
|
|
159
|
+
output = result.stdout + "\n" + result.stderr
|
|
160
|
+
execution_errors = [
|
|
161
|
+
"TypeError:",
|
|
162
|
+
"AttributeError:",
|
|
163
|
+
"NotImplementedError:",
|
|
164
|
+
"compile() arg 1 must be", # ast.parse limitation
|
|
165
|
+
"ValueError: wrong parameter order", # CrossHair signature bug
|
|
166
|
+
"ValueError: cannot determine truth", # Symbolic execution limit
|
|
167
|
+
]
|
|
168
|
+
is_execution_error = any(err in output for err in execution_errors)
|
|
169
|
+
|
|
170
|
+
if is_execution_error:
|
|
171
|
+
# Treat as skipped - function uses unsupported operations
|
|
172
|
+
return {
|
|
173
|
+
"file": file_path,
|
|
174
|
+
"status": CrossHairStatus.SKIPPED,
|
|
175
|
+
"time_ms": elapsed_ms,
|
|
176
|
+
"reason": "uses unsupported operations (ast/compile/signature)",
|
|
177
|
+
"stdout": output,
|
|
178
|
+
}
|
|
179
|
+
|
|
180
|
+
# Extract counterexample lines - CrossHair format: "file:line: error: Err when calling func(...)"
|
|
181
|
+
# Include lines with "error:" as they contain the actual counterexamples
|
|
156
182
|
counterexamples = [
|
|
157
183
|
line.strip()
|
|
158
|
-
for line in
|
|
159
|
-
if line.strip() and "error"
|
|
184
|
+
for line in output.split("\n")
|
|
185
|
+
if line.strip() and ": error:" in line.lower()
|
|
160
186
|
]
|
|
161
187
|
return {
|
|
162
188
|
"file": file_path,
|
|
163
189
|
"status": CrossHairStatus.COUNTEREXAMPLE,
|
|
164
190
|
"time_ms": elapsed_ms,
|
|
165
191
|
"counterexamples": counterexamples,
|
|
166
|
-
"stdout":
|
|
192
|
+
"stdout": output,
|
|
167
193
|
}
|
|
168
194
|
|
|
169
195
|
except subprocess.TimeoutExpired:
|
|
170
196
|
return {
|
|
171
197
|
"file": file_path,
|
|
172
198
|
"status": CrossHairStatus.TIMEOUT,
|
|
173
|
-
"time_ms":
|
|
199
|
+
"time_ms": timeout * 1000,
|
|
174
200
|
}
|
|
175
201
|
except Exception as e:
|
|
176
202
|
return {
|
|
@@ -185,22 +211,24 @@ def _verify_single_file(
|
|
|
185
211
|
# ============================================================
|
|
186
212
|
|
|
187
213
|
|
|
214
|
+
# @shell_complexity: Parallel verification with caching and filtering
|
|
188
215
|
def run_crosshair_parallel(
|
|
189
216
|
files: list[Path],
|
|
190
217
|
max_iterations: int = 5,
|
|
191
218
|
max_workers: int | None = None,
|
|
192
219
|
cache: ProveCache | None = None,
|
|
220
|
+
timeout: int = 300,
|
|
221
|
+
per_condition_timeout: int = 30,
|
|
193
222
|
) -> Result[dict, str]:
|
|
194
|
-
"""
|
|
195
|
-
Run CrossHair on multiple files in parallel.
|
|
196
|
-
|
|
197
|
-
DX-13: Parallel execution with caching support.
|
|
223
|
+
"""Run CrossHair on multiple files in parallel (DX-13).
|
|
198
224
|
|
|
199
225
|
Args:
|
|
200
226
|
files: List of Python file paths to verify
|
|
201
227
|
max_iterations: Maximum uninteresting iterations per condition
|
|
202
228
|
max_workers: Number of parallel workers (default: CPU count)
|
|
203
229
|
cache: Optional verification cache
|
|
230
|
+
timeout: Max time per file in seconds (default: 300)
|
|
231
|
+
per_condition_timeout: Max time per contract in seconds (default: 30)
|
|
204
232
|
|
|
205
233
|
Returns:
|
|
206
234
|
Success with verification results or Failure with error message
|
|
@@ -299,7 +327,9 @@ def run_crosshair_parallel(
|
|
|
299
327
|
# Parallel execution
|
|
300
328
|
with ProcessPoolExecutor(max_workers=max_workers) as executor:
|
|
301
329
|
futures = {
|
|
302
|
-
executor.submit(
|
|
330
|
+
executor.submit(
|
|
331
|
+
_verify_single_file, str(f), max_iterations, timeout, per_condition_timeout
|
|
332
|
+
): f
|
|
303
333
|
for f in files_to_verify
|
|
304
334
|
}
|
|
305
335
|
|
|
@@ -321,7 +351,9 @@ def run_crosshair_parallel(
|
|
|
321
351
|
else:
|
|
322
352
|
# Sequential execution (single file or max_workers=1)
|
|
323
353
|
for py_file in files_to_verify:
|
|
324
|
-
result = _verify_single_file(
|
|
354
|
+
result = _verify_single_file(
|
|
355
|
+
str(py_file), max_iterations, timeout, per_condition_timeout
|
|
356
|
+
)
|
|
325
357
|
_process_verification_result(
|
|
326
358
|
result,
|
|
327
359
|
py_file,
|
|
@@ -354,6 +386,8 @@ def run_crosshair_parallel(
|
|
|
354
386
|
)
|
|
355
387
|
|
|
356
388
|
|
|
389
|
+
# @shell_orchestration: Result aggregation helper for parallel verification
|
|
390
|
+
# @shell_complexity: Result classification with cache update
|
|
357
391
|
def _process_verification_result(
|
|
358
392
|
result: dict,
|
|
359
393
|
file_path: Path,
|
|
@@ -389,7 +423,7 @@ def _process_verification_result(
|
|
|
389
423
|
|
|
390
424
|
|
|
391
425
|
def run_crosshair_on_files(
|
|
392
|
-
files: list[Path], timeout: int =
|
|
426
|
+
files: list[Path], timeout: int = 300, per_condition_timeout: int = 30
|
|
393
427
|
) -> Result[dict, str]:
|
|
394
428
|
"""
|
|
395
429
|
Run CrossHair symbolic verification on a list of Python files.
|
|
@@ -398,7 +432,8 @@ def run_crosshair_on_files(
|
|
|
398
432
|
|
|
399
433
|
Args:
|
|
400
434
|
files: List of Python file paths to verify
|
|
401
|
-
timeout:
|
|
435
|
+
timeout: Max time per file in seconds (default: 300)
|
|
436
|
+
per_condition_timeout: Max time per contract in seconds (default: 30)
|
|
402
437
|
|
|
403
438
|
Returns:
|
|
404
439
|
Success with verification results or Failure with error message
|
|
@@ -409,6 +444,8 @@ def run_crosshair_on_files(
|
|
|
409
444
|
max_iterations=5, # Fast mode
|
|
410
445
|
max_workers=None, # Auto-detect
|
|
411
446
|
cache=None, # No cache for basic API
|
|
447
|
+
timeout=timeout,
|
|
448
|
+
per_condition_timeout=per_condition_timeout,
|
|
412
449
|
)
|
|
413
450
|
|
|
414
451
|
|
|
@@ -417,6 +454,8 @@ def run_crosshair_on_files(
|
|
|
417
454
|
# ============================================================
|
|
418
455
|
|
|
419
456
|
|
|
457
|
+
# @shell_orchestration: File selection for incremental verification
|
|
458
|
+
# @shell_complexity: Git integration for incremental verification
|
|
420
459
|
def get_files_to_prove(
|
|
421
460
|
path: Path,
|
|
422
461
|
all_core_files: list[Path],
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Hypothesis fallback for proof verification.
|
|
3
|
+
|
|
4
|
+
DX-12: Provides Hypothesis as automatic fallback when CrossHair
|
|
5
|
+
is unavailable, times out, or skips files.
|
|
6
|
+
|
|
7
|
+
DX-22: Smart routing - detects C extension imports and routes
|
|
8
|
+
directly to Hypothesis without wasting time on CrossHair.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import subprocess
|
|
14
|
+
import sys
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
|
|
18
|
+
from returns.result import Failure, Result, Success
|
|
19
|
+
|
|
20
|
+
from invar.core.verification_routing import get_incompatible_imports
|
|
21
|
+
from invar.shell.subprocess_env import build_subprocess_env
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class FileRouting:
    """Buckets that candidate files are sorted into before verification (DX-22).

    Every field defaults to its own fresh, empty container, so separate
    instances never share state.
    """

    # Paths routed to CrossHair.
    crosshair_files: list[Path] = field(default_factory=list)
    # Paths routed to Hypothesis.
    hypothesis_files: list[Path] = field(default_factory=list)
    # Paths excluded from verification.
    skip_files: list[Path] = field(default_factory=list)
    # Keyed by str(path); each value is the set of incompatible imports
    # recorded for that file.
    incompatible_reasons: dict[str, set[str]] = field(default_factory=dict)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
# @shell_complexity: File I/O with error handling for import detection
def classify_files_for_verification(files: list[Path]) -> FileRouting:
    """
    Classify files for smart verification routing.

    DX-22: Each input path ends up in exactly one bucket:
    - Missing path, non-``.py`` suffix, or unreadable/undecodable file -> skip
    - Source for which ``get_incompatible_imports`` reports anything
      (e.g. C extensions such as numpy, pandas) -> Hypothesis
    - Everything else -> CrossHair

    Note: this function does not check whether a file contains contracts;
    only the import-based routing above is applied here.

    Args:
        files: Candidate paths to classify.

    Returns:
        FileRouting with the classified paths. For every Hypothesis-routed
        file, ``incompatible_reasons[str(path)]`` holds the value returned by
        ``get_incompatible_imports`` for that file.
    """
    routing = FileRouting()

    for file_path in files:
        if file_path.suffix != ".py" or not file_path.exists():
            routing.skip_files.append(file_path)
            continue

        try:
            # Python source defaults to UTF-8; decode explicitly instead of
            # relying on the platform locale encoding.
            source = file_path.read_text(encoding="utf-8")
        except (OSError, ValueError):
            # OSError: permissions, directory named *.py, file vanished, ...
            # ValueError: includes UnicodeDecodeError for non-UTF-8 content.
            routing.skip_files.append(file_path)
            continue

        # Check for incompatible imports
        incompatible = get_incompatible_imports(source)
        if incompatible:
            routing.hypothesis_files.append(file_path)
            routing.incompatible_reasons[str(file_path)] = incompatible
        else:
            routing.crosshair_files.append(file_path)

    return routing
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
# @shell_complexity: Fallback verification with hypothesis availability check
def run_hypothesis_fallback(
    files: list[Path],
    max_examples: int = 100,
) -> Result[dict, str]:
    """
    Run pytest with Hypothesis options over the given files (DX-12 fallback).

    Intended for files that CrossHair skipped or timed out on.

    Args:
        files: Paths to test; only existing ``.py`` files are passed to pytest.
        max_examples: Maximum examples per test. NOTE(review): this value is
            accepted but not forwarded to the pytest command built here.

    Returns:
        Success with a result dict whose ``status`` is:
        - ``CrossHairStatus.SKIPPED`` (with a ``reason``) when Hypothesis is
          not importable, ``files`` is empty, or no existing ``.py`` file
          remains after filtering;
        - ``"passed"`` when pytest exits with 0 or 5 (no tests collected);
        - ``"failed"`` for any other exit code.
        Failure with a message when the subprocess exceeds 300 seconds or
        raises any other exception.
    """
    # Deferred import: a module-level import would be circular.
    from invar.shell.prove.crosshair import CrossHairStatus

    def skipped(reason: str) -> Result[dict, str]:
        # Common payload for every "nothing was run" outcome.
        return Success(
            {
                "status": CrossHairStatus.SKIPPED,
                "reason": reason,
                "files": [],
                "tool": "hypothesis",
            }
        )

    # Availability probe for Hypothesis in the current interpreter.
    try:
        import hypothesis  # noqa: F401
    except ImportError:
        return skipped("Hypothesis not installed (pip install hypothesis)")

    if not files:
        return skipped("no files")

    # Keep only Python files that are actually on disk.
    targets = [
        candidate
        for candidate in files
        if candidate.suffix == ".py" and candidate.exists()
    ]
    if not targets:
        return skipped("no Python files")

    target_names = [str(target) for target in targets]
    pytest_cmd = [
        sys.executable,
        "-m",
        "pytest",
        "--hypothesis-show-statistics",
        "--hypothesis-seed=0",  # Reproducible
        "-x",  # Stop on first failure
        "--tb=short",
        *target_names,
    ]

    try:
        # DX-52: Inject project venv site-packages for uvx compatibility
        completed = subprocess.run(
            pytest_cmd,
            capture_output=True,
            text=True,
            timeout=300,
            env=build_subprocess_env(),
        )
        # Pytest exit codes: 0=passed, 5=no tests collected
        if completed.returncode in (0, 5):
            outcome = "passed"
        else:
            outcome = "failed"
        return Success(
            {
                "status": outcome,
                "files": target_names,
                "exit_code": completed.returncode,
                "stdout": completed.stdout,
                "stderr": completed.stderr,
                "tool": "hypothesis",
                "note": "Fallback from CrossHair",
            }
        )
    except subprocess.TimeoutExpired:
        return Failure("Hypothesis timeout (300s)")
    except Exception as e:
        return Failure(f"Hypothesis error: {e}")
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# @shell_orchestration: DX-22 smart routing + DX-12/13 fallback chain
# @shell_complexity: Multiple verification phases with error handling paths
def run_prove_with_fallback(
    files: list[Path],
    crosshair_timeout: int = 10,
    hypothesis_max_examples: int = 100,
    use_cache: bool = True,
    cache_dir: Path | None = None,
) -> Result[dict, str]:
    """
    Run proof verification with smart routing and automatic fallback.

    DX-22: Smart routing - routes C extension code directly to Hypothesis.
    DX-12 + DX-13: CrossHair with caching, falls back to Hypothesis on failure.

    Flow:
    1. Classify files (CrossHair-compatible vs C-extension)
    2. Run CrossHair on compatible files only
    3. Run Hypothesis on incompatible files, plus all CrossHair files when
       CrossHair was skipped, timed out, was not installed, or returned Failure
    4. Merge results with de-duplicated statistics

    Args:
        files: List of Python file paths to verify
        crosshair_timeout: Ignored (kept for backwards compatibility)
        hypothesis_max_examples: Maximum Hypothesis examples
        use_cache: Whether to use verification cache (DX-13)
        cache_dir: Cache directory (default: .invar/cache/prove)

    Returns:
        Success with a dict containing:
        - ``status``: ``"passed"``, ``"failed"`` or ``"error"``
        - ``routing``: per-bucket counts and incompatible-import reasons,
          captured before any CrossHair fallback is applied
        - ``crosshair`` / ``hypothesis``: the phase result dict, or ``None``
          when that phase did not run
        - ``files``: the input paths as strings
        - ``stats``: summary counters
    """
    # Import here to avoid circular import
    from invar.shell.prove.cache import ProveCache
    from invar.shell.prove.crosshair import CrossHairStatus, run_crosshair_parallel

    # DX-22: Smart routing - classify files before verification
    routing = classify_files_for_verification(files)

    # Initialize result structure with DX-22 routing stats
    result = {
        "status": "passed",
        "routing": {
            "crosshair_files": len(routing.crosshair_files),
            "hypothesis_files": len(routing.hypothesis_files),
            "skip_files": len(routing.skip_files),
            "incompatible_reasons": {
                k: list(v) for k, v in routing.incompatible_reasons.items()
            },
        },
        "crosshair": None,
        "hypothesis": None,
        "files": [str(f) for f in files],
    }

    # DX-13: Initialize cache for CrossHair
    cache = None
    if use_cache:
        if cache_dir is None:
            cache_dir = Path(".invar/cache/prove")
        cache = ProveCache(cache_dir=cache_dir)

    # Phase 1: Run CrossHair on compatible files
    if routing.crosshair_files:
        crosshair_result = run_crosshair_parallel(
            routing.crosshair_files,
            max_iterations=5,  # Fast mode
            max_workers=None,  # Auto-detect
            cache=cache,
        )

        if isinstance(crosshair_result, Success):
            xh_data = crosshair_result.unwrap()
            result["crosshair"] = xh_data

            # Check if CrossHair needs fallback for any files
            xh_status = xh_data.get("status", "")
            needs_fallback = (
                xh_status == CrossHairStatus.SKIPPED
                or xh_status == CrossHairStatus.TIMEOUT
                or "not installed" in xh_data.get("reason", "")
            )

            if needs_fallback:
                # CrossHair failed, add these files to Hypothesis batch
                routing.hypothesis_files.extend(routing.crosshair_files)
                result["crosshair"]["fallback_triggered"] = True
        else:
            # CrossHair error, fallback all to Hypothesis
            routing.hypothesis_files.extend(routing.crosshair_files)
            result["crosshair"] = {
                "status": "error",
                "error": str(crosshair_result.failure()),
                "fallback_triggered": True,
            }

    # Phase 2: Run Hypothesis on incompatible files + fallback files
    if routing.hypothesis_files:
        hypothesis_result = run_hypothesis_fallback(
            routing.hypothesis_files, max_examples=hypothesis_max_examples
        )

        if isinstance(hypothesis_result, Success):
            result["hypothesis"] = hypothesis_result.unwrap()
        else:
            result["hypothesis"] = {
                "status": "error",
                "error": str(hypothesis_result.failure()),
            }
            result["status"] = "failed"

    # Determine overall status.
    # A phase that did not run leaves its entry as None. dict.get(key, {})
    # would return that None (the key exists), so normalise with `or {}`
    # before reading from it.
    xh_summary = result["crosshair"] or {}
    hyp_summary = result["hypothesis"] or {}
    xh_status = xh_summary.get("status", "passed")
    hyp_status = hyp_summary.get("status", "passed")

    if xh_status == "counterexample_found" or hyp_status == "failed":
        result["status"] = "failed"
    elif xh_status == "error" or hyp_status == "error":
        result["status"] = "error"

    # DX-22: Add de-duplicated statistics
    result["stats"] = {
        "crosshair_proven": len(xh_summary.get("verified", [])),
        "hypothesis_tested": len(routing.hypothesis_files),
        "total_verified": len(files) - len(routing.skip_files),
    }

    return Success(result)
|