python-harness 0.0.8__tar.gz → 0.0.11__tar.gz

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (27)
  1. {python_harness-0.0.8/python_harness.egg-info → python_harness-0.0.11}/PKG-INFO +6 -6
  2. {python_harness-0.0.8 → python_harness-0.0.11}/pyproject.toml +6 -7
  3. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness/__init__.py +1 -1
  4. python_harness-0.0.11/python_harness/cli.py +320 -0
  5. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness/hard_evaluator.py +67 -23
  6. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness/soft_evaluator.py +248 -153
  7. {python_harness-0.0.8 → python_harness-0.0.11/python_harness.egg-info}/PKG-INFO +6 -6
  8. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness.egg-info/SOURCES.txt +1 -0
  9. python_harness-0.0.11/tests/test_cli.py +503 -0
  10. python_harness-0.0.11/tests/test_evaluator.py +37 -0
  11. python_harness-0.0.11/tests/test_hard_evaluator.py +422 -0
  12. python_harness-0.0.11/tests/test_qc_evaluator.py +65 -0
  13. python_harness-0.0.11/tests/test_soft_evaluator.py +354 -0
  14. python_harness-0.0.8/python_harness/cli.py +0 -253
  15. python_harness-0.0.8/tests/test_cli.py +0 -26
  16. python_harness-0.0.8/tests/test_evaluator.py +0 -18
  17. python_harness-0.0.8/tests/test_hard_evaluator.py +0 -95
  18. python_harness-0.0.8/tests/test_soft_evaluator.py +0 -42
  19. {python_harness-0.0.8 → python_harness-0.0.11}/LICENSE +0 -0
  20. {python_harness-0.0.8 → python_harness-0.0.11}/README.md +0 -0
  21. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness/evaluator.py +0 -0
  22. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness/qc_evaluator.py +0 -0
  23. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness.egg-info/dependency_links.txt +0 -0
  24. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness.egg-info/entry_points.txt +0 -0
  25. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness.egg-info/requires.txt +3 -3
  26. {python_harness-0.0.8 → python_harness-0.0.11}/python_harness.egg-info/top_level.txt +0 -0
  27. {python_harness-0.0.8 → python_harness-0.0.11}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-harness
3
- Version: 0.0.8
3
+ Version: 0.0.11
4
4
  Summary: An agentic codebase evaluation and evolution tool for Python projects.
5
5
  Author-email: Mingli Yuan <mingli.yuan@gmail.com>
6
6
  License: MIT
@@ -15,13 +15,13 @@ Requires-Dist: anthropic>=0.18.0
15
15
  Requires-Dist: tenacity>=8.2.0
16
16
  Requires-Dist: tiktoken>=0.6.0
17
17
  Requires-Dist: python-dotenv>=1.0.0
18
+ Requires-Dist: pytest>=8.0.0
19
+ Requires-Dist: pytest-cov>=4.1.0
20
+ Requires-Dist: ruff>=0.3.0
21
+ Requires-Dist: mypy>=1.9.0
22
+ Requires-Dist: radon>=6.0.1
18
23
  Provides-Extra: dev
19
- Requires-Dist: pytest>=8.0.0; extra == "dev"
20
- Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
21
- Requires-Dist: ruff>=0.3.0; extra == "dev"
22
- Requires-Dist: mypy>=1.9.0; extra == "dev"
23
24
  Requires-Dist: ty>=0.0.1; extra == "dev"
24
- Requires-Dist: radon>=6.0.1; extra == "dev"
25
25
  Dynamic: license-file
26
26
 
27
27
  # Python Harness
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "python-harness"
3
- version = "0.0.8"
3
+ version = "0.0.11"
4
4
  description = "An agentic codebase evaluation and evolution tool for Python projects."
5
5
  requires-python = ">=3.10"
6
6
  readme = "README.md"
@@ -17,18 +17,18 @@ dependencies = [
17
17
  "tenacity>=8.2.0",
18
18
  "tiktoken>=0.6.0",
19
19
  "python-dotenv>=1.0.0",
20
- ]
21
-
22
- [project.optional-dependencies]
23
- dev = [
24
20
  "pytest>=8.0.0",
25
21
  "pytest-cov>=4.1.0",
26
22
  "ruff>=0.3.0",
27
23
  "mypy>=1.9.0",
28
- "ty>=0.0.1", # Assuming ty is available or will be replaced with actual LSP integration
29
24
  "radon>=6.0.1",
30
25
  ]
31
26
 
27
+ [project.optional-dependencies]
28
+ dev = [
29
+ "ty>=0.0.1", # Assuming ty is available or will be replaced with actual LSP integration
30
+ ]
31
+
32
32
  [build-system]
33
33
  requires = ["setuptools>=61.0"]
34
34
  build-backend = "setuptools.build_meta"
@@ -62,4 +62,3 @@ addopts = "-ra -q --cov=python_harness --cov-report=term-missing --cov-report=ht
62
62
  testpaths = [
63
63
  "tests",
64
64
  ]
65
-
@@ -2,4 +2,4 @@
2
2
  Python Harness - An agentic evaluation tool for codebases.
3
3
  """
4
4
 
5
- __version__ = "0.0.1"
5
+ __version__ = "0.0.11"
@@ -0,0 +1,320 @@
1
+ """
2
+ Command-line interface for python-harness.
3
+ """
4
+
5
+ import os
6
+ import sys
7
+ from typing import Any
8
+
9
+ import typer
10
+ from dotenv import load_dotenv
11
+ from rich.console import Console
12
+
13
+ from python_harness.evaluator import Evaluator
14
+
15
+ # Try to find .env file explicitly before anything else executes
16
+ env_path = os.path.join(os.getcwd(), '.env')
17
+ if os.path.exists(env_path):
18
+ load_dotenv(dotenv_path=env_path)
19
+ else:
20
+ load_dotenv() # Fallback to default search
21
+
22
+ app = typer.Typer(help="Agentic harness tool for universal Python codebase evaluation.")
23
+ console = Console()
24
+
25
+
26
+ def _print_detail_block(title: str, details: str, color: str) -> None:
27
+ normalized_details = [
28
+ line.rstrip() for line in details.splitlines() if line.strip()
29
+ ]
30
+ console.print(f"[{color}]{title}:[/{color}]")
31
+ for line in normalized_details:
32
+ console.print(f" {line}")
33
+ console.print()
34
+
35
+
36
+ def _print_ruff_issues(
37
+ issues: list[dict[str, Any]],
38
+ error_message: str = "",
39
+ ) -> None:
40
+ console.print("[red]Ruff issues found:[/red]")
41
+ for issue in issues:
42
+ file = issue.get("filename", "unknown")
43
+ line = issue.get("location", {}).get("row", "?")
44
+ msg = issue.get("message", "unknown issue")
45
+ console.print(f" - {file}:{line} {msg}")
46
+ if not issues and error_message:
47
+ console.print(f" {error_message}")
48
+ console.print()
49
+
50
+
51
+ def _print_ty_result(ty_results: dict[str, Any]) -> None:
52
+ status = ty_results.get("status")
53
+ if status == "warning":
54
+ msg = str(ty_results.get("error_message", "ty not found"))
55
+ _print_detail_block("Ty warning", msg, "yellow")
56
+ return
57
+ if status == "success":
58
+ return
59
+
60
+ output = str(ty_results.get("output", ""))
61
+ error_msg = str(ty_results.get("error_message", ""))
62
+ if output:
63
+ _print_detail_block("Ty issues found", output, "red")
64
+ elif error_msg:
65
+ _print_detail_block("Ty error", error_msg, "red")
66
+ else:
67
+ console.print("[red]Ty failed, but no standard output was captured.[/red]")
68
+
69
+
70
+ def _print_radon_cc_result(radon_results: dict[str, Any]) -> None:
71
+ status = radon_results.get("status")
72
+ if status == "warning":
73
+ err_msg = str(radon_results.get("error_message", ""))
74
+ _print_detail_block("Radon CC warning", err_msg, "yellow")
75
+ return
76
+ if status != "failed":
77
+ return
78
+
79
+ issues = radon_results.get("issues", [])
80
+ if issues:
81
+ console.print(
82
+ f"[red]Cyclomatic Complexity too high "
83
+ f"({len(issues)} functions > 15):[/red]"
84
+ )
85
+ for issue in issues:
86
+ console.print(
87
+ f" - {issue['file']}: {issue['type']} '{issue['name']}' "
88
+ f"has CC {issue['complexity']}"
89
+ )
90
+ console.print()
91
+ return
92
+
93
+ err_msg = str(radon_results.get("error_message", ""))
94
+ if err_msg:
95
+ _print_detail_block("Radon CC error", err_msg, "red")
96
+ return
97
+ console.print("[red]Radon CC failed but no specific issues were parsed.[/red]")
98
+ console.print()
99
+
100
+
101
+ def _print_hard_failure_details(hard_results: dict[str, Any]) -> None:
102
+ console.print("[bold red]Hard Evaluation Failed![/bold red]")
103
+ console.print()
104
+
105
+ ruff_issues = hard_results.get("ruff", {}).get("issues", [])
106
+ if hard_results.get("ruff", {}).get("status") != "success":
107
+ _print_ruff_issues(
108
+ ruff_issues,
109
+ str(hard_results.get("ruff", {}).get("error_message", "")),
110
+ )
111
+
112
+ if hard_results.get("mypy", {}).get("status") != "success":
113
+ output = str(hard_results.get("mypy", {}).get("output", ""))
114
+ _print_detail_block("Mypy issues found", output, "red")
115
+
116
+ _print_ty_result(hard_results.get("ty", {}))
117
+ _print_radon_cc_result(hard_results.get("radon_cc", {}))
118
+
119
+ if hard_results.get("pytest", {}).get("status") == "failed":
120
+ error_msg = str(hard_results.get("pytest", {}).get("error_message", ""))
121
+ _print_detail_block("Pytest/Coverage issues found", error_msg, "red")
122
+
123
+ console.print(
124
+ "[yellow]Continuing to soft evaluation to generate "
125
+ "suggestions despite hard failures...[/yellow]"
126
+ )
127
+
128
+
129
+ def _print_hard_evaluation_summary(hard_results: dict[str, Any]) -> None:
130
+ if hard_results["all_passed"]:
131
+ console.print("[bold green]Hard Evaluation Passed![/bold green]")
132
+ return
133
+ _print_hard_failure_details(hard_results)
134
+
135
+
136
+ def _print_mi_scorecard(hard_results: dict[str, Any]) -> None:
137
+ mi_scores = hard_results.get("radon_mi", {}).get("mi_scores", {})
138
+ if not mi_scores:
139
+ return
140
+
141
+ avg_mi = sum(mi_scores.values()) / len(mi_scores)
142
+ color = "green" if avg_mi > 50 else "yellow" if avg_mi > 20 else "red"
143
+ console.print(f"[{color}]Average Maintainability Index: {avg_mi:.1f}/100[/{color}]")
144
+
145
+
146
+ def _print_qc_summary(qc_results: dict[str, Any]) -> None:
147
+ console.print()
148
+ console.print("[bold blue]Running Governance QC (Second Fence)...[/bold blue]")
149
+
150
+ if qc_results["all_passed"]:
151
+ console.print(
152
+ "[bold green]Governance QC Passed! (Change is admissible)[/bold green]"
153
+ )
154
+ console.print()
155
+ return
156
+
157
+ console.print("[bold red]Governance QC Failed![/bold red]")
158
+ console.print()
159
+ console.print(
160
+ "[red]The proposed changes violate governance constraints "
161
+ "or lack sufficient evidence.[/red]"
162
+ )
163
+ for failure in qc_results["failures"]:
164
+ console.print(f"[red]- {failure}[/red]")
165
+ console.print()
166
+ console.print(
167
+ "[yellow]Continuing to soft evaluation to generate "
168
+ "suggestions despite QC failures...[/yellow]"
169
+ )
170
+ console.print()
171
+
172
+
173
+ def _print_soft_evaluation_start() -> None:
174
+ console.print(
175
+ "[bold blue]Running Soft Evaluation "
176
+ "(Readability & Understandability)...[/bold blue]"
177
+ )
178
+
179
+
180
+ def _print_soft_summary(soft_results: dict[str, Any]) -> None:
181
+ pkg_summary = soft_results["package_summary"]
182
+ console.print(
183
+ f"[green]Analyzed {pkg_summary['total_files']} files with a total of "
184
+ f"{pkg_summary['total_tokens']} tokens.[/green]"
185
+ )
186
+ console.print(
187
+ f"[magenta]Agent's Understanding of the Package:[/magenta]\n"
188
+ f"{pkg_summary['package_understanding']}"
189
+ )
190
+
191
+ console.print()
192
+ console.print(
193
+ f"[cyan]Overall Understandability Score:[/cyan] "
194
+ f"{soft_results['understandability_score']:.1f}/100"
195
+ )
196
+
197
+ qa_results = soft_results.get("qa_results", {}).get("sampled_entities", [])
198
+ if qa_results:
199
+ console.print()
200
+ console.print("[bold yellow]Blind QA Sampling Results:[/bold yellow]")
201
+ for qa in qa_results:
202
+ color = "green" if qa["score"] >= 80 else "red"
203
+ console.print(f" - [{color}]{qa['entity']}: Score {qa['score']}[/{color}]")
204
+ console.print(f" [dim]Feedback: {qa['feedback']}[/dim]")
205
+
206
+ console.print()
207
+ console.print("[yellow]Evaluation completed. Generating report...[/yellow]")
208
+ console.print()
209
+
210
+
211
+ def _print_final_report(final_report: dict[str, Any]) -> None:
212
+ verdict = str(final_report.get("verdict", "Unknown"))
213
+ verdict_color = "bold green" if "Pass" in verdict else "bold red"
214
+
215
+ console.print(
216
+ f"[{verdict_color}]=== FINAL VERDICT: {verdict} ===[/{verdict_color}]"
217
+ )
218
+ console.print(f"[bold]Summary:[/bold] {final_report.get('summary', '')}")
219
+ console.print()
220
+
221
+ suggestions = final_report.get("suggestions", [])
222
+ if suggestions:
223
+ console.print("[bold cyan]Top 3 Improvement Suggestions:[/bold cyan]")
224
+ for i, sug in enumerate(suggestions, 1):
225
+ console.print(
226
+ f" {i}. [bold]{sug.get('title', 'Suggestion')}[/bold] "
227
+ f"(Target: [yellow]{sug.get('target_file', 'unknown')}[/yellow])"
228
+ )
229
+ console.print(f" [dim]{sug.get('description', '')}[/dim]")
230
+
231
+
232
+ @app.command()
233
+ def refine(
234
+ path: str = typer.Argument(".", help="The path to evaluate and evolve"),
235
+ steps: int = typer.Option(1, help="Number of evolution steps to perform"),
236
+ max_retries: int = typer.Option(3, help="Maximum retries per variant if tests fail")
237
+ ) -> None:
238
+ """
239
+ Refine the codebase through an agentic Edit-Test-Improve loop.
240
+ Generates variants based on suggestions, tests them, and picks the best.
241
+ """
242
+ console.print(
243
+ f"[bold magenta]Starting evolution loop for path:[/bold magenta] {path} "
244
+ f"[dim](steps={steps}, max_retries={max_retries})[/dim]"
245
+ )
246
+
247
+ # 1. First, run a baseline evaluation to get suggestions
248
+ evaluator = Evaluator(path)
249
+ console.print("[cyan]Running baseline evaluation...[/cyan]")
250
+ hard_results = evaluator.hard_evaluator.evaluate()
251
+ soft_results = evaluator.soft_evaluator.evaluate()
252
+ baseline_report = evaluator.soft_evaluator.generate_final_report(
253
+ hard_results, {"all_passed": True, "failures": []}, soft_results
254
+ )
255
+
256
+ suggestions = baseline_report.get("suggestions", [])
257
+ if not suggestions:
258
+ console.print("[yellow]No suggestions found to evolve. Exiting.[/yellow]")
259
+ return
260
+
261
+ console.print(
262
+ f"[green]Found {len(suggestions)} suggestions. "
263
+ f"Starting evolution branches...[/green]"
264
+ )
265
+
266
+ # TODO: Implement the Git branching and Agent modification logic here.
267
+ # The loop will be:
268
+ # for step in range(steps):
269
+ # for suggestion in suggestions:
270
+ # checkout new branch variant-X
271
+ # for retry in range(max_retries):
272
+ # ask LLM to apply suggestion to code
273
+ # run pytest
274
+ # if pytest passes:
275
+ # run harness . to get new score
276
+ # break
277
+ # else:
278
+ # feed error back to LLM for retry
279
+ # compare all variants and checkout the best one
280
+
281
+ console.print(
282
+ "[yellow]Evolution engine skeleton ready. "
283
+ "Actual git mutation logic pending.[/yellow]"
284
+ )
285
+ @app.command()
286
+ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> None:
287
+ """
288
+ Measure the codebase against hard, soft, and governance constraints.
289
+ Outputs a final report with scores and actionable improvement suggestions.
290
+ """
291
+ console.print(
292
+ f"[bold green]Starting harness measurement for path:[/bold green] {path}"
293
+ )
294
+
295
+ evaluator = Evaluator(path)
296
+ console.print("[bold blue]Running Hard Evaluation (ruff, mypy)...[/bold blue]")
297
+ hard_results = evaluator.hard_evaluator.evaluate()
298
+ _print_hard_evaluation_summary(hard_results)
299
+ _print_mi_scorecard(hard_results)
300
+
301
+ qc_results = evaluator.qc_evaluator.evaluate()
302
+ _print_qc_summary(qc_results)
303
+
304
+ _print_soft_evaluation_start()
305
+ soft_results = evaluator.soft_evaluator.evaluate()
306
+ _print_soft_summary(soft_results)
307
+
308
+ final_report = evaluator.soft_evaluator.generate_final_report(
309
+ hard_results, qc_results, soft_results
310
+ )
311
+ if not final_report:
312
+ return
313
+
314
+ _print_final_report(final_report)
315
+ if "Fail" in str(final_report.get("verdict", "Unknown")):
316
+ sys.exit(1)
317
+
318
+
319
+ if __name__ == "__main__":
320
+ app()
@@ -4,12 +4,15 @@ Core module for integrating hard evaluation tools like ruff, mypy, and pytest.
4
4
 
5
5
  import json
6
6
  import subprocess
7
+ import sys
8
+ import tempfile
7
9
  from pathlib import Path
8
10
  from typing import Any
9
11
 
10
12
  from rich.console import Console
11
13
 
12
14
  console = Console()
15
+ PYTEST_TIMEOUT_SECONDS = 60
13
16
 
14
17
  class HardEvaluator:
15
18
  """
@@ -25,7 +28,15 @@ class HardEvaluator:
25
28
  """
26
29
  try:
27
30
  result = subprocess.run(
28
- ["ruff", "check", str(self.target_path), "--output-format", "json"],
31
+ [
32
+ sys.executable,
33
+ "-m",
34
+ "ruff",
35
+ "check",
36
+ str(self.target_path),
37
+ "--output-format",
38
+ "json",
39
+ ],
29
40
  capture_output=True,
30
41
  text=True,
31
42
  check=False
@@ -36,6 +47,7 @@ class HardEvaluator:
36
47
  "status": status,
37
48
  "issues": issues,
38
49
  "return_code": result.returncode,
50
+ "error_message": result.stderr.strip(),
39
51
  }
40
52
  except Exception as e:
41
53
  return {"status": "error", "error_message": str(e)}
@@ -46,7 +58,7 @@ class HardEvaluator:
46
58
  """
47
59
  try:
48
60
  result = subprocess.run(
49
- ["mypy", str(self.target_path)],
61
+ [sys.executable, "-m", "mypy", str(self.target_path)],
50
62
  capture_output=True,
51
63
  text=True,
52
64
  check=False
@@ -54,7 +66,7 @@ class HardEvaluator:
54
66
  status = "success" if result.returncode == 0 else "failed"
55
67
  return {
56
68
  "status": status,
57
- "output": result.stdout,
69
+ "output": result.stdout or result.stderr,
58
70
  "return_code": result.returncode,
59
71
  }
60
72
  except Exception as e:
@@ -101,7 +113,15 @@ class HardEvaluator:
101
113
  """
102
114
  try:
103
115
  result = subprocess.run(
104
- ["radon", "cc", "-j", "-a", str(self.target_path)],
116
+ [
117
+ sys.executable,
118
+ "-m",
119
+ "radon",
120
+ "cc",
121
+ "-j",
122
+ "-a",
123
+ str(self.target_path),
124
+ ],
105
125
  capture_output=True,
106
126
  text=True,
107
127
  check=False
@@ -143,7 +163,7 @@ class HardEvaluator:
143
163
  "error_message": "radon executable not found. Please install it."
144
164
  }
145
165
  except Exception as e:
146
- if "No such file or directory: 'radon'" in str(e):
166
+ if "No module named radon" in str(e) or "radon" in str(e):
147
167
  return {
148
168
  "status": "warning",
149
169
  "issues": [],
@@ -159,7 +179,7 @@ class HardEvaluator:
159
179
  """
160
180
  try:
161
181
  result = subprocess.run(
162
- ["radon", "mi", "-j", str(self.target_path)],
182
+ [sys.executable, "-m", "radon", "mi", "-j", str(self.target_path)],
163
183
  capture_output=True,
164
184
  text=True,
165
185
  check=False
@@ -183,7 +203,7 @@ class HardEvaluator:
183
203
  "error_message": "radon executable not found. Please install it."
184
204
  }
185
205
  except Exception as e:
186
- if "No such file or directory: 'radon'" in str(e):
206
+ if "No module named radon" in str(e) or "radon" in str(e):
187
207
  return {
188
208
  "status": "warning",
189
209
  "mi_scores": {},
@@ -196,22 +216,45 @@ class HardEvaluator:
196
216
  Run Pytest test suite and return coverage results.
197
217
  """
198
218
  try:
199
- # When pytest is run within pytest, it can cause issues or hang.
200
- # Here we just run it as a subprocess to gather results.
201
- result = subprocess.run(
202
- ["pytest", str(self.target_path), "--cov", "--cov-report=json"],
203
- capture_output=True,
204
- text=True,
205
- check=False
206
- )
219
+ with tempfile.TemporaryDirectory() as tmp_dir:
220
+ coverage_report = Path(tmp_dir) / "coverage.json"
221
+ result = subprocess.run(
222
+ [
223
+ sys.executable,
224
+ "-m",
225
+ "pytest",
226
+ str(self.target_path),
227
+ "--cov",
228
+ f"--cov-report=json:{coverage_report}",
229
+ ],
230
+ capture_output=True,
231
+ text=True,
232
+ check=False,
233
+ timeout=PYTEST_TIMEOUT_SECONDS,
234
+ )
235
+ coverage_percentage = None
236
+ if coverage_report.exists():
237
+ coverage_data = json.loads(coverage_report.read_text())
238
+ coverage_percentage = coverage_data.get("totals", {}).get(
239
+ "percent_covered"
240
+ )
207
241
  status = "success" if result.returncode == 0 else "failed"
208
242
  return {
209
243
  "status": status,
210
244
  "output": result.stdout,
211
245
  "return_code": result.returncode,
246
+ "coverage_percentage": coverage_percentage,
247
+ "error_message": result.stderr.strip(),
248
+ }
249
+ except subprocess.TimeoutExpired:
250
+ return {
251
+ "status": "failed",
252
+ "error_message": (
253
+ f"Pytest run timed out after {PYTEST_TIMEOUT_SECONDS} seconds."
254
+ ),
212
255
  }
213
256
  except Exception as e:
214
- return {"status": "error", "error_message": str(e)}
257
+ return {"status": "error", "error_message": str(e)}
215
258
 
216
259
  def evaluate(self) -> dict[str, Any]:
217
260
  """
@@ -226,19 +269,20 @@ class HardEvaluator:
226
269
  pytest_res = self.run_pytest()
227
270
 
228
271
  # Parse pytest coverage to check if it's < 90%
229
- cov_percentage = 0.0
230
- if pytest_res.get("status") == "success" and pytest_res.get("output"):
231
- try:
232
- cov_data = json.loads(pytest_res["output"])
233
- cov_percentage = cov_data.get("totals", {}).get("percent_covered", 0.0)
272
+ cov_percentage = pytest_res.get("coverage_percentage")
273
+ if pytest_res.get("status") == "success":
274
+ if isinstance(cov_percentage, (int, float)):
234
275
  if cov_percentage < 90.0:
235
276
  pytest_res["status"] = "failed"
236
277
  pytest_res["error_message"] = (
237
278
  f"Test coverage is {cov_percentage:.2f}%, "
238
279
  f"which is below the 90% threshold."
239
280
  )
240
- except Exception:
241
- pass
281
+ else:
282
+ pytest_res["status"] = "failed"
283
+ pytest_res["error_message"] = (
284
+ "Coverage report was missing or unreadable."
285
+ )
242
286
 
243
287
  all_passed = (
244
288
  ruff_res.get("status") == "success" and