python-harness 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {python_harness-0.0.6/python_harness.egg-info → python_harness-0.0.8}/PKG-INFO +1 -1
  2. {python_harness-0.0.6 → python_harness-0.0.8}/pyproject.toml +1 -1
  3. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/cli.py +26 -8
  4. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/evaluator.py +1 -1
  5. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/hard_evaluator.py +44 -3
  6. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/soft_evaluator.py +38 -7
  7. {python_harness-0.0.6 → python_harness-0.0.8/python_harness.egg-info}/PKG-INFO +1 -1
  8. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_hard_evaluator.py +1 -1
  9. {python_harness-0.0.6 → python_harness-0.0.8}/LICENSE +0 -0
  10. {python_harness-0.0.6 → python_harness-0.0.8}/README.md +0 -0
  11. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/__init__.py +0 -0
  12. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/qc_evaluator.py +0 -0
  13. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/SOURCES.txt +0 -0
  14. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/dependency_links.txt +0 -0
  15. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/entry_points.txt +0 -0
  16. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/requires.txt +0 -0
  17. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/top_level.txt +0 -0
  18. {python_harness-0.0.6 → python_harness-0.0.8}/setup.cfg +0 -0
  19. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_cli.py +0 -0
  20. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_evaluator.py +0 -0
  21. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_soft_evaluator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-harness
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: An agentic codebase evaluation and evolution tool for Python projects.
5
5
  Author-email: Mingli Yuan <mingli.yuan@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "python-harness"
3
- version = "0.0.6"
3
+ version = "0.0.8"
4
4
  description = "An agentic codebase evaluation and evolution tool for Python projects."
5
5
  requires-python = ">=3.10"
6
6
  readme = "README.md"
@@ -140,9 +140,22 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
140
140
  console.print(
141
141
  "[red]Radon CC failed but no specific issues were parsed.[/red]"
142
142
  )
143
- sys.exit(1)
143
+ elif hard_results["radon_cc"]["status"] == "warning":
144
+ err_msg = hard_results['radon_cc'].get('error_message')
145
+ console.print(f"[yellow]Radon CC warning:[/yellow] {err_msg}")
144
146
 
145
- console.print("[bold green]Hard Evaluation Passed![/bold green]")
147
+ if hard_results.get("pytest", {}).get("status") == "failed":
148
+ error_msg = hard_results["pytest"].get("error_message", "Tests failed")
149
+ console.print(f"[red]Pytest/Coverage issues found:[/red] {error_msg}")
150
+
151
+ # DO NOT sys.exit(1) here anymore!
152
+ # We want to generate the report even if it fails.
153
+ console.print(
154
+ "[yellow]Continuing to soft evaluation to generate "
155
+ "suggestions despite hard failures...[/yellow]"
156
+ )
157
+ else:
158
+ console.print("[bold green]Hard Evaluation Passed![/bold green]")
146
159
 
147
160
  # Print Maintainability Index scorecard
148
161
  mi_scores = hard_results.get("radon_mi", {}).get("mi_scores", {})
@@ -165,11 +178,15 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
165
178
  )
166
179
  for failure in qc_results["failures"]:
167
180
  console.print(f"[red]- {failure}[/red]")
168
- sys.exit(1)
169
-
170
- console.print(
171
- "[bold green]Governance QC Passed! (Change is admissible)[/bold green]"
172
- )
181
+ # DO NOT sys.exit(1) here! We want to generate suggestions for QC failures too.
182
+ console.print(
183
+ "[yellow]Continuing to soft evaluation to generate "
184
+ "suggestions despite QC failures...[/yellow]"
185
+ )
186
+ else:
187
+ console.print(
188
+ "[bold green]Governance QC Passed! (Change is admissible)[/bold green]"
189
+ )
173
190
 
174
191
  # 3. Soft Evaluation/Readability (Third Fence)
175
192
  console.print(
@@ -204,8 +221,9 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
204
221
  console.print("\n[yellow]Evaluation completed. Generating report...[/yellow]\n")
205
222
 
206
223
  # Generate Final Report
224
+ # Pass all results to the reporter so it knows *why* things failed
207
225
  final_report = evaluator.soft_evaluator.generate_final_report(
208
- hard_results, soft_results
226
+ hard_results, qc_results, soft_results
209
227
  )
210
228
 
211
229
  if final_report:
@@ -30,7 +30,7 @@ class Evaluator:
30
30
 
31
31
  # Generate Final Synthesized Report with 3 Suggestions
32
32
  final_report = self.soft_evaluator.generate_final_report(
33
- hard_results, soft_results
33
+ hard_results, qc_results, soft_results
34
34
  )
35
35
 
36
36
  return {
@@ -136,7 +136,19 @@ class HardEvaluator:
136
136
  "output": result.stdout,
137
137
  "error_message": result.stderr if result.returncode != 0 else ""
138
138
  }
139
+ except FileNotFoundError:
140
+ return {
141
+ "status": "warning",
142
+ "issues": [],
143
+ "error_message": "radon executable not found. Please install it."
144
+ }
139
145
  except Exception as e:
146
+ if "No such file or directory: 'radon'" in str(e):
147
+ return {
148
+ "status": "warning",
149
+ "issues": [],
150
+ "error_message": "radon executable not found. Please install it."
151
+ }
140
152
  return {"status": "error", "error_message": str(e)}
141
153
 
142
154
  def run_radon_mi(self) -> dict[str, Any]:
@@ -164,7 +176,19 @@ class HardEvaluator:
164
176
  "mi_scores": mi_scores,
165
177
  "return_code": result.returncode,
166
178
  }
179
+ except FileNotFoundError:
180
+ return {
181
+ "status": "warning",
182
+ "mi_scores": {},
183
+ "error_message": "radon executable not found. Please install it."
184
+ }
167
185
  except Exception as e:
186
+ if "No such file or directory: 'radon'" in str(e):
187
+ return {
188
+ "status": "warning",
189
+ "mi_scores": {},
190
+ "error_message": "radon executable not found. Please install it."
191
+ }
168
192
  return {"status": "error", "error_message": str(e)}
169
193
 
170
194
  def run_pytest(self) -> dict[str, Any]:
@@ -199,13 +223,29 @@ class HardEvaluator:
199
223
  ty_res = self.run_ty()
200
224
  radon_cc_res = self.run_radon_cc()
201
225
  radon_mi_res = self.run_radon_mi()
202
- # pytest_res = self.run_pytest() # Better handled as a separate stage
226
+ pytest_res = self.run_pytest()
203
227
 
228
+ # Parse pytest coverage to check if it's < 90%
229
+ cov_percentage = 0.0
230
+ if pytest_res.get("status") == "success" and pytest_res.get("output"):
231
+ try:
232
+ cov_data = json.loads(pytest_res["output"])
233
+ cov_percentage = cov_data.get("totals", {}).get("percent_covered", 0.0)
234
+ if cov_percentage < 90.0:
235
+ pytest_res["status"] = "failed"
236
+ pytest_res["error_message"] = (
237
+ f"Test coverage is {cov_percentage:.2f}%, "
238
+ f"which is below the 90% threshold."
239
+ )
240
+ except Exception:
241
+ pass
242
+
204
243
  all_passed = (
205
244
  ruff_res.get("status") == "success" and
206
245
  mypy_res.get("status") == "success" and
207
246
  ty_res.get("status") in ("success", "warning") and
208
- radon_cc_res.get("status") == "success"
247
+ radon_cc_res.get("status") in ("success", "warning") and
248
+ pytest_res.get("status") == "success"
209
249
  )
210
250
 
211
251
  return {
@@ -214,5 +254,6 @@ class HardEvaluator:
214
254
  "mypy": mypy_res,
215
255
  "ty": ty_res,
216
256
  "radon_cc": radon_cc_res,
217
- "radon_mi": radon_mi_res
257
+ "radon_mi": radon_mi_res,
258
+ "pytest": pytest_res
218
259
  }
@@ -373,7 +373,10 @@ class SoftEvaluator:
373
373
  }
374
374
 
375
375
  def generate_final_report(
376
- self, hard_results: dict[str, Any], soft_results: dict[str, Any]
376
+ self,
377
+ hard_results: dict[str, Any],
378
+ qc_results: dict[str, Any],
379
+ soft_results: dict[str, Any]
377
380
  ) -> dict[str, Any]:
378
381
  """
379
382
  Synthesize all evaluation results into a final verdict and exactly
@@ -408,25 +411,48 @@ class SoftEvaluator:
408
411
  mi_scores = hard_results.get("radon_mi", {}).get("mi_scores", {})
409
412
  avg_mi = sum(mi_scores.values()) / len(mi_scores) if mi_scores else 100.0
410
413
 
414
+ # Extract failures
415
+ hard_failed = not hard_results.get("all_passed", True)
416
+
417
+ hard_errors = []
418
+ if hard_failed:
419
+ if hard_results.get("ruff", {}).get("status") != "success":
420
+ hard_errors.append("Linter (Ruff) failed.")
421
+ if hard_results.get("mypy", {}).get("status") != "success":
422
+ hard_errors.append("Type checker (Mypy) failed.")
423
+ if hard_results.get("pytest", {}).get("status") != "success":
424
+ pytest_err = hard_results.get("pytest", {}).get(
425
+ "error_message", "Tests or Coverage failed."
426
+ )
427
+ hard_errors.append(pytest_err)
428
+
429
+ qc_errors = qc_results.get("failures", [])
430
+
411
431
  qa_score = soft_results.get("understandability_score", 100.0)
412
432
  qa_entities = soft_results.get("qa_results", {}).get("sampled_entities", [])
413
433
 
414
434
  sys_prompt = (
415
435
  "You are an elite Python Codebase Evaluator. You have just analyzed "
416
436
  "a repository. Your task is to provide a final judgment and EXACTLY "
417
- "3 concrete, actionable improvement suggestions. These suggestions "
418
- "MUST NOT change the external functionality (they are refactoring/"
419
- "quality improvements).\n\n"
437
+ "3 concrete, actionable improvement suggestions.\n"
438
+ "If the codebase failed its Hard or QC evaluations (e.g. tests "
439
+ "failed, coverage is low, or governance violated), your suggestions "
440
+ "MUST prioritize fixing those issues.\n"
441
+ "Otherwise, focus on refactoring/quality improvements without "
442
+ "changing external functionality.\n\n"
420
443
  "Output MUST be in valid JSON matching this schema:\n"
421
444
  "{\n"
422
445
  ' "verdict": "Pass" or "Fail",\n'
423
- ' "summary": "One paragraph summary of codebase health",\n'
446
+ ' "summary": "One paragraph summary of codebase health and '
447
+ 'any critical failures",\n'
424
448
  ' "suggestions": [\n'
425
449
  ' {"title": "str", "description": "str", "target_file": "str"}\n'
426
450
  " ]\n"
427
451
  "}\n"
428
- "Rule for Verdict: Pass if Average Maintainability > 50 and "
429
- "QA Score > 75 and no Critical CC issues (>15). Otherwise Fail."
452
+ "Rule for Verdict: If there are Hard Failures or QC Failures, "
453
+ "verdict MUST be Fail. Otherwise, Pass if Average Maintainability "
454
+ "> 50 and QA Score > 75 and no Critical CC issues (>15). "
455
+ "Otherwise Fail."
430
456
  )
431
457
 
432
458
  user_content = (
@@ -435,6 +461,11 @@ class SoftEvaluator:
435
461
  f"- Number of functions with Cyclomatic Complexity > 15: "
436
462
  f"{len(cc_issues)}\n"
437
463
  f"- Agent QA Readability Score: {qa_score:.1f}/100\n\n"
464
+ f"Failures (Prioritize these!):\n"
465
+ f"- Hard Evaluation Errors: "
466
+ f"{hard_errors if hard_errors else 'None'}\n"
467
+ f"- QC/Governance Errors: "
468
+ f"{qc_errors if qc_errors else 'None'}\n\n"
438
469
  f"QA Feedback Snippets:\n"
439
470
  + "\n".join(
440
471
  [f" * {q['entity']}: {q['feedback']}" for q in qa_entities]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-harness
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: An agentic codebase evaluation and evolution tool for Python projects.
5
5
  Author-email: Mingli Yuan <mingli.yuan@gmail.com>
6
6
  License: MIT
@@ -90,6 +90,6 @@ def test_radon_cc_syntax_error(monkeypatch: Any, tmp_path: Path) -> None:
90
90
  result = evaluator.run_radon_cc()
91
91
 
92
92
  assert result["status"] == "failed"
93
- assert len(result["issues"]) == 0
93
+ assert len(result.get("issues", [])) == 0
94
94
  # Radon should output to stderr because of the syntax error
95
95
  assert "SyntaxError" in result["error_message"] or result["return_code"] != 0
File without changes
File without changes
File without changes