python-harness 0.0.6__tar.gz → 0.0.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_harness-0.0.6/python_harness.egg-info → python_harness-0.0.8}/PKG-INFO +1 -1
- {python_harness-0.0.6 → python_harness-0.0.8}/pyproject.toml +1 -1
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/cli.py +26 -8
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/evaluator.py +1 -1
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/hard_evaluator.py +44 -3
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/soft_evaluator.py +38 -7
- {python_harness-0.0.6 → python_harness-0.0.8/python_harness.egg-info}/PKG-INFO +1 -1
- {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_hard_evaluator.py +1 -1
- {python_harness-0.0.6 → python_harness-0.0.8}/LICENSE +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/README.md +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/__init__.py +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/qc_evaluator.py +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/SOURCES.txt +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/dependency_links.txt +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/entry_points.txt +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/requires.txt +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/top_level.txt +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/setup.cfg +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_cli.py +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_evaluator.py +0 -0
- {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_soft_evaluator.py +0 -0
|
@@ -140,9 +140,22 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
|
|
|
140
140
|
console.print(
|
|
141
141
|
"[red]Radon CC failed but no specific issues were parsed.[/red]"
|
|
142
142
|
)
|
|
143
|
-
|
|
143
|
+
elif hard_results["radon_cc"]["status"] == "warning":
|
|
144
|
+
err_msg = hard_results['radon_cc'].get('error_message')
|
|
145
|
+
console.print(f"[yellow]Radon CC warning:[/yellow] {err_msg}")
|
|
144
146
|
|
|
145
|
-
|
|
147
|
+
if hard_results.get("pytest", {}).get("status") == "failed":
|
|
148
|
+
error_msg = hard_results["pytest"].get("error_message", "Tests failed")
|
|
149
|
+
console.print(f"[red]Pytest/Coverage issues found:[/red] {error_msg}")
|
|
150
|
+
|
|
151
|
+
# DO NOT sys.exit(1) here anymore!
|
|
152
|
+
# We want to generate the report even if it fails.
|
|
153
|
+
console.print(
|
|
154
|
+
"[yellow]Continuing to soft evaluation to generate "
|
|
155
|
+
"suggestions despite hard failures...[/yellow]"
|
|
156
|
+
)
|
|
157
|
+
else:
|
|
158
|
+
console.print("[bold green]Hard Evaluation Passed![/bold green]")
|
|
146
159
|
|
|
147
160
|
# Print Maintainability Index scorecard
|
|
148
161
|
mi_scores = hard_results.get("radon_mi", {}).get("mi_scores", {})
|
|
@@ -165,11 +178,15 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
|
|
|
165
178
|
)
|
|
166
179
|
for failure in qc_results["failures"]:
|
|
167
180
|
console.print(f"[red]- {failure}[/red]")
|
|
168
|
-
sys.exit(1)
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
181
|
+
# DO NOT sys.exit(1) here! We want to generate suggestions for QC failures too.
|
|
182
|
+
console.print(
|
|
183
|
+
"[yellow]Continuing to soft evaluation to generate "
|
|
184
|
+
"suggestions despite QC failures...[/yellow]"
|
|
185
|
+
)
|
|
186
|
+
else:
|
|
187
|
+
console.print(
|
|
188
|
+
"[bold green]Governance QC Passed! (Change is admissible)[/bold green]"
|
|
189
|
+
)
|
|
173
190
|
|
|
174
191
|
# 3. Soft Evaluation/Readability (Third Fence)
|
|
175
192
|
console.print(
|
|
@@ -204,8 +221,9 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
|
|
|
204
221
|
console.print("\n[yellow]Evaluation completed. Generating report...[/yellow]\n")
|
|
205
222
|
|
|
206
223
|
# Generate Final Report
|
|
224
|
+
# Pass all results to the reporter so it knows *why* things failed
|
|
207
225
|
final_report = evaluator.soft_evaluator.generate_final_report(
|
|
208
|
-
hard_results, soft_results
|
|
226
|
+
hard_results, qc_results, soft_results
|
|
209
227
|
)
|
|
210
228
|
|
|
211
229
|
if final_report:
|
|
@@ -136,7 +136,19 @@ class HardEvaluator:
|
|
|
136
136
|
"output": result.stdout,
|
|
137
137
|
"error_message": result.stderr if result.returncode != 0 else ""
|
|
138
138
|
}
|
|
139
|
+
except FileNotFoundError:
|
|
140
|
+
return {
|
|
141
|
+
"status": "warning",
|
|
142
|
+
"issues": [],
|
|
143
|
+
"error_message": "radon executable not found. Please install it."
|
|
144
|
+
}
|
|
139
145
|
except Exception as e:
|
|
146
|
+
if "No such file or directory: 'radon'" in str(e):
|
|
147
|
+
return {
|
|
148
|
+
"status": "warning",
|
|
149
|
+
"issues": [],
|
|
150
|
+
"error_message": "radon executable not found. Please install it."
|
|
151
|
+
}
|
|
140
152
|
return {"status": "error", "error_message": str(e)}
|
|
141
153
|
|
|
142
154
|
def run_radon_mi(self) -> dict[str, Any]:
|
|
@@ -164,7 +176,19 @@ class HardEvaluator:
|
|
|
164
176
|
"mi_scores": mi_scores,
|
|
165
177
|
"return_code": result.returncode,
|
|
166
178
|
}
|
|
179
|
+
except FileNotFoundError:
|
|
180
|
+
return {
|
|
181
|
+
"status": "warning",
|
|
182
|
+
"mi_scores": {},
|
|
183
|
+
"error_message": "radon executable not found. Please install it."
|
|
184
|
+
}
|
|
167
185
|
except Exception as e:
|
|
186
|
+
if "No such file or directory: 'radon'" in str(e):
|
|
187
|
+
return {
|
|
188
|
+
"status": "warning",
|
|
189
|
+
"mi_scores": {},
|
|
190
|
+
"error_message": "radon executable not found. Please install it."
|
|
191
|
+
}
|
|
168
192
|
return {"status": "error", "error_message": str(e)}
|
|
169
193
|
|
|
170
194
|
def run_pytest(self) -> dict[str, Any]:
|
|
@@ -199,13 +223,29 @@ class HardEvaluator:
|
|
|
199
223
|
ty_res = self.run_ty()
|
|
200
224
|
radon_cc_res = self.run_radon_cc()
|
|
201
225
|
radon_mi_res = self.run_radon_mi()
|
|
202
|
-
|
|
226
|
+
pytest_res = self.run_pytest()
|
|
203
227
|
|
|
228
|
+
# Parse pytest coverage to check if it's < 90%
|
|
229
|
+
cov_percentage = 0.0
|
|
230
|
+
if pytest_res.get("status") == "success" and pytest_res.get("output"):
|
|
231
|
+
try:
|
|
232
|
+
cov_data = json.loads(pytest_res["output"])
|
|
233
|
+
cov_percentage = cov_data.get("totals", {}).get("percent_covered", 0.0)
|
|
234
|
+
if cov_percentage < 90.0:
|
|
235
|
+
pytest_res["status"] = "failed"
|
|
236
|
+
pytest_res["error_message"] = (
|
|
237
|
+
f"Test coverage is {cov_percentage:.2f}%, "
|
|
238
|
+
f"which is below the 90% threshold."
|
|
239
|
+
)
|
|
240
|
+
except Exception:
|
|
241
|
+
pass
|
|
242
|
+
|
|
204
243
|
all_passed = (
|
|
205
244
|
ruff_res.get("status") == "success" and
|
|
206
245
|
mypy_res.get("status") == "success" and
|
|
207
246
|
ty_res.get("status") in ("success", "warning") and
|
|
208
|
-
radon_cc_res.get("status")
|
|
247
|
+
radon_cc_res.get("status") in ("success", "warning") and
|
|
248
|
+
pytest_res.get("status") == "success"
|
|
209
249
|
)
|
|
210
250
|
|
|
211
251
|
return {
|
|
@@ -214,5 +254,6 @@ class HardEvaluator:
|
|
|
214
254
|
"mypy": mypy_res,
|
|
215
255
|
"ty": ty_res,
|
|
216
256
|
"radon_cc": radon_cc_res,
|
|
217
|
-
"radon_mi": radon_mi_res
|
|
257
|
+
"radon_mi": radon_mi_res,
|
|
258
|
+
"pytest": pytest_res
|
|
218
259
|
}
|
|
@@ -373,7 +373,10 @@ class SoftEvaluator:
|
|
|
373
373
|
}
|
|
374
374
|
|
|
375
375
|
def generate_final_report(
|
|
376
|
-
self,
|
|
376
|
+
self,
|
|
377
|
+
hard_results: dict[str, Any],
|
|
378
|
+
qc_results: dict[str, Any],
|
|
379
|
+
soft_results: dict[str, Any]
|
|
377
380
|
) -> dict[str, Any]:
|
|
378
381
|
"""
|
|
379
382
|
Synthesize all evaluation results into a final verdict and exactly
|
|
@@ -408,25 +411,48 @@ class SoftEvaluator:
|
|
|
408
411
|
mi_scores = hard_results.get("radon_mi", {}).get("mi_scores", {})
|
|
409
412
|
avg_mi = sum(mi_scores.values()) / len(mi_scores) if mi_scores else 100.0
|
|
410
413
|
|
|
414
|
+
# Extract failures
|
|
415
|
+
hard_failed = not hard_results.get("all_passed", True)
|
|
416
|
+
|
|
417
|
+
hard_errors = []
|
|
418
|
+
if hard_failed:
|
|
419
|
+
if hard_results.get("ruff", {}).get("status") != "success":
|
|
420
|
+
hard_errors.append("Linter (Ruff) failed.")
|
|
421
|
+
if hard_results.get("mypy", {}).get("status") != "success":
|
|
422
|
+
hard_errors.append("Type checker (Mypy) failed.")
|
|
423
|
+
if hard_results.get("pytest", {}).get("status") != "success":
|
|
424
|
+
pytest_err = hard_results.get("pytest", {}).get(
|
|
425
|
+
"error_message", "Tests or Coverage failed."
|
|
426
|
+
)
|
|
427
|
+
hard_errors.append(pytest_err)
|
|
428
|
+
|
|
429
|
+
qc_errors = qc_results.get("failures", [])
|
|
430
|
+
|
|
411
431
|
qa_score = soft_results.get("understandability_score", 100.0)
|
|
412
432
|
qa_entities = soft_results.get("qa_results", {}).get("sampled_entities", [])
|
|
413
433
|
|
|
414
434
|
sys_prompt = (
|
|
415
435
|
"You are an elite Python Codebase Evaluator. You have just analyzed "
|
|
416
436
|
"a repository. Your task is to provide a final judgment and EXACTLY "
|
|
417
|
-
"3 concrete, actionable improvement suggestions
|
|
418
|
-
"
|
|
419
|
-
"
|
|
437
|
+
"3 concrete, actionable improvement suggestions.\n"
|
|
438
|
+
"If the codebase failed its Hard or QC evaluations (e.g. tests "
|
|
439
|
+
"failed, coverage is low, or governance violated), your suggestions "
|
|
440
|
+
"MUST prioritize fixing those issues.\n"
|
|
441
|
+
"Otherwise, focus on refactoring/quality improvements without "
|
|
442
|
+
"changing external functionality.\n\n"
|
|
420
443
|
"Output MUST be in valid JSON matching this schema:\n"
|
|
421
444
|
"{\n"
|
|
422
445
|
' "verdict": "Pass" or "Fail",\n'
|
|
423
|
-
' "summary": "One paragraph summary of codebase health
|
|
446
|
+
' "summary": "One paragraph summary of codebase health and '
|
|
447
|
+
'any critical failures",\n'
|
|
424
448
|
' "suggestions": [\n'
|
|
425
449
|
' {"title": "str", "description": "str", "target_file": "str"}\n'
|
|
426
450
|
" ]\n"
|
|
427
451
|
"}\n"
|
|
428
|
-
"Rule for Verdict:
|
|
429
|
-
"
|
|
452
|
+
"Rule for Verdict: If there are Hard Failures or QC Failures, "
|
|
453
|
+
"verdict MUST be Fail. Otherwise, Pass if Average Maintainability "
|
|
454
|
+
"> 50 and QA Score > 75 and no Critical CC issues (>15). "
|
|
455
|
+
"Otherwise Fail."
|
|
430
456
|
)
|
|
431
457
|
|
|
432
458
|
user_content = (
|
|
@@ -435,6 +461,11 @@ class SoftEvaluator:
|
|
|
435
461
|
f"- Number of functions with Cyclomatic Complexity > 15: "
|
|
436
462
|
f"{len(cc_issues)}\n"
|
|
437
463
|
f"- Agent QA Readability Score: {qa_score:.1f}/100\n\n"
|
|
464
|
+
f"Failures (Prioritize these!):\n"
|
|
465
|
+
f"- Hard Evaluation Errors: "
|
|
466
|
+
f"{hard_errors if hard_errors else 'None'}\n"
|
|
467
|
+
f"- QC/Governance Errors: "
|
|
468
|
+
f"{qc_errors if qc_errors else 'None'}\n\n"
|
|
438
469
|
f"QA Feedback Snippets:\n"
|
|
439
470
|
+ "\n".join(
|
|
440
471
|
[f" * {q['entity']}: {q['feedback']}" for q in qa_entities]
|
|
@@ -90,6 +90,6 @@ def test_radon_cc_syntax_error(monkeypatch: Any, tmp_path: Path) -> None:
|
|
|
90
90
|
result = evaluator.run_radon_cc()
|
|
91
91
|
|
|
92
92
|
assert result["status"] == "failed"
|
|
93
|
-
assert len(result
|
|
93
|
+
assert len(result.get("issues", [])) == 0
|
|
94
94
|
# Radon should output to stderr because of the syntax error
|
|
95
95
|
assert "SyntaxError" in result["error_message"] or result["return_code"] != 0
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|