python-harness 0.0.6__tar.gz → 0.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (21)
  1. {python_harness-0.0.6/python_harness.egg-info → python_harness-0.0.8}/PKG-INFO +1 -1
  2. {python_harness-0.0.6 → python_harness-0.0.8}/pyproject.toml +1 -1
  3. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/cli.py +26 -8
  4. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/evaluator.py +1 -1
  5. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/hard_evaluator.py +44 -3
  6. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/soft_evaluator.py +38 -7
  7. {python_harness-0.0.6 → python_harness-0.0.8/python_harness.egg-info}/PKG-INFO +1 -1
  8. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_hard_evaluator.py +1 -1
  9. {python_harness-0.0.6 → python_harness-0.0.8}/LICENSE +0 -0
  10. {python_harness-0.0.6 → python_harness-0.0.8}/README.md +0 -0
  11. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/__init__.py +0 -0
  12. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness/qc_evaluator.py +0 -0
  13. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/SOURCES.txt +0 -0
  14. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/dependency_links.txt +0 -0
  15. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/entry_points.txt +0 -0
  16. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/requires.txt +0 -0
  17. {python_harness-0.0.6 → python_harness-0.0.8}/python_harness.egg-info/top_level.txt +0 -0
  18. {python_harness-0.0.6 → python_harness-0.0.8}/setup.cfg +0 -0
  19. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_cli.py +0 -0
  20. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_evaluator.py +0 -0
  21. {python_harness-0.0.6 → python_harness-0.0.8}/tests/test_soft_evaluator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-harness
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: An agentic codebase evaluation and evolution tool for Python projects.
5
5
  Author-email: Mingli Yuan <mingli.yuan@gmail.com>
6
6
  License: MIT
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "python-harness"
3
- version = "0.0.6"
3
+ version = "0.0.8"
4
4
  description = "An agentic codebase evaluation and evolution tool for Python projects."
5
5
  requires-python = ">=3.10"
6
6
  readme = "README.md"
@@ -140,9 +140,22 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
140
140
  console.print(
141
141
  "[red]Radon CC failed but no specific issues were parsed.[/red]"
142
142
  )
143
- sys.exit(1)
143
+ elif hard_results["radon_cc"]["status"] == "warning":
144
+ err_msg = hard_results['radon_cc'].get('error_message')
145
+ console.print(f"[yellow]Radon CC warning:[/yellow] {err_msg}")
144
146
 
145
- console.print("[bold green]Hard Evaluation Passed![/bold green]")
147
+ if hard_results.get("pytest", {}).get("status") == "failed":
148
+ error_msg = hard_results["pytest"].get("error_message", "Tests failed")
149
+ console.print(f"[red]Pytest/Coverage issues found:[/red] {error_msg}")
150
+
151
+ # DO NOT sys.exit(1) here anymore!
152
+ # We want to generate the report even if it fails.
153
+ console.print(
154
+ "[yellow]Continuing to soft evaluation to generate "
155
+ "suggestions despite hard failures...[/yellow]"
156
+ )
157
+ else:
158
+ console.print("[bold green]Hard Evaluation Passed![/bold green]")
146
159
 
147
160
  # Print Maintainability Index scorecard
148
161
  mi_scores = hard_results.get("radon_mi", {}).get("mi_scores", {})
@@ -165,11 +178,15 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
165
178
  )
166
179
  for failure in qc_results["failures"]:
167
180
  console.print(f"[red]- {failure}[/red]")
168
- sys.exit(1)
169
-
170
- console.print(
171
- "[bold green]Governance QC Passed! (Change is admissible)[/bold green]"
172
- )
181
+ # DO NOT sys.exit(1) here! We want to generate suggestions for QC failures too.
182
+ console.print(
183
+ "[yellow]Continuing to soft evaluation to generate "
184
+ "suggestions despite QC failures...[/yellow]"
185
+ )
186
+ else:
187
+ console.print(
188
+ "[bold green]Governance QC Passed! (Change is admissible)[/bold green]"
189
+ )
173
190
 
174
191
  # 3. Soft Evaluation/Readability (Third Fence)
175
192
  console.print(
@@ -204,8 +221,9 @@ def measure(path: str = typer.Argument(".", help="The path to evaluate")) -> Non
204
221
  console.print("\n[yellow]Evaluation completed. Generating report...[/yellow]\n")
205
222
 
206
223
  # Generate Final Report
224
+ # Pass all results to the reporter so it knows *why* things failed
207
225
  final_report = evaluator.soft_evaluator.generate_final_report(
208
- hard_results, soft_results
226
+ hard_results, qc_results, soft_results
209
227
  )
210
228
 
211
229
  if final_report:
@@ -30,7 +30,7 @@ class Evaluator:
30
30
 
31
31
  # Generate Final Synthesized Report with 3 Suggestions
32
32
  final_report = self.soft_evaluator.generate_final_report(
33
- hard_results, soft_results
33
+ hard_results, qc_results, soft_results
34
34
  )
35
35
 
36
36
  return {
@@ -136,7 +136,19 @@ class HardEvaluator:
136
136
  "output": result.stdout,
137
137
  "error_message": result.stderr if result.returncode != 0 else ""
138
138
  }
139
+ except FileNotFoundError:
140
+ return {
141
+ "status": "warning",
142
+ "issues": [],
143
+ "error_message": "radon executable not found. Please install it."
144
+ }
139
145
  except Exception as e:
146
+ if "No such file or directory: 'radon'" in str(e):
147
+ return {
148
+ "status": "warning",
149
+ "issues": [],
150
+ "error_message": "radon executable not found. Please install it."
151
+ }
140
152
  return {"status": "error", "error_message": str(e)}
141
153
 
142
154
  def run_radon_mi(self) -> dict[str, Any]:
@@ -164,7 +176,19 @@ class HardEvaluator:
164
176
  "mi_scores": mi_scores,
165
177
  "return_code": result.returncode,
166
178
  }
179
+ except FileNotFoundError:
180
+ return {
181
+ "status": "warning",
182
+ "mi_scores": {},
183
+ "error_message": "radon executable not found. Please install it."
184
+ }
167
185
  except Exception as e:
186
+ if "No such file or directory: 'radon'" in str(e):
187
+ return {
188
+ "status": "warning",
189
+ "mi_scores": {},
190
+ "error_message": "radon executable not found. Please install it."
191
+ }
168
192
  return {"status": "error", "error_message": str(e)}
169
193
 
170
194
  def run_pytest(self) -> dict[str, Any]:
@@ -199,13 +223,29 @@ class HardEvaluator:
199
223
  ty_res = self.run_ty()
200
224
  radon_cc_res = self.run_radon_cc()
201
225
  radon_mi_res = self.run_radon_mi()
202
- # pytest_res = self.run_pytest() # Better handled as a separate stage
226
+ pytest_res = self.run_pytest()
203
227
 
228
+ # Parse pytest coverage to check if it's < 90%
229
+ cov_percentage = 0.0
230
+ if pytest_res.get("status") == "success" and pytest_res.get("output"):
231
+ try:
232
+ cov_data = json.loads(pytest_res["output"])
233
+ cov_percentage = cov_data.get("totals", {}).get("percent_covered", 0.0)
234
+ if cov_percentage < 90.0:
235
+ pytest_res["status"] = "failed"
236
+ pytest_res["error_message"] = (
237
+ f"Test coverage is {cov_percentage:.2f}%, "
238
+ f"which is below the 90% threshold."
239
+ )
240
+ except Exception:
241
+ pass
242
+
204
243
  all_passed = (
205
244
  ruff_res.get("status") == "success" and
206
245
  mypy_res.get("status") == "success" and
207
246
  ty_res.get("status") in ("success", "warning") and
208
- radon_cc_res.get("status") == "success"
247
+ radon_cc_res.get("status") in ("success", "warning") and
248
+ pytest_res.get("status") == "success"
209
249
  )
210
250
 
211
251
  return {
@@ -214,5 +254,6 @@ class HardEvaluator:
214
254
  "mypy": mypy_res,
215
255
  "ty": ty_res,
216
256
  "radon_cc": radon_cc_res,
217
- "radon_mi": radon_mi_res
257
+ "radon_mi": radon_mi_res,
258
+ "pytest": pytest_res
218
259
  }
@@ -373,7 +373,10 @@ class SoftEvaluator:
373
373
  }
374
374
 
375
375
  def generate_final_report(
376
- self, hard_results: dict[str, Any], soft_results: dict[str, Any]
376
+ self,
377
+ hard_results: dict[str, Any],
378
+ qc_results: dict[str, Any],
379
+ soft_results: dict[str, Any]
377
380
  ) -> dict[str, Any]:
378
381
  """
379
382
  Synthesize all evaluation results into a final verdict and exactly
@@ -408,25 +411,48 @@ class SoftEvaluator:
408
411
  mi_scores = hard_results.get("radon_mi", {}).get("mi_scores", {})
409
412
  avg_mi = sum(mi_scores.values()) / len(mi_scores) if mi_scores else 100.0
410
413
 
414
+ # Extract failures
415
+ hard_failed = not hard_results.get("all_passed", True)
416
+
417
+ hard_errors = []
418
+ if hard_failed:
419
+ if hard_results.get("ruff", {}).get("status") != "success":
420
+ hard_errors.append("Linter (Ruff) failed.")
421
+ if hard_results.get("mypy", {}).get("status") != "success":
422
+ hard_errors.append("Type checker (Mypy) failed.")
423
+ if hard_results.get("pytest", {}).get("status") != "success":
424
+ pytest_err = hard_results.get("pytest", {}).get(
425
+ "error_message", "Tests or Coverage failed."
426
+ )
427
+ hard_errors.append(pytest_err)
428
+
429
+ qc_errors = qc_results.get("failures", [])
430
+
411
431
  qa_score = soft_results.get("understandability_score", 100.0)
412
432
  qa_entities = soft_results.get("qa_results", {}).get("sampled_entities", [])
413
433
 
414
434
  sys_prompt = (
415
435
  "You are an elite Python Codebase Evaluator. You have just analyzed "
416
436
  "a repository. Your task is to provide a final judgment and EXACTLY "
417
- "3 concrete, actionable improvement suggestions. These suggestions "
418
- "MUST NOT change the external functionality (they are refactoring/"
419
- "quality improvements).\n\n"
437
+ "3 concrete, actionable improvement suggestions.\n"
438
+ "If the codebase failed its Hard or QC evaluations (e.g. tests "
439
+ "failed, coverage is low, or governance violated), your suggestions "
440
+ "MUST prioritize fixing those issues.\n"
441
+ "Otherwise, focus on refactoring/quality improvements without "
442
+ "changing external functionality.\n\n"
420
443
  "Output MUST be in valid JSON matching this schema:\n"
421
444
  "{\n"
422
445
  ' "verdict": "Pass" or "Fail",\n'
423
- ' "summary": "One paragraph summary of codebase health",\n'
446
+ ' "summary": "One paragraph summary of codebase health and '
447
+ 'any critical failures",\n'
424
448
  ' "suggestions": [\n'
425
449
  ' {"title": "str", "description": "str", "target_file": "str"}\n'
426
450
  " ]\n"
427
451
  "}\n"
428
- "Rule for Verdict: Pass if Average Maintainability > 50 and "
429
- "QA Score > 75 and no Critical CC issues (>15). Otherwise Fail."
452
+ "Rule for Verdict: If there are Hard Failures or QC Failures, "
453
+ "verdict MUST be Fail. Otherwise, Pass if Average Maintainability "
454
+ "> 50 and QA Score > 75 and no Critical CC issues (>15). "
455
+ "Otherwise Fail."
430
456
  )
431
457
 
432
458
  user_content = (
@@ -435,6 +461,11 @@ class SoftEvaluator:
435
461
  f"- Number of functions with Cyclomatic Complexity > 15: "
436
462
  f"{len(cc_issues)}\n"
437
463
  f"- Agent QA Readability Score: {qa_score:.1f}/100\n\n"
464
+ f"Failures (Prioritize these!):\n"
465
+ f"- Hard Evaluation Errors: "
466
+ f"{hard_errors if hard_errors else 'None'}\n"
467
+ f"- QC/Governance Errors: "
468
+ f"{qc_errors if qc_errors else 'None'}\n\n"
438
469
  f"QA Feedback Snippets:\n"
439
470
  + "\n".join(
440
471
  [f" * {q['entity']}: {q['feedback']}" for q in qa_entities]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-harness
3
- Version: 0.0.6
3
+ Version: 0.0.8
4
4
  Summary: An agentic codebase evaluation and evolution tool for Python projects.
5
5
  Author-email: Mingli Yuan <mingli.yuan@gmail.com>
6
6
  License: MIT
@@ -90,6 +90,6 @@ def test_radon_cc_syntax_error(monkeypatch: Any, tmp_path: Path) -> None:
90
90
  result = evaluator.run_radon_cc()
91
91
 
92
92
  assert result["status"] == "failed"
93
- assert len(result["issues"]) == 0
93
+ assert len(result.get("issues", [])) == 0
94
94
  # Radon should output to stderr because of the syntax error
95
95
  assert "SyntaxError" in result["error_message"] or result["return_code"] != 0
File without changes
File without changes
File without changes