python-harness 0.0.10__tar.gz → 0.0.11__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (22)
  1. {python_harness-0.0.10/python_harness.egg-info → python_harness-0.0.11}/PKG-INFO +6 -6
  2. {python_harness-0.0.10 → python_harness-0.0.11}/pyproject.toml +6 -6
  3. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/__init__.py +1 -1
  4. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/cli.py +10 -2
  5. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/hard_evaluator.py +3 -1
  6. {python_harness-0.0.10 → python_harness-0.0.11/python_harness.egg-info}/PKG-INFO +6 -6
  7. {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_cli.py +74 -0
  8. {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_hard_evaluator.py +64 -0
  9. {python_harness-0.0.10 → python_harness-0.0.11}/LICENSE +0 -0
  10. {python_harness-0.0.10 → python_harness-0.0.11}/README.md +0 -0
  11. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/evaluator.py +0 -0
  12. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/qc_evaluator.py +0 -0
  13. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/soft_evaluator.py +0 -0
  14. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/SOURCES.txt +0 -0
  15. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/dependency_links.txt +0 -0
  16. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/entry_points.txt +0 -0
  17. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/requires.txt +3 -3
  18. {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/top_level.txt +0 -0
  19. {python_harness-0.0.10 → python_harness-0.0.11}/setup.cfg +0 -0
  20. {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_evaluator.py +0 -0
  21. {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_qc_evaluator.py +0 -0
  22. {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_soft_evaluator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-harness
3
- Version: 0.0.10
3
+ Version: 0.0.11
4
4
  Summary: An agentic codebase evaluation and evolution tool for Python projects.
5
5
  Author-email: Mingli Yuan <mingli.yuan@gmail.com>
6
6
  License: MIT
@@ -15,13 +15,13 @@ Requires-Dist: anthropic>=0.18.0
15
15
  Requires-Dist: tenacity>=8.2.0
16
16
  Requires-Dist: tiktoken>=0.6.0
17
17
  Requires-Dist: python-dotenv>=1.0.0
18
+ Requires-Dist: pytest>=8.0.0
19
+ Requires-Dist: pytest-cov>=4.1.0
20
+ Requires-Dist: ruff>=0.3.0
21
+ Requires-Dist: mypy>=1.9.0
22
+ Requires-Dist: radon>=6.0.1
18
23
  Provides-Extra: dev
19
- Requires-Dist: pytest>=8.0.0; extra == "dev"
20
- Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
21
- Requires-Dist: ruff>=0.3.0; extra == "dev"
22
- Requires-Dist: mypy>=1.9.0; extra == "dev"
23
24
  Requires-Dist: ty>=0.0.1; extra == "dev"
24
- Requires-Dist: radon>=6.0.1; extra == "dev"
25
25
  Dynamic: license-file
26
26
 
27
27
  # Python Harness
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "python-harness"
3
- version = "0.0.10"
3
+ version = "0.0.11"
4
4
  description = "An agentic codebase evaluation and evolution tool for Python projects."
5
5
  requires-python = ">=3.10"
6
6
  readme = "README.md"
@@ -17,18 +17,18 @@ dependencies = [
17
17
  "tenacity>=8.2.0",
18
18
  "tiktoken>=0.6.0",
19
19
  "python-dotenv>=1.0.0",
20
- ]
21
-
22
- [project.optional-dependencies]
23
- dev = [
24
20
  "pytest>=8.0.0",
25
21
  "pytest-cov>=4.1.0",
26
22
  "ruff>=0.3.0",
27
23
  "mypy>=1.9.0",
28
- "ty>=0.0.1", # Assuming ty is available or will be replaced with actual LSP integration
29
24
  "radon>=6.0.1",
30
25
  ]
31
26
 
27
+ [project.optional-dependencies]
28
+ dev = [
29
+ "ty>=0.0.1", # Assuming ty is available or will be replaced with actual LSP integration
30
+ ]
31
+
32
32
  [build-system]
33
33
  requires = ["setuptools>=61.0"]
34
34
  build-backend = "setuptools.build_meta"
@@ -2,4 +2,4 @@
2
2
  Python Harness - An agentic evaluation tool for codebases.
3
3
  """
4
4
 
5
- __version__ = "0.0.10"
5
+ __version__ = "0.0.11"
@@ -33,13 +33,18 @@ def _print_detail_block(title: str, details: str, color: str) -> None:
33
33
  console.print()
34
34
 
35
35
 
36
- def _print_ruff_issues(issues: list[dict[str, Any]]) -> None:
36
+ def _print_ruff_issues(
37
+ issues: list[dict[str, Any]],
38
+ error_message: str = "",
39
+ ) -> None:
37
40
  console.print("[red]Ruff issues found:[/red]")
38
41
  for issue in issues:
39
42
  file = issue.get("filename", "unknown")
40
43
  line = issue.get("location", {}).get("row", "?")
41
44
  msg = issue.get("message", "unknown issue")
42
45
  console.print(f" - {file}:{line} {msg}")
46
+ if not issues and error_message:
47
+ console.print(f" {error_message}")
43
48
  console.print()
44
49
 
45
50
 
@@ -99,7 +104,10 @@ def _print_hard_failure_details(hard_results: dict[str, Any]) -> None:
99
104
 
100
105
  ruff_issues = hard_results.get("ruff", {}).get("issues", [])
101
106
  if hard_results.get("ruff", {}).get("status") != "success":
102
- _print_ruff_issues(ruff_issues)
107
+ _print_ruff_issues(
108
+ ruff_issues,
109
+ str(hard_results.get("ruff", {}).get("error_message", "")),
110
+ )
103
111
 
104
112
  if hard_results.get("mypy", {}).get("status") != "success":
105
113
  output = str(hard_results.get("mypy", {}).get("output", ""))
@@ -47,6 +47,7 @@ class HardEvaluator:
47
47
  "status": status,
48
48
  "issues": issues,
49
49
  "return_code": result.returncode,
50
+ "error_message": result.stderr.strip(),
50
51
  }
51
52
  except Exception as e:
52
53
  return {"status": "error", "error_message": str(e)}
@@ -65,7 +66,7 @@ class HardEvaluator:
65
66
  status = "success" if result.returncode == 0 else "failed"
66
67
  return {
67
68
  "status": status,
68
- "output": result.stdout,
69
+ "output": result.stdout or result.stderr,
69
70
  "return_code": result.returncode,
70
71
  }
71
72
  except Exception as e:
@@ -243,6 +244,7 @@ class HardEvaluator:
243
244
  "output": result.stdout,
244
245
  "return_code": result.returncode,
245
246
  "coverage_percentage": coverage_percentage,
247
+ "error_message": result.stderr.strip(),
246
248
  }
247
249
  except subprocess.TimeoutExpired:
248
250
  return {
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: python-harness
3
- Version: 0.0.10
3
+ Version: 0.0.11
4
4
  Summary: An agentic codebase evaluation and evolution tool for Python projects.
5
5
  Author-email: Mingli Yuan <mingli.yuan@gmail.com>
6
6
  License: MIT
@@ -15,13 +15,13 @@ Requires-Dist: anthropic>=0.18.0
15
15
  Requires-Dist: tenacity>=8.2.0
16
16
  Requires-Dist: tiktoken>=0.6.0
17
17
  Requires-Dist: python-dotenv>=1.0.0
18
+ Requires-Dist: pytest>=8.0.0
19
+ Requires-Dist: pytest-cov>=4.1.0
20
+ Requires-Dist: ruff>=0.3.0
21
+ Requires-Dist: mypy>=1.9.0
22
+ Requires-Dist: radon>=6.0.1
18
23
  Provides-Extra: dev
19
- Requires-Dist: pytest>=8.0.0; extra == "dev"
20
- Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
21
- Requires-Dist: ruff>=0.3.0; extra == "dev"
22
- Requires-Dist: mypy>=1.9.0; extra == "dev"
23
24
  Requires-Dist: ty>=0.0.1; extra == "dev"
24
- Requires-Dist: radon>=6.0.1; extra == "dev"
25
25
  Dynamic: license-file
26
26
 
27
27
  # Python Harness
@@ -427,3 +427,77 @@ def test_refine_reports_suggestions(monkeypatch: Any) -> None:
427
427
  assert result.exit_code == 0
428
428
  assert "Found 2 suggestions. Starting evolution branches..." in result.stdout
429
429
  assert "Evolution engine skeleton ready." in result.stdout
430
+
431
+
432
+ def test_measure_surfaces_hard_tool_errors(monkeypatch: Any) -> None:
433
+ """
434
+ Test that measure prints hard-tool error details when tool invocations fail early.
435
+ """
436
+ class DummyHardEvaluator:
437
+ def evaluate(self) -> dict[str, Any]:
438
+ return {
439
+ "all_passed": False,
440
+ "ruff": {
441
+ "status": "failed",
442
+ "issues": [],
443
+ "error_message": "No module named ruff",
444
+ },
445
+ "mypy": {"status": "failed", "output": "No module named mypy"},
446
+ "ty": {
447
+ "status": "warning",
448
+ "error_message": "ty executable not found. Skipping ty checks.",
449
+ },
450
+ "radon_cc": {
451
+ "status": "warning",
452
+ "issues": [],
453
+ "error_message": "No module named radon",
454
+ },
455
+ "radon_mi": {"status": "success", "mi_scores": {}},
456
+ "pytest": {
457
+ "status": "failed",
458
+ "error_message": "No module named pytest",
459
+ },
460
+ }
461
+
462
+ class DummyQcEvaluator:
463
+ def evaluate(self) -> dict[str, Any]:
464
+ return {"all_passed": True, "failures": []}
465
+
466
+ class DummySoftEvaluator:
467
+ def evaluate(self) -> dict[str, Any]:
468
+ return {
469
+ "package_summary": {
470
+ "total_files": 1,
471
+ "total_tokens": 1,
472
+ "package_understanding": "Mock understanding",
473
+ },
474
+ "understandability_score": 100.0,
475
+ "qa_results": {"sampled_entities": []},
476
+ }
477
+
478
+ def generate_final_report(
479
+ self,
480
+ hard_results: dict[str, Any],
481
+ qc_results: dict[str, Any],
482
+ soft_results: dict[str, Any],
483
+ ) -> dict[str, Any]:
484
+ return {"verdict": "Fail", "summary": "Mock summary", "suggestions": []}
485
+
486
+ class DummyEvaluator:
487
+ def __init__(self, path: str):
488
+ self.path = path
489
+ self.hard_evaluator = DummyHardEvaluator()
490
+ self.qc_evaluator = DummyQcEvaluator()
491
+ self.soft_evaluator = DummySoftEvaluator()
492
+
493
+ monkeypatch.setattr(cli_module, "Evaluator", DummyEvaluator)
494
+
495
+ result = runner.invoke(app, ["measure", "."])
496
+
497
+ assert result.exit_code == 1
498
+ assert "Ruff issues found" in result.stdout
499
+ assert "No module named ruff" in result.stdout
500
+ assert "Mypy issues found" in result.stdout
501
+ assert "No module named mypy" in result.stdout
502
+ assert "Pytest/Coverage issues found" in result.stdout
503
+ assert "No module named pytest" in result.stdout
@@ -287,6 +287,49 @@ def test_run_mypy_returns_stdout(monkeypatch: Any) -> None:
287
287
  assert "error: nope" in result["output"]
288
288
 
289
289
 
290
+ def test_run_ruff_surfaces_stderr_when_no_json_issues(monkeypatch: Any) -> None:
291
+ """
292
+ Test that run_ruff preserves stderr when Ruff fails before emitting JSON.
293
+ """
294
+ def mock_run(args: Any, **kwargs: Any) -> Any:
295
+ class MockResult:
296
+ returncode = 1
297
+ stdout = ""
298
+ stderr = "No module named ruff"
299
+
300
+ return MockResult()
301
+
302
+ monkeypatch.setattr("subprocess.run", mock_run)
303
+
304
+ evaluator = HardEvaluator(".")
305
+ result = evaluator.run_ruff()
306
+
307
+ assert result["status"] == "failed"
308
+ assert result["issues"] == []
309
+ assert result["error_message"] == "No module named ruff"
310
+
311
+
312
+ def test_run_mypy_surfaces_stderr(monkeypatch: Any) -> None:
313
+ """
314
+ Test that run_mypy preserves stderr when mypy fails before stdout output.
315
+ """
316
+ def mock_run(args: Any, **kwargs: Any) -> Any:
317
+ class MockResult:
318
+ returncode = 1
319
+ stdout = ""
320
+ stderr = "No module named mypy"
321
+
322
+ return MockResult()
323
+
324
+ monkeypatch.setattr("subprocess.run", mock_run)
325
+
326
+ evaluator = HardEvaluator(".")
327
+ result = evaluator.run_mypy()
328
+
329
+ assert result["status"] == "failed"
330
+ assert result["output"] == "No module named mypy"
331
+
332
+
290
333
  def test_run_radon_mi_reads_scores(monkeypatch: Any) -> None:
291
334
  """
292
335
  Test that run_radon_mi parses maintainability scores from JSON.
@@ -308,6 +351,27 @@ def test_run_radon_mi_reads_scores(monkeypatch: Any) -> None:
308
351
  assert result["mi_scores"] == {"a.py": 77.0}
309
352
 
310
353
 
354
+ def test_run_pytest_surfaces_stderr(monkeypatch: Any, tmp_path: Path) -> None:
355
+ """
356
+ Test that run_pytest preserves stderr when pytest fails early.
357
+ """
358
+ def mock_run(args: Any, **kwargs: Any) -> Any:
359
+ class MockResult:
360
+ returncode = 1
361
+ stdout = ""
362
+ stderr = "No module named pytest"
363
+
364
+ return MockResult()
365
+
366
+ monkeypatch.setattr("subprocess.run", mock_run)
367
+
368
+ evaluator = HardEvaluator(str(tmp_path))
369
+ result = evaluator.run_pytest()
370
+
371
+ assert result["status"] == "failed"
372
+ assert result["error_message"] == "No module named pytest"
373
+
374
+
311
375
  def test_evaluate_fails_when_coverage_report_missing(monkeypatch: Any) -> None:
312
376
  """
313
377
  Test that missing coverage data fails the hard gate even when tests pass.
File without changes
@@ -6,11 +6,11 @@ anthropic>=0.18.0
6
6
  tenacity>=8.2.0
7
7
  tiktoken>=0.6.0
8
8
  python-dotenv>=1.0.0
9
-
10
- [dev]
11
9
  pytest>=8.0.0
12
10
  pytest-cov>=4.1.0
13
11
  ruff>=0.3.0
14
12
  mypy>=1.9.0
15
- ty>=0.0.1
16
13
  radon>=6.0.1
14
+
15
+ [dev]
16
+ ty>=0.0.1