python-harness 0.0.10__tar.gz → 0.0.11__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {python_harness-0.0.10/python_harness.egg-info → python_harness-0.0.11}/PKG-INFO +6 -6
- {python_harness-0.0.10 → python_harness-0.0.11}/pyproject.toml +6 -6
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/__init__.py +1 -1
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/cli.py +10 -2
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/hard_evaluator.py +3 -1
- {python_harness-0.0.10 → python_harness-0.0.11/python_harness.egg-info}/PKG-INFO +6 -6
- {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_cli.py +74 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_hard_evaluator.py +64 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/LICENSE +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/README.md +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/evaluator.py +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/qc_evaluator.py +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness/soft_evaluator.py +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/SOURCES.txt +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/dependency_links.txt +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/entry_points.txt +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/requires.txt +3 -3
- {python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/top_level.txt +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/setup.cfg +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_evaluator.py +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_qc_evaluator.py +0 -0
- {python_harness-0.0.10 → python_harness-0.0.11}/tests/test_soft_evaluator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-harness
|
|
3
|
-
Version: 0.0.10
|
|
3
|
+
Version: 0.0.11
|
|
4
4
|
Summary: An agentic codebase evaluation and evolution tool for Python projects.
|
|
5
5
|
Author-email: Mingli Yuan <mingli.yuan@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -15,13 +15,13 @@ Requires-Dist: anthropic>=0.18.0
|
|
|
15
15
|
Requires-Dist: tenacity>=8.2.0
|
|
16
16
|
Requires-Dist: tiktoken>=0.6.0
|
|
17
17
|
Requires-Dist: python-dotenv>=1.0.0
|
|
18
|
+
Requires-Dist: pytest>=8.0.0
|
|
19
|
+
Requires-Dist: pytest-cov>=4.1.0
|
|
20
|
+
Requires-Dist: ruff>=0.3.0
|
|
21
|
+
Requires-Dist: mypy>=1.9.0
|
|
22
|
+
Requires-Dist: radon>=6.0.1
|
|
18
23
|
Provides-Extra: dev
|
|
19
|
-
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
20
|
-
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
21
|
-
Requires-Dist: ruff>=0.3.0; extra == "dev"
|
|
22
|
-
Requires-Dist: mypy>=1.9.0; extra == "dev"
|
|
23
24
|
Requires-Dist: ty>=0.0.1; extra == "dev"
|
|
24
|
-
Requires-Dist: radon>=6.0.1; extra == "dev"
|
|
25
25
|
Dynamic: license-file
|
|
26
26
|
|
|
27
27
|
# Python Harness
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "python-harness"
|
|
3
|
-
version = "0.0.10"
|
|
3
|
+
version = "0.0.11"
|
|
4
4
|
description = "An agentic codebase evaluation and evolution tool for Python projects."
|
|
5
5
|
requires-python = ">=3.10"
|
|
6
6
|
readme = "README.md"
|
|
@@ -17,18 +17,18 @@ dependencies = [
|
|
|
17
17
|
"tenacity>=8.2.0",
|
|
18
18
|
"tiktoken>=0.6.0",
|
|
19
19
|
"python-dotenv>=1.0.0",
|
|
20
|
-
]
|
|
21
|
-
|
|
22
|
-
[project.optional-dependencies]
|
|
23
|
-
dev = [
|
|
24
20
|
"pytest>=8.0.0",
|
|
25
21
|
"pytest-cov>=4.1.0",
|
|
26
22
|
"ruff>=0.3.0",
|
|
27
23
|
"mypy>=1.9.0",
|
|
28
|
-
"ty>=0.0.1", # Assuming ty is available or will be replaced with actual LSP integration
|
|
29
24
|
"radon>=6.0.1",
|
|
30
25
|
]
|
|
31
26
|
|
|
27
|
+
[project.optional-dependencies]
|
|
28
|
+
dev = [
|
|
29
|
+
"ty>=0.0.1", # Assuming ty is available or will be replaced with actual LSP integration
|
|
30
|
+
]
|
|
31
|
+
|
|
32
32
|
[build-system]
|
|
33
33
|
requires = ["setuptools>=61.0"]
|
|
34
34
|
build-backend = "setuptools.build_meta"
|
|
@@ -33,13 +33,18 @@ def _print_detail_block(title: str, details: str, color: str) -> None:
|
|
|
33
33
|
console.print()
|
|
34
34
|
|
|
35
35
|
|
|
36
|
-
def _print_ruff_issues(issues: list[dict[str, Any]]) -> None:
|
|
36
|
+
def _print_ruff_issues(
|
|
37
|
+
issues: list[dict[str, Any]],
|
|
38
|
+
error_message: str = "",
|
|
39
|
+
) -> None:
|
|
37
40
|
console.print("[red]Ruff issues found:[/red]")
|
|
38
41
|
for issue in issues:
|
|
39
42
|
file = issue.get("filename", "unknown")
|
|
40
43
|
line = issue.get("location", {}).get("row", "?")
|
|
41
44
|
msg = issue.get("message", "unknown issue")
|
|
42
45
|
console.print(f" - {file}:{line} {msg}")
|
|
46
|
+
if not issues and error_message:
|
|
47
|
+
console.print(f" {error_message}")
|
|
43
48
|
console.print()
|
|
44
49
|
|
|
45
50
|
|
|
@@ -99,7 +104,10 @@ def _print_hard_failure_details(hard_results: dict[str, Any]) -> None:
|
|
|
99
104
|
|
|
100
105
|
ruff_issues = hard_results.get("ruff", {}).get("issues", [])
|
|
101
106
|
if hard_results.get("ruff", {}).get("status") != "success":
|
|
102
|
-
_print_ruff_issues(ruff_issues)
|
|
107
|
+
_print_ruff_issues(
|
|
108
|
+
ruff_issues,
|
|
109
|
+
str(hard_results.get("ruff", {}).get("error_message", "")),
|
|
110
|
+
)
|
|
103
111
|
|
|
104
112
|
if hard_results.get("mypy", {}).get("status") != "success":
|
|
105
113
|
output = str(hard_results.get("mypy", {}).get("output", ""))
|
|
@@ -47,6 +47,7 @@ class HardEvaluator:
|
|
|
47
47
|
"status": status,
|
|
48
48
|
"issues": issues,
|
|
49
49
|
"return_code": result.returncode,
|
|
50
|
+
"error_message": result.stderr.strip(),
|
|
50
51
|
}
|
|
51
52
|
except Exception as e:
|
|
52
53
|
return {"status": "error", "error_message": str(e)}
|
|
@@ -65,7 +66,7 @@ class HardEvaluator:
|
|
|
65
66
|
status = "success" if result.returncode == 0 else "failed"
|
|
66
67
|
return {
|
|
67
68
|
"status": status,
|
|
68
|
-
"output": result.stdout,
|
|
69
|
+
"output": result.stdout or result.stderr,
|
|
69
70
|
"return_code": result.returncode,
|
|
70
71
|
}
|
|
71
72
|
except Exception as e:
|
|
@@ -243,6 +244,7 @@ class HardEvaluator:
|
|
|
243
244
|
"output": result.stdout,
|
|
244
245
|
"return_code": result.returncode,
|
|
245
246
|
"coverage_percentage": coverage_percentage,
|
|
247
|
+
"error_message": result.stderr.strip(),
|
|
246
248
|
}
|
|
247
249
|
except subprocess.TimeoutExpired:
|
|
248
250
|
return {
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: python-harness
|
|
3
|
-
Version: 0.0.10
|
|
3
|
+
Version: 0.0.11
|
|
4
4
|
Summary: An agentic codebase evaluation and evolution tool for Python projects.
|
|
5
5
|
Author-email: Mingli Yuan <mingli.yuan@gmail.com>
|
|
6
6
|
License: MIT
|
|
@@ -15,13 +15,13 @@ Requires-Dist: anthropic>=0.18.0
|
|
|
15
15
|
Requires-Dist: tenacity>=8.2.0
|
|
16
16
|
Requires-Dist: tiktoken>=0.6.0
|
|
17
17
|
Requires-Dist: python-dotenv>=1.0.0
|
|
18
|
+
Requires-Dist: pytest>=8.0.0
|
|
19
|
+
Requires-Dist: pytest-cov>=4.1.0
|
|
20
|
+
Requires-Dist: ruff>=0.3.0
|
|
21
|
+
Requires-Dist: mypy>=1.9.0
|
|
22
|
+
Requires-Dist: radon>=6.0.1
|
|
18
23
|
Provides-Extra: dev
|
|
19
|
-
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
20
|
-
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
21
|
-
Requires-Dist: ruff>=0.3.0; extra == "dev"
|
|
22
|
-
Requires-Dist: mypy>=1.9.0; extra == "dev"
|
|
23
24
|
Requires-Dist: ty>=0.0.1; extra == "dev"
|
|
24
|
-
Requires-Dist: radon>=6.0.1; extra == "dev"
|
|
25
25
|
Dynamic: license-file
|
|
26
26
|
|
|
27
27
|
# Python Harness
|
|
@@ -427,3 +427,77 @@ def test_refine_reports_suggestions(monkeypatch: Any) -> None:
|
|
|
427
427
|
assert result.exit_code == 0
|
|
428
428
|
assert "Found 2 suggestions. Starting evolution branches..." in result.stdout
|
|
429
429
|
assert "Evolution engine skeleton ready." in result.stdout
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
def test_measure_surfaces_hard_tool_errors(monkeypatch: Any) -> None:
|
|
433
|
+
"""
|
|
434
|
+
Test that measure prints hard-tool error details when tool invocations fail early.
|
|
435
|
+
"""
|
|
436
|
+
class DummyHardEvaluator:
|
|
437
|
+
def evaluate(self) -> dict[str, Any]:
|
|
438
|
+
return {
|
|
439
|
+
"all_passed": False,
|
|
440
|
+
"ruff": {
|
|
441
|
+
"status": "failed",
|
|
442
|
+
"issues": [],
|
|
443
|
+
"error_message": "No module named ruff",
|
|
444
|
+
},
|
|
445
|
+
"mypy": {"status": "failed", "output": "No module named mypy"},
|
|
446
|
+
"ty": {
|
|
447
|
+
"status": "warning",
|
|
448
|
+
"error_message": "ty executable not found. Skipping ty checks.",
|
|
449
|
+
},
|
|
450
|
+
"radon_cc": {
|
|
451
|
+
"status": "warning",
|
|
452
|
+
"issues": [],
|
|
453
|
+
"error_message": "No module named radon",
|
|
454
|
+
},
|
|
455
|
+
"radon_mi": {"status": "success", "mi_scores": {}},
|
|
456
|
+
"pytest": {
|
|
457
|
+
"status": "failed",
|
|
458
|
+
"error_message": "No module named pytest",
|
|
459
|
+
},
|
|
460
|
+
}
|
|
461
|
+
|
|
462
|
+
class DummyQcEvaluator:
|
|
463
|
+
def evaluate(self) -> dict[str, Any]:
|
|
464
|
+
return {"all_passed": True, "failures": []}
|
|
465
|
+
|
|
466
|
+
class DummySoftEvaluator:
|
|
467
|
+
def evaluate(self) -> dict[str, Any]:
|
|
468
|
+
return {
|
|
469
|
+
"package_summary": {
|
|
470
|
+
"total_files": 1,
|
|
471
|
+
"total_tokens": 1,
|
|
472
|
+
"package_understanding": "Mock understanding",
|
|
473
|
+
},
|
|
474
|
+
"understandability_score": 100.0,
|
|
475
|
+
"qa_results": {"sampled_entities": []},
|
|
476
|
+
}
|
|
477
|
+
|
|
478
|
+
def generate_final_report(
|
|
479
|
+
self,
|
|
480
|
+
hard_results: dict[str, Any],
|
|
481
|
+
qc_results: dict[str, Any],
|
|
482
|
+
soft_results: dict[str, Any],
|
|
483
|
+
) -> dict[str, Any]:
|
|
484
|
+
return {"verdict": "Fail", "summary": "Mock summary", "suggestions": []}
|
|
485
|
+
|
|
486
|
+
class DummyEvaluator:
|
|
487
|
+
def __init__(self, path: str):
|
|
488
|
+
self.path = path
|
|
489
|
+
self.hard_evaluator = DummyHardEvaluator()
|
|
490
|
+
self.qc_evaluator = DummyQcEvaluator()
|
|
491
|
+
self.soft_evaluator = DummySoftEvaluator()
|
|
492
|
+
|
|
493
|
+
monkeypatch.setattr(cli_module, "Evaluator", DummyEvaluator)
|
|
494
|
+
|
|
495
|
+
result = runner.invoke(app, ["measure", "."])
|
|
496
|
+
|
|
497
|
+
assert result.exit_code == 1
|
|
498
|
+
assert "Ruff issues found" in result.stdout
|
|
499
|
+
assert "No module named ruff" in result.stdout
|
|
500
|
+
assert "Mypy issues found" in result.stdout
|
|
501
|
+
assert "No module named mypy" in result.stdout
|
|
502
|
+
assert "Pytest/Coverage issues found" in result.stdout
|
|
503
|
+
assert "No module named pytest" in result.stdout
|
|
@@ -287,6 +287,49 @@ def test_run_mypy_returns_stdout(monkeypatch: Any) -> None:
|
|
|
287
287
|
assert "error: nope" in result["output"]
|
|
288
288
|
|
|
289
289
|
|
|
290
|
+
def test_run_ruff_surfaces_stderr_when_no_json_issues(monkeypatch: Any) -> None:
|
|
291
|
+
"""
|
|
292
|
+
Test that run_ruff preserves stderr when Ruff fails before emitting JSON.
|
|
293
|
+
"""
|
|
294
|
+
def mock_run(args: Any, **kwargs: Any) -> Any:
|
|
295
|
+
class MockResult:
|
|
296
|
+
returncode = 1
|
|
297
|
+
stdout = ""
|
|
298
|
+
stderr = "No module named ruff"
|
|
299
|
+
|
|
300
|
+
return MockResult()
|
|
301
|
+
|
|
302
|
+
monkeypatch.setattr("subprocess.run", mock_run)
|
|
303
|
+
|
|
304
|
+
evaluator = HardEvaluator(".")
|
|
305
|
+
result = evaluator.run_ruff()
|
|
306
|
+
|
|
307
|
+
assert result["status"] == "failed"
|
|
308
|
+
assert result["issues"] == []
|
|
309
|
+
assert result["error_message"] == "No module named ruff"
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
def test_run_mypy_surfaces_stderr(monkeypatch: Any) -> None:
|
|
313
|
+
"""
|
|
314
|
+
Test that run_mypy preserves stderr when mypy fails before stdout output.
|
|
315
|
+
"""
|
|
316
|
+
def mock_run(args: Any, **kwargs: Any) -> Any:
|
|
317
|
+
class MockResult:
|
|
318
|
+
returncode = 1
|
|
319
|
+
stdout = ""
|
|
320
|
+
stderr = "No module named mypy"
|
|
321
|
+
|
|
322
|
+
return MockResult()
|
|
323
|
+
|
|
324
|
+
monkeypatch.setattr("subprocess.run", mock_run)
|
|
325
|
+
|
|
326
|
+
evaluator = HardEvaluator(".")
|
|
327
|
+
result = evaluator.run_mypy()
|
|
328
|
+
|
|
329
|
+
assert result["status"] == "failed"
|
|
330
|
+
assert result["output"] == "No module named mypy"
|
|
331
|
+
|
|
332
|
+
|
|
290
333
|
def test_run_radon_mi_reads_scores(monkeypatch: Any) -> None:
|
|
291
334
|
"""
|
|
292
335
|
Test that run_radon_mi parses maintainability scores from JSON.
|
|
@@ -308,6 +351,27 @@ def test_run_radon_mi_reads_scores(monkeypatch: Any) -> None:
|
|
|
308
351
|
assert result["mi_scores"] == {"a.py": 77.0}
|
|
309
352
|
|
|
310
353
|
|
|
354
|
+
def test_run_pytest_surfaces_stderr(monkeypatch: Any, tmp_path: Path) -> None:
|
|
355
|
+
"""
|
|
356
|
+
Test that run_pytest preserves stderr when pytest fails early.
|
|
357
|
+
"""
|
|
358
|
+
def mock_run(args: Any, **kwargs: Any) -> Any:
|
|
359
|
+
class MockResult:
|
|
360
|
+
returncode = 1
|
|
361
|
+
stdout = ""
|
|
362
|
+
stderr = "No module named pytest"
|
|
363
|
+
|
|
364
|
+
return MockResult()
|
|
365
|
+
|
|
366
|
+
monkeypatch.setattr("subprocess.run", mock_run)
|
|
367
|
+
|
|
368
|
+
evaluator = HardEvaluator(str(tmp_path))
|
|
369
|
+
result = evaluator.run_pytest()
|
|
370
|
+
|
|
371
|
+
assert result["status"] == "failed"
|
|
372
|
+
assert result["error_message"] == "No module named pytest"
|
|
373
|
+
|
|
374
|
+
|
|
311
375
|
def test_evaluate_fails_when_coverage_report_missing(monkeypatch: Any) -> None:
|
|
312
376
|
"""
|
|
313
377
|
Test that missing coverage data fails the hard gate even when tests pass.
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{python_harness-0.0.10 → python_harness-0.0.11}/python_harness.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|