PyPI - agentrepocoach - Versions diffs - 0.2.0__py3-none-any.whl - Mend

agentrepocoach 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

agentrepocoach/__init__.py +14 -0
agentrepocoach/__main__.py +4 -0
agentrepocoach/adapters/__init__.py +64 -0
agentrepocoach/adapters/base.py +195 -0
agentrepocoach/adapters/csharp.py +419 -0
agentrepocoach/adapters/go.py +283 -0
agentrepocoach/adapters/python.py +244 -0
agentrepocoach/adapters/rust.py +304 -0
agentrepocoach/adapters/typescript.py +351 -0
agentrepocoach/cli.py +155 -0
agentrepocoach/components/__init__.py +27 -0
agentrepocoach/components/decision_queryability.py +192 -0
agentrepocoach/components/documentation.py +205 -0
agentrepocoach/components/error_quality.py +162 -0
agentrepocoach/components/module_hygiene.py +175 -0
agentrepocoach/components/test_quality.py +179 -0
agentrepocoach/compute.py +84 -0
agentrepocoach/config.py +263 -0
agentrepocoach/output.py +267 -0
agentrepocoach/scoring.py +34 -0
agentrepocoach-0.2.0.dist-info/METADATA +202 -0
agentrepocoach-0.2.0.dist-info/RECORD +26 -0
agentrepocoach-0.2.0.dist-info/WHEEL +5 -0
agentrepocoach-0.2.0.dist-info/entry_points.txt +2 -0
agentrepocoach-0.2.0.dist-info/licenses/LICENSE +202 -0
agentrepocoach-0.2.0.dist-info/top_level.txt +1 -0

agentrepocoach/components/error_quality.py ADDED Viewed

@@ -0,0 +1,162 @@
+"""Error quality component.
+Scores how actionable a repo's exceptions are for an AI agent. Agents fail
+fastest on unactionable errors — a cryptic ``InvalidOperationException("bad
+state")`` gives an agent nothing to work with.
+- 50 pts: % of throw sites whose message contains an actionable fix hint.
+- 30 pts: % of throws that use a user-defined (domain) exception subclass.
+- 20 pts: language-stdlib generic exceptions do NOT dominate (bonus if rare).
+All exception classification goes through the active language adapter.
+Zero hard-coded exception type names in this file — every domain exception
+name comes from config or adapter auto-discovery.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+from ..adapters import LanguageAdapter, ThrowSite
+from ..config import Config
+from ..scoring import scale_linear
+# Maximum points awarded by each of the three sub-scores (they sum to 100).
+_HINT_WEIGHT = 50
+_SUBCLASS_WEIGHT = 30
+_GENERIC_WEIGHT = 20
+# Hint-coverage percentage passed to scale_linear as ``full_at``.
+_HINT_FULL_PCT = 50.0
+# Domain-exception ratio passed to scale_linear as ``full_at``.
+_SUBCLASS_FULL_RATIO = 0.50
+# Generic-exception percentages passed to scale_linear as ``full_at`` (low)
+# and ``zero_at`` (high) respectively.
+_GENERIC_LOW_PCT = 20.0
+_GENERIC_HIGH_PCT = 40.0
+def compute_error_quality(repo_root: Path, config: Config, adapter: LanguageAdapter) -> dict[str, Any]:
+    """Score error-message quality: hint coverage + exception typing."""
+    production_files = adapter.find_production_files(repo_root)
+    domain_types = _resolve_domain_exception_types(config, adapter, production_files)
+    sites = adapter.scan_throw_sites(
+        production_files,
+        hint_marker=config.error_quality.hint_marker,
+        domain_exception_types=domain_types,
+    )
+    hint = _score_hint_coverage(sites)
+    subclass = _score_domain_subclass_ratio(sites)
+    generic = _score_generic_dominance(sites, adapter.generic_exception_names())
+    total = hint["score"] + subclass["score"] + generic["score"]
+    return {
+        "score": round(total, 2),
+        "total": 100,
+        "breakdown": {
+            "hint_coverage": hint,
+            "exception_subclass_ratio": subclass,
+            "generic_exception_dominance": generic,
+        },
+    }
+def _resolve_domain_exception_types(
+    config: Config,
+    adapter: LanguageAdapter,
+    production_files: list[Path],
+) -> set[str]:
+    """Build the set of 'user-defined' exception type names.
+    Priority:
+    1. Explicit config ``error_quality.domain_exception_types`` list.
+    2. Auto-discovery from the repo's own source (scan declarations whose
+       name ends in 'Exception' or 'Error' — language-neutral heuristic).
+    """
+    explicit = set(config.error_quality.domain_exception_types)
+    if explicit:
+        return explicit
+    # Auto-discover: scan declarations and keep any ending in Exception/Error.
+    declarations = adapter.scan_declarations(production_files)
+    discovered: set[str] = set()
+    for decl in declarations:
+        if decl.name.endswith("Exception") or decl.name.endswith("Error"):
+            discovered.add(decl.name)
+    return discovered
+def _score_hint_coverage(sites: list[ThrowSite]) -> dict[str, Any]:
+    """50 pts: % of throws with an actionable fix hint, scaled 0% -> 50%."""
+    total = len(sites)
+    if total == 0:
+        return {
+            "score": _HINT_WEIGHT,
+            "max": _HINT_WEIGHT,
+            "coverage_pct": 100.0,
+            "total_sites": 0,
+            "with_hint": 0,
+            "note": "no throw sites",
+        }
+    with_hint = sum(1 for s in sites if s.has_fix_hint)
+    pct = 100.0 * with_hint / total
+    score = scale_linear(pct, zero_at=0.0, full_at=_HINT_FULL_PCT, max_pts=_HINT_WEIGHT)
+    return {
+        "score": round(score, 2),
+        "max": _HINT_WEIGHT,
+        "coverage_pct": round(pct, 2),
+        "total_sites": total,
+        "with_hint": with_hint,
+    }
+def _score_domain_subclass_ratio(sites: list[ThrowSite]) -> dict[str, Any]:
+    """30 pts: % of throws using a user-defined (domain) exception class."""
+    total = len(sites)
+    if total == 0:
+        return {
+            "score": _SUBCLASS_WEIGHT,
+            "max": _SUBCLASS_WEIGHT,
+            "ratio": 1.0,
+            "note": "no throw sites",
+        }
+    subclass_count = sum(1 for s in sites if s.is_user_defined)
+    ratio = subclass_count / total
+    score = scale_linear(
+        ratio,
+        zero_at=0.0,
+        full_at=_SUBCLASS_FULL_RATIO,
+        max_pts=_SUBCLASS_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _SUBCLASS_WEIGHT,
+        "ratio": round(ratio, 3),
+        "subclass_count": subclass_count,
+        "total_throws": total,
+    }
+def _score_generic_dominance(
+    sites: list[ThrowSite],
+    generic_names: set[str],
+) -> dict[str, Any]:
+    """20 pts: generic stdlib exceptions should not dominate. Lower is better."""
+    total = len(sites)
+    if total == 0:
+        return {
+            "score": _GENERIC_WEIGHT,
+            "max": _GENERIC_WEIGHT,
+            "pct": 0.0,
+            "note": "no throw sites",
+        }
+    generic_count = sum(1 for s in sites if s.exception_type in generic_names)
+    pct = 100.0 * generic_count / total
+    score = scale_linear(
+        pct,
+        zero_at=_GENERIC_HIGH_PCT,
+        full_at=_GENERIC_LOW_PCT,
+        max_pts=_GENERIC_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _GENERIC_WEIGHT,
+        "pct": round(pct, 2),
+        "generic_count": generic_count,
+        "total_throws": total,
+    }

agentrepocoach/components/module_hygiene.py ADDED Viewed

@@ -0,0 +1,175 @@
+"""Module hygiene component.
+Scores how neatly a codebase's production modules are organized:
+- 30 pts: enough files declare internal / non-public types (visibility hygiene).
+- 30 pts: god files (files over a size threshold) are rare.
+- 20 pts: public declarations have doc comments.
+- 20 pts: architecture doc is fresh.
+Every file scan goes through the active language adapter — the component
+never looks at file suffixes directly.
+"""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+from ..adapters import LanguageAdapter
+from ..adapters.base import count_file_loc
+from ..config import Config
+from ..scoring import file_mtime_age_days, scale_linear
+# Maximum points awarded by each of the four sub-scores (they sum to 100).
+_INTERNAL_WEIGHT = 30
+_GOD_FILE_WEIGHT = 30
+_DOC_COMMENT_WEIGHT = 20
+_ARCH_WEIGHT = 20
+# God-file counts passed to scale_linear as ``full_at`` and ``zero_at``.
+_GOD_FILE_FULL_COUNT = 5
+_GOD_FILE_ZERO_COUNT = 15
+def compute_module_hygiene(repo_root: Path, config: Config, adapter: LanguageAdapter) -> dict[str, Any]:
+    """Score internal visibility + god files + doc coverage + arch doc freshness."""
+    production_files = adapter.find_production_files(repo_root)
+    declarations = adapter.scan_declarations(production_files)
+    internal = _score_internal_visibility(declarations, production_files, config)
+    god = _score_god_files(production_files, config)
+    docs = _score_doc_coverage(declarations, config)
+    arch = _score_architecture_doc(repo_root, config)
+    total = internal["score"] + god["score"] + docs["score"] + arch["score"]
+    return {
+        "score": round(total, 2),
+        "total": 100,
+        "breakdown": {
+            "internal_visibility": internal,
+            "god_files": god,
+            "doc_comment_coverage": docs,
+            "architecture_doc": arch,
+        },
+    }
+def _score_internal_visibility(
+    declarations: list[Any],
+    production_files: list[Path],
+    config: Config,
+) -> dict[str, Any]:
+    """30 pts: proportion of production files that declare a non-public type."""
+    if not production_files:
+        return {
+            "score": 0,
+            "max": _INTERNAL_WEIGHT,
+            "internal_files": 0,
+            "total_files": 0,
+            "ratio": 0.0,
+        }
+    files_with_internal: set[Path] = set()
+    for decl in declarations:
+        if decl.visibility in ("internal", "private"):
+            files_with_internal.add(decl.file)
+    ratio = len(files_with_internal) / len(production_files)
+    score = scale_linear(
+        ratio,
+        zero_at=0.0,
+        full_at=config.module_hygiene.internal_visibility_full_ratio,
+        max_pts=_INTERNAL_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _INTERNAL_WEIGHT,
+        "internal_files": len(files_with_internal),
+        "total_files": len(production_files),
+        "ratio": round(ratio, 3),
+    }
+def _score_god_files(production_files: list[Path], config: Config) -> dict[str, Any]:
+    """30 pts: count of production files over the god-file LOC threshold."""
+    threshold = config.thresholds.god_file_loc
+    max_bytes = config.thresholds.max_file_bytes
+    god: list[dict[str, Any]] = []
+    for path in production_files:
+        loc = count_file_loc(path, max_bytes=max_bytes)
+        if loc > threshold:
+            god.append({"path": str(path), "loc": loc})
+    count = len(god)
+    score = scale_linear(
+        count,
+        zero_at=_GOD_FILE_ZERO_COUNT,
+        full_at=_GOD_FILE_FULL_COUNT,
+        max_pts=_GOD_FILE_WEIGHT,
+    )
+    god.sort(key=lambda d: d["loc"], reverse=True)
+    return {
+        "score": round(score, 2),
+        "max": _GOD_FILE_WEIGHT,
+        "god_file_count": count,
+        "top_5": [_relative_god_entry(d, production_files) for d in god[:5]],
+    }
+def _relative_god_entry(entry: dict[str, Any], production_files: list[Path]) -> dict[str, Any]:
+    """Format a god-file entry with repo-relative path (no string splits)."""
+    path = Path(entry["path"])
+    # Find the closest common ancestor among production files (language-neutral).
+    try:
+        common = Path(*path.parts[:-1])
+        rel = path.name if str(common) == "." else str(path)
+    except (ValueError, IndexError):
+        rel = str(path)
+    return {"path": rel, "loc": entry["loc"]}
+def _score_doc_coverage(declarations: list[Any], config: Config) -> dict[str, Any]:
+    """20 pts: % of public declarations with a doc comment. Full at 90%."""
+    public = [d for d in declarations if d.visibility == "public"]
+    total = len(public)
+    if total == 0:
+        return {
+            "score": _DOC_COMMENT_WEIGHT,
+            "max": _DOC_COMMENT_WEIGHT,
+            "total_public_declarations": 0,
+            "documented": 0,
+            "pct": 100.0,
+        }
+    documented = sum(1 for d in public if d.has_doc_comment)
+    pct = 100.0 * documented / total
+    score = scale_linear(
+        pct,
+        zero_at=0.0,
+        full_at=config.thresholds.doc_comment_min_coverage_pct,
+        max_pts=_DOC_COMMENT_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _DOC_COMMENT_WEIGHT,
+        "total_public_declarations": total,
+        "documented": documented,
+        "pct": round(pct, 2),
+    }
+def _score_architecture_doc(repo_root: Path, config: Config) -> dict[str, Any]:
+    """20 pts: architecture doc exists AND was touched recently."""
+    path = repo_root / config.paths.architecture_doc
+    if not path.is_file():
+        return {"score": 0, "max": _ARCH_WEIGHT, "exists": False, "age_days": None}
+    age = file_mtime_age_days(path)
+    fresh_days = config.module_hygiene.architecture_doc_fresh_days
+    if age <= fresh_days:
+        return {
+            "score": _ARCH_WEIGHT,
+            "max": _ARCH_WEIGHT,
+            "exists": True,
+            "age_days": round(age, 2),
+        }
+    return {
+        "score": _ARCH_WEIGHT / 2,
+        "max": _ARCH_WEIGHT,
+        "exists": True,
+        "age_days": round(age, 2),
+        "stale": True,
+    }

agentrepocoach/components/test_quality.py ADDED Viewed

@@ -0,0 +1,179 @@
+"""Test quality component.
+Scores test readability and fixture hygiene — not test coverage. Coverage is
+a solved problem (codecov etc.); this component measures whether the test
+suite tells an agent what each test does without running it.
+- 40 pts: % of test methods that match the idiomatic naming convention.
+- 30 pts: enough reusable helper files to discourage copy-paste fixtures.
+- 30 pts: configured fixture-duplication patterns appear sparingly.
+``fixture_duplication_patterns`` is empty by default — the sub-score gives
+full credit unless the user opts in by listing project-specific patterns.
+"""
+from __future__ import annotations
+import re
+from pathlib import Path
+from typing import Any
+from ..adapters import LanguageAdapter
+from ..adapters.base import iter_source_files
+from ..config import Config
+from ..scoring import scale_linear
+# Maximum points awarded by each of the three sub-scores (they sum to 100).
+_NAMING_WEIGHT = 40
+_HELPERS_WEIGHT = 30
+_DUPLICATION_WEIGHT = 30
+# Duplication match counts passed to scale_linear as ``full_at`` and ``zero_at``.
+_DUP_FULL_MAX = 50
+_DUP_ZERO_MAX = 200
+def compute_test_quality(repo_root: Path, config: Config, adapter: LanguageAdapter) -> dict[str, Any]:
+    """Score test naming convention + helper count + fixture duplication."""
+    test_files = adapter.find_test_files(repo_root)
+    naming = _score_test_naming(test_files, adapter)
+    helpers = _score_test_helpers(repo_root, config, test_files)
+    duplication = _score_fixture_duplication(test_files, config)
+    total = naming["score"] + helpers["score"] + duplication["score"]
+    return {
+        "score": round(total, 2),
+        "total": 100,
+        "breakdown": {
+            "naming_convention": naming,
+            "helper_files": helpers,
+            "fixture_duplication": duplication,
+        },
+    }
+def _score_test_naming(
+    test_files: list[Path],
+    adapter: LanguageAdapter,
+) -> dict[str, Any]:
+    """40 pts: % of test methods matching the adapter's naming convention."""
+    methods = adapter.find_test_methods(test_files)
+    pattern = adapter.test_naming_pattern()
+    total = len(methods)
+    if total == 0:
+        return {
+            "score": 0,
+            "max": _NAMING_WEIGHT,
+            "total_methods": 0,
+            "matching_methods": 0,
+            "pct": 0.0,
+        }
+    matching = sum(1 for _, name in methods if pattern.match(name))
+    pct = 100.0 * matching / total
+    score = scale_linear(pct, zero_at=0.0, full_at=100.0, max_pts=_NAMING_WEIGHT)
+    return {
+        "score": round(score, 2),
+        "max": _NAMING_WEIGHT,
+        "total_methods": total,
+        "matching_methods": matching,
+        "pct": round(pct, 2),
+    }
+def _score_test_helpers(
+    repo_root: Path,
+    config: Config,
+    test_files: list[Path],
+) -> dict[str, Any]:
+    """30 pts: count of helper files under the configured helpers directory."""
+    helpers_dir = _resolve_helpers_dir(repo_root, config, test_files)
+    if helpers_dir is None or not helpers_dir.is_dir():
+        return {"score": 0, "max": _HELPERS_WEIGHT, "helper_count": 0}
+    # Count helpers using a neutral suffix list: any source file under the
+    # helpers dir. The active adapter's production-file suffix set is a fair
+    # proxy; we reuse iter_source_files to respect symlink/size guards.
+    helpers = iter_source_files(
+        helpers_dir,
+        suffixes=(".cs", ".py", ".ts", ".tsx", ".js", ".jsx", ".rs", ".go"),
+    )
+    count = len(helpers)
+    score = scale_linear(
+        count,
+        zero_at=0,
+        full_at=config.test_quality.helpers_full_count,
+        max_pts=_HELPERS_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _HELPERS_WEIGHT,
+        "helper_count": count,
+    }
+def _resolve_helpers_dir(
+    repo_root: Path,
+    config: Config,
+    test_files: list[Path],
+) -> Path | None:
+    """Resolve the helpers directory from config ('auto' means guess)."""
+    configured = config.paths.test_helpers_dir
+    if configured and configured != "auto":
+        return repo_root / configured
+    # Auto-discovery: look for a TestHelpers / fixtures / helpers directory
+    # under any test file's parent chain.
+    candidates = ("TestHelpers", "test_helpers", "helpers", "fixtures", "conftest")
+    seen: set[Path] = set()
+    for test_file in test_files:
+        for parent in test_file.parents:
+            if parent == repo_root or parent == repo_root.parent:
+                break
+            for name in candidates:
+                candidate = parent / name
+                if candidate.is_dir() and candidate not in seen:
+                    seen.add(candidate)
+                    return candidate
+    return None
+def _score_fixture_duplication(
+    test_files: list[Path],
+    config: Config,
+) -> dict[str, Any]:
+    """30 pts: configured fixture-duplication patterns are rare."""
+    patterns = config.test_quality.fixture_duplication_patterns
+    if not patterns:
+        return {
+            "score": _DUPLICATION_WEIGHT,
+            "max": _DUPLICATION_WEIGHT,
+            "duplicate_builder_count": 0,
+            "note": "no fixture_duplication_patterns configured",
+        }
+    compiled: list[re.Pattern[str]] = []
+    for raw in patterns:
+        try:
+            compiled.append(re.compile(raw))
+        except re.error:
+            continue
+    total = 0
+    for path in test_files:
+        try:
+            text = path.read_text(encoding="utf-8", errors="ignore")
+        except OSError:
+            continue
+        for pattern in compiled:
+            total += len(pattern.findall(text))
+    score = scale_linear(
+        total,
+        zero_at=_DUP_ZERO_MAX,
+        full_at=_DUP_FULL_MAX,
+        max_pts=_DUPLICATION_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _DUPLICATION_WEIGHT,
+        "duplicate_builder_count": total,
+    }

agentrepocoach/compute.py ADDED Viewed

@@ -0,0 +1,84 @@
+"""Composite orchestrator — combines the 5 components into the CAH score."""
+from __future__ import annotations
+from pathlib import Path
+from typing import Any
+from .adapters import LanguageAdapter, detect_primary, get_adapter_by_name
+from .components import (
+    compute_decision_queryability,
+    compute_error_quality,
+    compute_module_hygiene,
+    compute_navigability,
+    compute_test_quality,
+)
+from .config import Config, load_config
+# Tool name written, together with VERSION, into the result's ``generator`` field.
+_GENERATOR_NAME = "agentrepocoach"
+def compute_cah(repo_root: Path, config: Config | None = None, adapter: LanguageAdapter | None = None) -> dict[str, Any]:
+    """Compute every component and assemble the weighted composite.
+    Args:
+        repo_root: Path to the repository to score.
+        config: Optional explicit config. If None, loads from
+            ``<repo_root>/.agentrepocoach.toml`` with defaults.
+        adapter: Optional explicit language adapter. If None, auto-detects.
+    Returns:
+        A dict with ``schema_version``, ``generator``, ``total``, ``weights``,
+        ``components``, and ``language``.
+    """
+    from . import VERSION  # local import to avoid circular reference
+    repo_root = repo_root.resolve()
+    if config is None:
+        config = load_config(repo_root)
+    if adapter is None:
+        adapter = _pick_adapter(repo_root, config)
+    components = {
+        "navigability": compute_navigability(repo_root, config, adapter),
+        "error_quality": compute_error_quality(repo_root, config, adapter),
+        "decision_queryability": compute_decision_queryability(repo_root, config, adapter),
+        "test_quality": compute_test_quality(repo_root, config, adapter),
+        "module_hygiene": compute_module_hygiene(repo_root, config, adapter),
+    }
+    total = 0.0
+    for name, weight in config.weights.items():
+        total += weight * components[name]["score"]
+    result = {
+        "schema_version": config.schema_version,
+        "generator": f"{_GENERATOR_NAME} {VERSION}",
+        "total": round(total, 2),
+        "weights": dict(config.weights),
+        "language": adapter.name,
+        "components": components,
+    }
+    # Generate coaching recommendations from the scored result.
+    from .output import generate_coaching
+    tips = generate_coaching(result)
+    if tips:
+        result["coaching"] = [
+            {
+                "component": t["component"],
+                "sub_component": t["sub_component"],
+                "label": t["label"],
+                "tip": t["tip"],
+                "gap": round(t["gap"], 2),
+            }
+            for t in tips
+        ]
+    return result
+def _pick_adapter(repo_root: Path, config: Config) -> LanguageAdapter:
+    """Pick the adapter either by explicit config or auto-detection."""
+    if config.language and config.language != "auto":
+        return get_adapter_by_name(config.language)
+    return detect_primary(repo_root)