PyPI - agentrepocoach - Versions diffs - 0.2.0__py3-none-any.whl - Mend

agentrepocoach 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

agentrepocoach/__init__.py +14 -0
agentrepocoach/__main__.py +4 -0
agentrepocoach/adapters/__init__.py +64 -0
agentrepocoach/adapters/base.py +195 -0
agentrepocoach/adapters/csharp.py +419 -0
agentrepocoach/adapters/go.py +283 -0
agentrepocoach/adapters/python.py +244 -0
agentrepocoach/adapters/rust.py +304 -0
agentrepocoach/adapters/typescript.py +351 -0
agentrepocoach/cli.py +155 -0
agentrepocoach/components/__init__.py +27 -0
agentrepocoach/components/decision_queryability.py +192 -0
agentrepocoach/components/documentation.py +205 -0
agentrepocoach/components/error_quality.py +162 -0
agentrepocoach/components/module_hygiene.py +175 -0
agentrepocoach/components/test_quality.py +179 -0
agentrepocoach/compute.py +84 -0
agentrepocoach/config.py +263 -0
agentrepocoach/output.py +267 -0
agentrepocoach/scoring.py +34 -0
agentrepocoach-0.2.0.dist-info/METADATA +202 -0
agentrepocoach-0.2.0.dist-info/RECORD +26 -0
agentrepocoach-0.2.0.dist-info/WHEEL +5 -0
agentrepocoach-0.2.0.dist-info/entry_points.txt +2 -0
agentrepocoach-0.2.0.dist-info/licenses/LICENSE +202 -0
agentrepocoach-0.2.0.dist-info/top_level.txt +1 -0

agentrepocoach/cli.py ADDED Viewed

@@ -0,0 +1,155 @@
+"""AgentRepoCoach CLI entry point."""
+from __future__ import annotations
+import argparse
+import sys
+from pathlib import Path
+from . import VERSION
+from .adapters import NoAdapterError
+from .compute import compute_cah
+from .config import ConfigError, load_config
+from .output import (
+    format_summary,
+    format_verbose,
+    write_json,
+    write_markdown_comment,
+    write_prometheus,
+)
+def build_parser() -> argparse.ArgumentParser:
+    """Build the argument parser for the ``agentrepocoach`` CLI."""
+    parser = argparse.ArgumentParser(
+        prog="agentrepocoach",
+        description="Compute the Codebase Agent Health (CAH) composite score for a repository.",
+    )
+    parser.add_argument(
+        "--repo",
+        type=Path,
+        default=Path.cwd(),
+        help="Path to the repository to score (default: current directory).",
+    )
+    parser.add_argument(
+        "--config",
+        type=Path,
+        default=None,
+        help="Explicit config file path (default: <repo>/.agentrepocoach.toml).",
+    )
+    parser.add_argument(
+        "--language",
+        type=str,
+        default=None,
+        help="Override language detection (csharp|python|auto).",
+    )
+    parser.add_argument("--json", type=Path, help="Write full JSON result to this path.")
+    parser.add_argument("--prometheus", type=Path, help="Write Prometheus metrics to this path.")
+    parser.add_argument("--comment", type=Path, help="Write a PR-comment markdown file to this path.")
+    parser.add_argument(
+        "--format",
+        choices=["json", "markdown", "both"],
+        default=None,
+        help="Output format when using --output. 'json' writes the full report, "
+             "'markdown' writes a PR-comment summary, 'both' writes both (markdown "
+             "path derived from --output by swapping the extension to .md).",
+    )
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=None,
+        help="Output path for --format. Ignored if --format is not set.",
+    )
+    parser.add_argument("--verbose", action="store_true", help="Print per-sub-component breakdown.")
+    parser.add_argument("--quiet", action="store_true", help="Print only the total score.")
+    parser.add_argument("--version", action="version", version=f"agentrepocoach {VERSION}")
+    return parser
+def main(argv: list[str] | None = None) -> int:
+    """Run the CLI, parse arguments, compute the CAH score, and write outputs."""
+    parser = build_parser()
+    args = parser.parse_args(argv)
+    repo_root = args.repo.resolve()
+    if not repo_root.is_dir():
+        print(f"error: repo path is not a directory: {repo_root}", file=sys.stderr)
+        return 2
+    try:
+        config = load_config(repo_root, config_path=args.config)
+    except ConfigError as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        return 2
+    if args.language:
+        # Replace the config's language field. Dataclass is frozen -> rebuild.
+        from dataclasses import replace as _replace
+        config = _replace(config, language=args.language)
+    try:
+        result = compute_cah(repo_root, config=config)
+    except NoAdapterError as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        return 2
+    if args.quiet:
+        print(f"{result['total']:.2f}")
+    elif args.verbose:
+        print(format_verbose(result))
+    else:
+        print(format_summary(result))
+    if args.json:
+        write_json(result, args.json)
+        if not args.quiet:
+            print(f"\nJSON report written to {args.json}")
+    if args.prometheus:
+        write_prometheus(result, args.prometheus)
+        if not args.quiet:
+            print(f"Prometheus metrics written to {args.prometheus}")
+    if args.comment:
+        write_markdown_comment(result, args.comment)
+        if not args.quiet:
+            print(f"PR comment written to {args.comment}")
+    if args.format and args.output:
+        _write_formatted(result, args.format, args.output, quiet=args.quiet)
+    elif args.format and not args.output:
+        print("error: --format requires --output", file=sys.stderr)
+        return 2
+    return 0
+def _write_formatted(
+    result: dict,
+    fmt: str,
+    output: Path,
+    *,
+    quiet: bool,
+) -> None:
+    """Dispatch --format/--output combinations to the underlying writers."""
+    if fmt == "json":
+        write_json(result, output)
+        if not quiet:
+            print(f"\nJSON report written to {output}")
+        return
+    if fmt == "markdown":
+        write_markdown_comment(result, output)
+        if not quiet:
+            print(f"\nMarkdown report written to {output}")
+        return
+    # fmt == "both"
+    json_path = output
+    markdown_path = output.with_suffix(".md")
+    write_json(result, json_path)
+    write_markdown_comment(result, markdown_path)
+    if not quiet:
+        print(f"\nJSON report written to {json_path}")
+        print(f"Markdown report written to {markdown_path}")
+if __name__ == "__main__":
+    sys.exit(main())

agentrepocoach/components/__init__.py ADDED Viewed

@@ -0,0 +1,27 @@
+"""AgentRepoCoach scoring components.
+Each component returns a dict with ``{"score": float, "total": 100,
+"breakdown": {...}}``. The orchestrator in :mod:`agentrepocoach.compute` combines
+them with weights from config to produce the final composite score.
+File-to-component mapping:
+- ``documentation.py``       -> ``navigability`` (AGENTS.md, codebase map, CLI manifest, root hygiene)
+- ``error_quality.py``       -> ``error_quality``
+- ``decision_queryability.py`` -> ``decision_queryability``
+- ``test_quality.py``        -> ``test_quality``
+- ``module_hygiene.py``      -> ``module_hygiene``
+"""
+from .decision_queryability import compute_decision_queryability
+from .documentation import compute_navigability
+from .error_quality import compute_error_quality
+from .module_hygiene import compute_module_hygiene
+from .test_quality import compute_test_quality
+__all__ = [  # the five compute_* entry points imported from the component modules above
+    "compute_decision_queryability",
+    "compute_error_quality",
+    "compute_module_hygiene",
+    "compute_navigability",
+    "compute_test_quality",
+]

agentrepocoach/components/decision_queryability.py ADDED Viewed

@@ -0,0 +1,192 @@
+"""Decision queryability component.
+Scores how easily an AI agent can discover *why* the code is the way it is:
+- 60 pts: ADR catalog has enough entries with valid frontmatter.
+- 40 pts: inline references in source code resolve to an ADR body or filename.
+The original research included a third sub-score (MCP tool availability)
+worth 30 pts, but that sub-score required importing a proprietary internal
+MCP server module at score-compute time. It has been **dropped** for the
+public tool; the 30 pts were reallocated: adr_catalog 40 -> 60, and
+inline_ref_resolution 30 -> 40. Total still sums to 100.
+"""
+from __future__ import annotations
+import re
+from pathlib import Path
+from typing import Any
+from ..adapters import LanguageAdapter
+from ..config import Config
+from ..scoring import scale_linear
+_ADR_COUNT_WEIGHT = 60  # max points for the ADR catalog sub-score
+_REF_RESOLVE_WEIGHT = 40  # max points for the inline-ref resolution sub-score
+_REF_FULL_PCT = 90.0  # resolve percentage passed to scale_linear as full_at
+def compute_decision_queryability(repo_root: Path, config: Config, adapter: LanguageAdapter) -> dict[str, Any]:
+    """Score ADR catalog health + inline-ref resolution."""
+    adr = _score_adr_catalog(repo_root, config)
+    refs = _score_inline_ref_resolution(repo_root, config, adapter)
+    total = adr["score"] + refs["score"]
+    return {
+        "score": round(total, 2),
+        "total": 100,
+        "breakdown": {
+            "adr_catalog": adr,
+            "inline_ref_resolution": refs,
+        },
+    }
+def _score_adr_catalog(repo_root: Path, config: Config) -> dict[str, Any]:
+    """60 pts: enough ADRs under the configured ADR dir, with valid frontmatter."""
+    adr_dir = repo_root / config.paths.adr_dir
+    if not adr_dir.is_dir():
+        return {
+            "score": 0,
+            "max": _ADR_COUNT_WEIGHT,
+            "count": 0,
+            "valid_count": 0,
+        }
+    files = [p for p in sorted(adr_dir.glob("*.md")) if p.name.lower() != "readme.md"]
+    valid = 0
+    for path in files:
+        try:
+            text = path.read_text(encoding="utf-8")
+        except OSError:
+            continue
+        if _has_valid_frontmatter(text):
+            valid += 1
+    score = scale_linear(
+        valid,
+        zero_at=0,
+        full_at=config.thresholds.adr_min_count,
+        max_pts=_ADR_COUNT_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _ADR_COUNT_WEIGHT,
+        "count": len(files),
+        "valid_count": valid,
+    }
+def _has_valid_frontmatter(text: str) -> bool:
+    """Return True if ``text`` begins with a --- fence and parses an id: key."""
+    if not text.startswith("---"):
+        return False
+    lines = text.splitlines()
+    if len(lines) < 2 or lines[0] != "---":
+        return False
+    for i in range(1, min(len(lines), 40)):
+        if lines[i] == "---":
+            break
+        if lines[i].strip().lower().startswith("id:"):
+            return True
+    return False
+def _score_inline_ref_resolution(
+    repo_root: Path,
+    config: Config,
+    adapter: LanguageAdapter,
+) -> dict[str, Any]:
+    """40 pts: % of unique inline refs in production code that resolve to an ADR."""
+    patterns = _compile_inline_ref_patterns(config.decision_queryability.inline_ref_patterns)
+    if not patterns:
+        return {
+            "score": _REF_RESOLVE_WEIGHT,
+            "max": _REF_RESOLVE_WEIGHT,
+            "note": "no inline ref patterns configured",
+        }
+    production_files = adapter.find_production_files(repo_root)
+    refs = _extract_refs(production_files, patterns)
+    if not refs:
+        return {
+            "score": _REF_RESOLVE_WEIGHT,
+            "max": _REF_RESOLVE_WEIGHT,
+            "total_refs": 0,
+            "resolved_refs": 0,
+            "resolve_pct": 100.0,
+            "note": "no inline refs found",
+        }
+    resolved, unresolved = _resolve_refs_against_adrs(refs, repo_root, config)
+    pct = 100.0 * resolved / len(refs)
+    score = scale_linear(pct, zero_at=0.0, full_at=_REF_FULL_PCT, max_pts=_REF_RESOLVE_WEIGHT)
+    return {
+        "score": round(score, 2),
+        "max": _REF_RESOLVE_WEIGHT,
+        "total_refs": len(refs),
+        "resolved_refs": resolved,
+        "resolve_pct": round(pct, 2),
+        "unresolved_sample": unresolved[:10],
+    }
+def _compile_inline_ref_patterns(patterns: tuple[str, ...]) -> list[re.Pattern[str]]:
+    """Compile config-provided pattern strings with word-boundary anchoring."""
+    compiled: list[re.Pattern[str]] = []
+    for raw in patterns:
+        # Wrap in word boundaries if the user did not already supply them.
+        anchored = raw if raw.startswith("\\b") else rf"\b{raw}\b"
+        try:
+            compiled.append(re.compile(anchored, re.IGNORECASE))
+        except re.error:
+            # Malformed regex -> skip silently; the user sees it in --verbose.
+            continue
+    return compiled
+def _extract_refs(files: list[Path], patterns: list[re.Pattern[str]]) -> set[str]:
+    refs: set[str] = set()
+    for path in files:
+        try:
+            text = path.read_text(encoding="utf-8", errors="ignore")
+        except OSError:
+            continue
+        for pattern in patterns:
+            for match in pattern.finditer(text):
+                token = re.sub(r"\s+", " ", match.group(0)).upper()
+                refs.add(token)
+    return refs
+def _resolve_refs_against_adrs(
+    refs: set[str],
+    repo_root: Path,
+    config: Config,
+) -> tuple[int, list[str]]:
+    adr_dir = repo_root / config.paths.adr_dir
+    adr_bodies: list[str] = []
+    adr_filenames: list[str] = []
+    if adr_dir.is_dir():
+        for path in adr_dir.glob("*.md"):
+            if path.name.lower() == "readme.md":
+                continue
+            try:
+                adr_bodies.append(path.read_text(encoding="utf-8", errors="ignore").lower())
+                adr_filenames.append(path.name.lower())
+            except OSError:
+                continue
+    resolved = 0
+    unresolved: list[str] = []
+    for ref in sorted(refs):
+        needle = ref.lower()
+        if any(needle in body for body in adr_bodies):
+            resolved += 1
+            continue
+        if any(needle in name for name in adr_filenames):
+            resolved += 1
+            continue
+        unresolved.append(ref)
+    return resolved, unresolved

agentrepocoach/components/documentation.py ADDED Viewed

@@ -0,0 +1,205 @@
+"""Navigability component ('documentation' file in the package).
+Scores the agent navigability layer — the docs and entry points an AI agent
+reads first when opening an unfamiliar repo:
+- 30 pts: ``AGENTS.md`` exists and links to the codebase map, CLI manifest, and ADR dir.
+- 30 pts: ``docs/codebase-map.md`` exists and mentions every production module.
+- 20 pts: ``docs/cli-manifest.json`` exists, is fresh, and has enough commands.
+- 20 pts: Root directory is free of stale artifacts.
+All paths and thresholds are configurable via ``.agentrepocoach.toml``.
+"""
+from __future__ import annotations
+import json
+import re
+from pathlib import Path
+from typing import Any
+from ..adapters import LanguageAdapter
+from ..config import Config
+from ..scoring import file_mtime_age_days, scale_linear
+_AGENTS_MD_WEIGHT = 30  # max points for the AGENTS.md sub-score
+_CODEBASE_MAP_WEIGHT = 30  # max points for the codebase-map sub-score
+_CLI_MANIFEST_WEIGHT = 20  # max points for the CLI-manifest sub-score
+_ROOT_CLEAN_WEIGHT = 20  # max points for the root-cleanliness sub-score
+_STALE_ARTIFACT_PATTERNS = (  # matched with .match() against root-level file names
+    re.compile(r".*\.json$"),  # any name ending in ".json"
+    re.compile(r".*-results\..*"),  # any name containing "-results."
+    re.compile(r".*-backup\..*"),  # any name containing "-backup."
+    re.compile(r".*\.bak$"),  # any name ending in ".bak"
+)
+def compute_navigability(repo_root: Path, config: Config, adapter: LanguageAdapter) -> dict[str, Any]:
+    """Score the agent navigability layer."""
+    agents = _score_agents_md(repo_root, config)
+    codebase_map = _score_codebase_map(repo_root, config, adapter)
+    cli_manifest = _score_cli_manifest(repo_root, config)
+    root_cleanliness = _score_root_cleanliness(repo_root, config)
+    total = (
+        agents["score"]
+        + codebase_map["score"]
+        + cli_manifest["score"]
+        + root_cleanliness["score"]
+    )
+    return {
+        "score": round(total, 2),
+        "total": 100,
+        "breakdown": {
+            "agents_md": agents,
+            "codebase_map": codebase_map,
+            "cli_manifest": cli_manifest,
+            "root_cleanliness": root_cleanliness,
+        },
+    }
+def _score_agents_md(repo_root: Path, config: Config) -> dict[str, Any]:
+    """30 pts: AGENTS.md exists AND links to map, manifest, and ADR dir."""
+    path = repo_root / config.paths.agents_md
+    required = [
+        config.paths.codebase_map,
+        config.paths.cli_manifest,
+        config.paths.adr_dir.rstrip("/"),
+    ]
+    if not path.is_file():
+        return {
+            "score": 0,
+            "max": _AGENTS_MD_WEIGHT,
+            "exists": False,
+            "missing_links": required,
+        }
+    text = path.read_text(encoding="utf-8", errors="ignore")
+    missing = [link for link in required if link not in text]
+    if missing:
+        partial = 10 + (len(required) - len(missing)) / len(required) * 20
+        return {
+            "score": round(partial, 2),
+            "max": _AGENTS_MD_WEIGHT,
+            "exists": True,
+            "missing_links": missing,
+        }
+    return {
+        "score": _AGENTS_MD_WEIGHT,
+        "max": _AGENTS_MD_WEIGHT,
+        "exists": True,
+        "missing_links": [],
+    }
+def _score_codebase_map(
+    repo_root: Path,
+    config: Config,
+    adapter: LanguageAdapter,
+) -> dict[str, Any]:
+    """30 pts: codebase map exists AND mentions every production module."""
+    path = repo_root / config.paths.codebase_map
+    required_modules = adapter.find_production_modules(repo_root)
+    total_modules = len(required_modules)
+    if not path.is_file():
+        return {
+            "score": 0,
+            "max": _CODEBASE_MAP_WEIGHT,
+            "exists": False,
+            "matched_projects": 0,
+            "total_projects": total_modules,
+        }
+    if total_modules == 0:
+        # Nothing to check -> give full credit, noting the adapter found no
+        # modules (which the module_hygiene component will also reflect).
+        return {
+            "score": _CODEBASE_MAP_WEIGHT,
+            "max": _CODEBASE_MAP_WEIGHT,
+            "exists": True,
+            "matched_projects": 0,
+            "total_projects": 0,
+            "note": "no production modules discovered",
+        }
+    text = path.read_text(encoding="utf-8", errors="ignore")
+    matched = sum(1 for name in required_modules if name in text)
+    ratio = matched / total_modules
+    score = round(ratio * _CODEBASE_MAP_WEIGHT, 2)
+    return {
+        "score": score,
+        "max": _CODEBASE_MAP_WEIGHT,
+        "exists": True,
+        "matched_projects": matched,
+        "total_projects": total_modules,
+    }
+def _score_cli_manifest(repo_root: Path, config: Config) -> dict[str, Any]:
+    """20 pts: manifest exists, is fresh, and has enough commands."""
+    path = repo_root / config.paths.cli_manifest
+    if not path.is_file():
+        return {"score": 0, "max": _CLI_MANIFEST_WEIGHT, "exists": False}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, json.JSONDecodeError) as exc:
+        return {
+            "score": 0,
+            "max": _CLI_MANIFEST_WEIGHT,
+            "exists": True,
+            "parse_error": str(exc),
+        }
+    command_count = len(data.get("commands", []) or [])
+    age_days = file_mtime_age_days(path)
+    thresholds = config.thresholds
+    if age_days <= thresholds.cli_manifest_fresh_days:
+        freshness_pts = float(_CLI_MANIFEST_WEIGHT)
+    elif age_days <= thresholds.cli_manifest_stale_days:
+        freshness_pts = _CLI_MANIFEST_WEIGHT / 2.0
+    else:
+        freshness_pts = 0.0
+    if command_count < thresholds.cli_manifest_min_commands:
+        freshness_pts /= 2.0
+    return {
+        "score": round(freshness_pts, 2),
+        "max": _CLI_MANIFEST_WEIGHT,
+        "exists": True,
+        "age_days": round(age_days, 2),
+        "command_count": command_count,
+    }
+def _score_root_cleanliness(repo_root: Path, config: Config) -> dict[str, Any]:
+    """20 pts: no stale artifacts in the repo root."""
+    allowlist = set(config.root_allowlist)
+    violations: list[str] = []
+    for entry in sorted(repo_root.iterdir()):
+        if entry.is_dir():
+            continue
+        name = entry.name
+        if name in allowlist:
+            continue
+        for pattern in _STALE_ARTIFACT_PATTERNS:
+            if pattern.match(name):
+                violations.append(name)
+                break
+    count = len(violations)
+    score = scale_linear(
+        count,
+        zero_at=config.thresholds.root_stale_max_penalty_count,
+        full_at=0,
+        max_pts=_ROOT_CLEAN_WEIGHT,
+    )
+    return {
+        "score": round(score, 2),
+        "max": _ROOT_CLEAN_WEIGHT,
+        "violation_count": count,
+        "violations": violations[:10],
+    }