PyPI - agentrepocoach - Versions diffs - 0.2.0__py3-none-any.whl - Mend

agentrepocoach 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

agentrepocoach/__init__.py +14 -0
agentrepocoach/__main__.py +4 -0
agentrepocoach/adapters/__init__.py +64 -0
agentrepocoach/adapters/base.py +195 -0
agentrepocoach/adapters/csharp.py +419 -0
agentrepocoach/adapters/go.py +283 -0
agentrepocoach/adapters/python.py +244 -0
agentrepocoach/adapters/rust.py +304 -0
agentrepocoach/adapters/typescript.py +351 -0
agentrepocoach/cli.py +155 -0
agentrepocoach/components/__init__.py +27 -0
agentrepocoach/components/decision_queryability.py +192 -0
agentrepocoach/components/documentation.py +205 -0
agentrepocoach/components/error_quality.py +162 -0
agentrepocoach/components/module_hygiene.py +175 -0
agentrepocoach/components/test_quality.py +179 -0
agentrepocoach/compute.py +84 -0
agentrepocoach/config.py +263 -0
agentrepocoach/output.py +267 -0
agentrepocoach/scoring.py +34 -0
agentrepocoach-0.2.0.dist-info/METADATA +202 -0
agentrepocoach-0.2.0.dist-info/RECORD +26 -0
agentrepocoach-0.2.0.dist-info/WHEEL +5 -0
agentrepocoach-0.2.0.dist-info/entry_points.txt +2 -0
agentrepocoach-0.2.0.dist-info/licenses/LICENSE +202 -0
agentrepocoach-0.2.0.dist-info/top_level.txt +1 -0

agentrepocoach/__init__.py ADDED Viewed

@@ -0,0 +1,14 @@
+"""AgentRepoCoach — Codebase Agent Health (CAH) composite score.
+Public entry points:
+    from agentrepocoach import compute_cah, VERSION
+    result = compute_cah(Path("/path/to/repo"))
+"""
+from __future__ import annotations
+from .compute import compute_cah
+VERSION = "0.2.0"
+__all__ = ["compute_cah", "VERSION"]

agentrepocoach/__main__.py ADDED Viewed

@@ -0,0 +1,4 @@
+"""Enable ``python -m agentrepocoach``."""
+from .cli import main
+raise SystemExit(main())  # See cli.py:main() for argument parsing

agentrepocoach/adapters/__init__.py ADDED Viewed

@@ -0,0 +1,64 @@
+"""Adapter registry and language detection."""
+from __future__ import annotations
+from pathlib import Path
+from .base import Declaration, LanguageAdapter, NotSupportedError, ThrowSite
+from .csharp import CSharpAdapter
+from .go import GoAdapter
+from .python import PythonAdapter
+from .rust import RustAdapter
+from .typescript import TypeScriptAdapter
+_REGISTRY: dict[str, type[LanguageAdapter]] = {
+    "csharp": CSharpAdapter,
+    "python": PythonAdapter,
+    "typescript": TypeScriptAdapter,
+    "rust": RustAdapter,
+    "go": GoAdapter,
+}
class NoAdapterError(RuntimeError):
    """Signal that no language adapter is available for the request.

    Used both for an adapter name that is not registered and for a
    repository in which no adapter reports a positive confidence.
    """
def get_adapter_by_name(name: str) -> LanguageAdapter:
    """Build a fresh adapter instance for the registered language ``name``.

    Raises:
        NoAdapterError: ``name`` is not a key of the adapter registry. The
            message lists every supported name.
    """
    adapter_cls = _REGISTRY.get(name)
    if adapter_cls is not None:
        return adapter_cls()
    known = ", ".join(sorted(_REGISTRY))
    raise NoAdapterError(
        f"Unknown adapter '{name}'. Supported: {known}."
        f" Check spelling or use --language to specify one of: {known}."
    )
def detect_primary(repo_path: Path) -> LanguageAdapter:
    """Pick the adapter whose ``detect()`` confidence for ``repo_path`` is highest.

    Every registered adapter is instantiated and asked for a confidence;
    adapters answering ``0.0`` or less are ignored.

    Raises:
        NoAdapterError: no adapter reported a positive confidence.
    """
    scored: list[tuple[float, LanguageAdapter]] = []
    for adapter_cls in _REGISTRY.values():
        instance = adapter_cls()
        score = instance.detect(repo_path)
        if score > 0.0:
            scored.append((score, instance))
    if scored:
        # max() returns the first maximal entry, so equal confidences are
        # resolved in registry order.
        return max(scored, key=lambda entry: entry[0])[1]
    known = ", ".join(sorted(_REGISTRY))
    raise NoAdapterError(
        f"No supported language detected in {repo_path}. Supported: {known}."
        " Try using --language to force an adapter, or check that the repo contains a recognized project file."
    )
+__all__ = [
+    "CSharpAdapter",
+    "Declaration",
+    "GoAdapter",
+    "LanguageAdapter",
+    "NoAdapterError",
+    "NotSupportedError",
+    "PythonAdapter",
+    "RustAdapter",
+    "ThrowSite",
+    "TypeScriptAdapter",
+    "detect_primary",
+    "get_adapter_by_name",
+]

agentrepocoach/adapters/base.py ADDED Viewed

@@ -0,0 +1,195 @@
+"""Language adapter abstract base class.
+Every supported language contributes one concrete adapter subclass with a
+single-file footprint. The base class declares the 9 methods components need
+to compute the Codebase Agent Health (CAH) score.
+Adapters are language-neutral contracts. No component should contain a
+language-specific regex or file-extension check; that belongs here.
+"""
+from __future__ import annotations
+import os
+import re
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable
@dataclass(frozen=True)
class ThrowSite:
    """Immutable, language-neutral record of a single throw/raise site."""

    # Source file that contains the throw/raise statement.
    file: Path
    # Line number of the site inside ``file``, as reported by the adapter.
    line: int
    # Name of the exception type being raised.
    exception_type: str
    # Whether the message at this site carries an actionable fix hint.
    has_fix_hint: bool
    # Whether the exception type is one of the repository's own types.
    is_user_defined: bool
    # Whether the exception type counts as "too generic" for the language.
    is_generic: bool
@dataclass(frozen=True)
class Declaration:
    """Immutable record of one top-level declaration and its visibility."""

    # Source file that contains the declaration.
    file: Path
    # Line number of the declaration inside ``file``.
    line: int
    # Declared identifier (class / struct / function name).
    name: str
    # One of "public", "internal" or "private".
    visibility: str
    # Whether a documentation comment is attached to the declaration.
    has_doc_comment: bool
class NotSupportedError(NotImplementedError):
    """Error for stub adapters: the language is recognised but analysis is unavailable."""
class LanguageAdapter(ABC):
    """Contract every language adapter fulfils; one concrete subclass per language.

    All nine methods below are abstract. Subclasses override ``name`` with
    the identifier under which the adapter is registered.
    """

    name: str = "base"

    # ------- Detection -------
    @abstractmethod
    def detect(self, repo_path: Path) -> float:
        """Score, from 0.0 to 1.0, how confident the adapter is that it fits ``repo_path``."""

    # ------- File discovery -------
    @abstractmethod
    def find_production_files(self, repo_path: Path) -> list[Path]:
        """List production source files, leaving out generated and build artifacts."""

    @abstractmethod
    def find_test_files(self, repo_path: Path) -> list[Path]:
        """List source files that live under the repo's test directory convention."""

    @abstractmethod
    def find_production_modules(self, repo_path: Path) -> list[str]:
        """List logical module names (projects/packages) for navigability's codebase_map check."""

    # ------- Throw-site analysis (error_quality) -------
    @abstractmethod
    def scan_throw_sites(
        self,
        files: Iterable[Path],
        hint_marker: str,
        domain_exception_types: set[str],
    ) -> list[ThrowSite]:
        """Locate each throw/raise in ``files`` and classify it as a ``ThrowSite``."""

    @abstractmethod
    def generic_exception_names(self) -> set[str]:
        """Name the language-stdlib exception types regarded as 'too generic'."""

    # ------- Declarations (module_hygiene) -------
    @abstractmethod
    def scan_declarations(self, files: Iterable[Path]) -> list[Declaration]:
        """Locate each top-level declaration, with its visibility and doc-comment flag."""

    # ------- Test-method analysis (test_quality) -------
    @abstractmethod
    def find_test_methods(self, files: Iterable[Path]) -> list[tuple[Path, str]]:
        """Produce a ``(file, method_name)`` pair for each test method found."""

    @abstractmethod
    def test_naming_pattern(self) -> re.Pattern[str]:
        """Provide the regex for the idiomatic test-method naming convention."""
+# ---------------------------------------------------------------------------
+# Safe file iteration helpers — shared by all adapters.
+# ---------------------------------------------------------------------------
+def iter_source_files(root: Path, suffixes: tuple[str, ...], exclude_substrings: tuple[str, ...] = (), exclude_suffixes: tuple[str, ...] = (), follow_symlinks: bool = False, max_file_bytes: int = 10_485_760) -> list[Path]:
+    """Walk ``root`` and return files matching ``suffixes``.
+    Hardened against three threat-model risks:
+    1. Symlink traversal — ``follow_symlinks=False`` by default.
+    2. Large-file OOM — files over ``max_file_bytes`` are skipped.
+    3. Path injection via resolved-outside-root — only entries under ``root``
+       after ``os.walk`` are returned.
+    """
+    results: list[Path] = []
+    if not root.is_dir():
+        return results
+    for dirpath, dirnames, filenames in os.walk(root, followlinks=follow_symlinks):
+        # Prune excluded directories in-place so os.walk does not descend.
+        dirnames[:] = [d for d in dirnames if not _is_excluded_segment(d)]
+        for filename in filenames:
+            if not any(filename.endswith(sfx) for sfx in suffixes):
+                continue
+            if exclude_suffixes and any(filename.endswith(sfx) for sfx in exclude_suffixes):
+                continue
+            path_str = os.path.join(dirpath, filename)
+            if exclude_substrings and any(needle in path_str for needle in exclude_substrings):
+                continue
+            path = Path(path_str)
+            if not follow_symlinks and path.is_symlink():
+                continue
+            try:
+                if path.stat().st_size > max_file_bytes:
+                    continue
+            except OSError:
+                continue
+            results.append(path)
+    return results
+# Default directories to prune during iteration. Covers common build / cache
+# directories across all languages.
+_EXCLUDED_SEGMENTS: frozenset[str] = frozenset({
+    ".git",
+    ".hg",
+    ".svn",
+    "node_modules",
+    "vendor",
+    "third_party",
+    "bin",
+    "obj",
+    "__pycache__",
+    ".venv",
+    "venv",
+    ".tox",
+    ".mypy_cache",
+    ".pytest_cache",
+    ".ruff_cache",
+    "dist",
+    "build",
+    "target",
+})
+def _is_excluded_segment(name: str) -> bool:
+    return name in _EXCLUDED_SEGMENTS
def read_text_safely(path: Path, max_bytes: int = 10_485_760) -> str:
    """Read ``path`` as UTF-8 text, returning ``''`` whenever it cannot be read.

    Undecodable bytes are dropped (``errors="ignore"``). A leading UTF-8
    byte-order mark is stripped by decoding with ``utf-8-sig``; otherwise the
    mark would reach callers as a ``\\ufeff`` character glued to the first
    token of the file and defeat ``startswith``-style checks on line one.

    Args:
        path: File to read.
        max_bytes: Files larger than this are not read at all.

    Returns:
        The decoded text, or ``''`` if the file is too large, missing or
        unreadable.
    """
    try:
        if path.stat().st_size > max_bytes:
            return ""
        return path.read_text(encoding="utf-8-sig", errors="ignore")
    except OSError:
        return ""
def count_file_loc(path: Path, max_bytes: int = 10_485_760) -> int:
    """Return the number of lines in ``path``; 0 when it is oversized or unreadable.

    The file is decoded as UTF-8 with undecodable bytes ignored. Files
    larger than ``max_bytes`` are not opened.
    """
    try:
        oversized = path.stat().st_size > max_bytes
        if oversized:
            return 0
        total = 0
        with path.open(encoding="utf-8", errors="ignore") as stream:
            for _line in stream:
                total += 1
        return total
    except OSError:
        return 0

agentrepocoach/adapters/csharp.py ADDED Viewed

@@ -0,0 +1,419 @@
+"""C# language adapter.
+Auto-discovers production modules by scanning ``*.csproj`` files at the repo
+root and mapping each project's containing directory to a production source
+tree. Tests directories are detected by conventional naming (``*.Tests``,
+``*Test``) and by ``*Tests.csproj`` filename patterns.
+Throw-site scanning is ported from the methodology research throw-site
+extractor — a minimal C# tokenizer that walks ``throw new <Name>Exception(``
+sites, extracts the message argument text, and classifies each site by:
+- ``exception_type`` — the C# type name raised
+- ``has_fix_hint`` — does the message contain the configured hint marker
+- ``is_user_defined`` — is the type declared inside the repo (user exception)
+- ``is_generic`` — is the type one of the language-stdlib "too generic" names
+"""
+from __future__ import annotations
+import re
+from pathlib import Path
+from typing import Iterable
+from .base import (
+    Declaration,
+    LanguageAdapter,
+    ThrowSite,
+    count_file_loc,
+    iter_source_files,
+    read_text_safely,
+)
+# Source-file patterns.
+_CSHARP_SUFFIX: tuple[str, ...] = (".cs",)
+_CSHARP_EXCLUDE_SUFFIXES: tuple[str, ...] = (".Designer.cs", ".g.cs", ".g.i.cs")
+_CSHARP_EXCLUDE_PATH_SUBSTRINGS: tuple[str, ...] = ("/bin/", "/obj/")
+# Throw-site scanning.
+_THROW_PATTERN = re.compile(r"\bthrow\s+new\s+([A-Za-z_][A-Za-z0-9_]*Exception)\s*\(")
+# Declaration scanning.
+_PUBLIC_DECL_PATTERN = re.compile(
+    r"\bpublic\s+(?:sealed\s+|abstract\s+|static\s+|partial\s+)*"
+    r"(?:class|interface|record|enum|struct)\s+(\w+)",
+)
+_INTERNAL_DECL_PATTERN = re.compile(
+    r"\binternal\s+(?:sealed\s+|abstract\s+|static\s+|partial\s+)*"
+    r"(?:class|interface|record|enum|struct)\s+(\w+)",
+)
+_PRIVATE_DECL_PATTERN = re.compile(
+    r"\bprivate\s+(?:sealed\s+|abstract\s+|static\s+|partial\s+)*"
+    r"(?:class|interface|record|enum|struct)\s+(\w+)",
+)
+# Test-method naming conventions (xUnit / NUnit / MSTest Method_Scenario_Expected).
+_TEST_METHOD_PATTERN = re.compile(r"\bpublic\s+(?:async\s+)?(?:Task|void)\s+(\w+)\s*\(")
+_MSE_NAMING_PATTERN = re.compile(r"^[A-Za-z][A-Za-z0-9]*_[A-Za-z0-9]+_[A-Za-z0-9]+")
+# Keywords that indicate an actionable fix hint in an error message. These are
+# intentionally simple substrings (case-insensitive) so adapters do not need a
+# full natural-language parser.
+_FIX_HINT_WORD_KEYWORDS: tuple[str, ...] = (
+    "run",
+    "use",
+    "try",
+    "check",
+    "see",
+    "set",
+    "add",
+    "install",
+    "provide",
+    "ensure",
+    "verify",
+    "enable",
+    "configure",
+    "register",
+    "retry",
+    "rerun",
+)
+_FIX_HINT_SUBSTRING_KEYWORDS: tuple[str, ...] = (
+    "did you mean",
+    "available:",
+    "expected:",
+    "allowed:",
+    "supported:",
+    "valid:",
+    "valid values:",
+    "matches:",
+    "hint:",
+    "fix:",
+    "example:",
+    "to fix",
+    "to resolve",
+    "suggested fix",
+    "environment variable",
+    ".md",
+    ".json",
+    ".cs",
+)
+# Language-stdlib exception types considered "too generic" for good agent UX.
+_GENERIC_EXCEPTION_NAMES: frozenset[str] = frozenset({
+    "Exception",
+    "SystemException",
+    "InvalidOperationException",
+    "ApplicationException",
+})
class CSharpAdapter(LanguageAdapter):
    """C# / .NET adapter. MVP implementation.

    Projects are discovered through their ``*.csproj`` files anywhere below
    the repository; a project is treated as a test project when its file
    name matches ``_looks_like_test_project``. All source analysis is
    regex / line based.
    """

    name = "csharp"

    # ------------------------------------------------------------------
    # Detection
    # ------------------------------------------------------------------
    def detect(self, repo_path: Path) -> float:
        """1.0 if any *.sln, 0.8 if any *.csproj, else 0.0."""
        if any(repo_path.rglob("*.sln")):
            return 1.0
        if any(repo_path.rglob("*.csproj")):
            return 0.8
        return 0.0

    # ------------------------------------------------------------------
    # File discovery
    # ------------------------------------------------------------------
    def find_production_files(self, repo_path: Path) -> list[Path]:
        """Return all production *.cs files, skipping bin/obj/generated files."""
        return self._collect_cs_files(self._find_production_project_dirs(repo_path))

    def find_test_files(self, repo_path: Path) -> list[Path]:
        """Return all test *.cs files (projects whose name matches test conventions)."""
        return self._collect_cs_files(self._find_test_project_dirs(repo_path))

    def find_production_modules(self, repo_path: Path) -> list[str]:
        """Return the sorted, unique project names of every production *.csproj."""
        return sorted({
            proj.stem
            for proj in self._iter_csproj_files(repo_path)
            if not self._looks_like_test_project(proj)
        })

    def _iter_csproj_files(self, repo_path: Path) -> list[Path]:
        """Return every ``*.csproj`` below ``repo_path`` outside ``bin``/``obj``.

        Only directories *inside* the repository are inspected, so a checkout
        that itself lives under a folder named ``bin`` or ``obj`` still has
        its projects discovered. Comparing path parts rather than a
        ``"/bin/"`` substring keeps the check independent of the platform
        separator. The result is sorted for a reproducible order.
        """
        projects: list[Path] = []
        for candidate in sorted(repo_path.rglob("*.csproj")):
            inner_dirs = candidate.relative_to(repo_path).parts[:-1]
            if "bin" in inner_dirs or "obj" in inner_dirs:
                continue
            projects.append(candidate)
        return projects

    def _find_production_project_dirs(self, repo_path: Path) -> list[Path]:
        """Directories of every project that does not look like a test project."""
        return [
            proj.parent
            for proj in self._iter_csproj_files(repo_path)
            if not self._looks_like_test_project(proj)
        ]

    def _find_test_project_dirs(self, repo_path: Path) -> list[Path]:
        """Directories of every project that looks like a test project."""
        return [
            proj.parent
            for proj in self._iter_csproj_files(repo_path)
            if self._looks_like_test_project(proj)
        ]

    @staticmethod
    def _looks_like_test_project(csproj_path: Path) -> bool:
        """True when the project file name (case-insensitive) ends in a test suffix."""
        return csproj_path.stem.lower().endswith((".tests", "tests", ".test"))

    def _iter_cs_files(self, dir_path: Path) -> list[Path]:
        """All non-generated ``*.cs`` files below ``dir_path``."""
        return iter_source_files(
            dir_path,
            suffixes=_CSHARP_SUFFIX,
            exclude_substrings=_CSHARP_EXCLUDE_PATH_SUBSTRINGS,
            exclude_suffixes=_CSHARP_EXCLUDE_SUFFIXES,
        )

    def _collect_cs_files(self, project_dirs: Iterable[Path]) -> list[Path]:
        """Gather ``*.cs`` files from the given project directories exactly once.

        Project directories can coincide or nest (two ``*.csproj`` files in
        one folder, or a project below another project's folder). Without
        de-duplication such files would be returned once per project.
        First-seen order is preserved.
        """
        seen: set[Path] = set()
        ordered: list[Path] = []
        for proj_dir in project_dirs:
            for cs_file in self._iter_cs_files(proj_dir):
                if cs_file not in seen:
                    seen.add(cs_file)
                    ordered.append(cs_file)
        return ordered

    # ------------------------------------------------------------------
    # Throw-site analysis
    # ------------------------------------------------------------------
    def scan_throw_sites(
        self,
        files: Iterable[Path],
        hint_marker: str,
        domain_exception_types: set[str],
    ) -> list[ThrowSite]:
        """Scan every file for ``throw new X(...)`` sites.

        Files that are empty or cannot be read contribute nothing.
        """
        sites: list[ThrowSite] = []
        for path in files:
            text = read_text_safely(path)
            if not text:
                continue
            sites.extend(
                self._scan_throw_sites_in_text(
                    path,
                    text,
                    hint_marker,
                    domain_exception_types,
                )
            )
        return sites

    def _scan_throw_sites_in_text(
        self,
        path: Path,
        text: str,
        hint_marker: str,
        domain_exception_types: set[str],
    ) -> list[ThrowSite]:
        """Classify each ``_THROW_PATTERN`` match found in ``text``."""
        results: list[ThrowSite] = []
        for match in _THROW_PATTERN.finditer(text):
            exception_type = match.group(1)
            # The pattern ends on the opening paren of the argument list.
            paren_start = match.end() - 1
            args_text, _ = _extract_throw_message(text, paren_start)
            line_no = text.count("\n", 0, match.start()) + 1
            results.append(
                ThrowSite(
                    file=path,
                    line=line_no,
                    exception_type=exception_type,
                    has_fix_hint=_has_fix_hint(args_text, hint_marker),
                    is_user_defined=exception_type in domain_exception_types,
                    is_generic=exception_type in _GENERIC_EXCEPTION_NAMES,
                )
            )
        return results

    def generic_exception_names(self) -> set[str]:
        """Return a fresh mutable copy of the generic exception type names."""
        return set(_GENERIC_EXCEPTION_NAMES)

    # ------------------------------------------------------------------
    # Declarations
    # ------------------------------------------------------------------
    def scan_declarations(self, files: Iterable[Path]) -> list[Declaration]:
        """Find type declarations in ``files``; unreadable or empty files are skipped."""
        declarations: list[Declaration] = []
        for path in files:
            text = read_text_safely(path)
            if not text:
                continue
            declarations.extend(self._scan_declarations_in_text(path, text))
        return declarations

    def _scan_declarations_in_text(self, path: Path, text: str) -> list[Declaration]:
        """Scan ``text`` line by line; at most one declaration is taken per line."""
        lines = text.splitlines()
        results: list[Declaration] = []
        for i, line in enumerate(lines):
            visibility = _declaration_visibility(line)
            if visibility is None:
                continue
            name_match = _declaration_name(line, visibility)
            if name_match is None:
                continue
            results.append(
                Declaration(
                    file=path,
                    line=i + 1,
                    name=name_match,
                    visibility=visibility,
                    has_doc_comment=_has_preceding_xml_doc(lines, i),
                )
            )
        return results

    # ------------------------------------------------------------------
    # Test methods
    # ------------------------------------------------------------------
    def find_test_methods(self, files: Iterable[Path]) -> list[tuple[Path, str]]:
        """Return ``(file, method_name)`` for every ``_TEST_METHOD_PATTERN`` match."""
        results: list[tuple[Path, str]] = []
        for path in files:
            text = read_text_safely(path)
            if not text:
                continue
            results.extend(
                (path, match.group(1))
                for match in _TEST_METHOD_PATTERN.finditer(text)
            )
        return results

    def test_naming_pattern(self) -> re.Pattern[str]:
        """Return the Method_Scenario_Expected naming regex."""
        return _MSE_NAMING_PATTERN
+# ---------------------------------------------------------------------------
+# Module-level helpers (kept outside the class so they can be unit-tested
+# without instantiating the adapter).
+# ---------------------------------------------------------------------------
+def _extract_throw_message(source: str, start: int) -> tuple[str, int]:
+    """Walk from the opening paren at ``start`` to the matching close-paren.
+    Handles nested parens, string literals (including verbatim @"..." and
+    escaped), and line comments. Returns the argument text and the index
+    just past the closing paren. This is a minimal C# tokenizer good enough
+    for throw-site message extraction.
+    """
+    depth = 1
+    i = start + 1
+    n = len(source)
+    buf: list[str] = []
+    while i < n and depth > 0:
+        ch = source[i]
+        # String literals.
+        if ch == '"':
+            buf.append(ch)
+            i += 1
+            is_verbatim = i >= 2 and source[i - 2] == "@"
+            while i < n:
+                cur = source[i]
+                buf.append(cur)
+                if is_verbatim:
+                    if cur == '"':
+                        if i + 1 < n and source[i + 1] == '"':
+                            buf.append(source[i + 1])
+                            i += 2
+                            continue
+                        i += 1
+                        break
+                    i += 1
+                else:
+                    if cur == "\\" and i + 1 < n:
+                        buf.append(source[i + 1])
+                        i += 2
+                        continue
+                    if cur == '"':
+                        i += 1
+                        break
+                    i += 1
+            continue
+        # Line comments.
+        if ch == "/" and i + 1 < n and source[i + 1] == "/":
+            while i < n and source[i] != "\n":
+                i += 1
+            continue
+        if ch == "(":
+            depth += 1
+        elif ch == ")":
+            depth -= 1
+            if depth == 0:
+                return "".join(buf), i + 1
+        buf.append(ch)
+        i += 1
+    return "".join(buf), i
def _has_fix_hint(message_text: str, hint_marker: str) -> bool:
    """Decide whether ``message_text`` carries any fix-hint signal.

    Matching is case-insensitive and checks, in order: the configurable
    ``hint_marker`` (when non-empty), the substring keywords, then the
    action verbs as whole words.
    """
    haystack = message_text.lower()
    marker_hit = bool(hint_marker) and hint_marker.lower() in haystack
    if marker_hit or any(token in haystack for token in _FIX_HINT_SUBSTRING_KEYWORDS):
        return True
    # Word-bounded action verbs.
    return any(
        re.search(rf"\b{re.escape(verb)}\b", haystack) is not None
        for verb in _FIX_HINT_WORD_KEYWORDS
    )
def _declaration_visibility(line: str) -> str | None:
    """Name the visibility of the type declaration on ``line``, if any.

    Patterns are tried in the order public, internal, private; the first
    one that matches anywhere in the line wins. ``None`` means no match.
    """
    ordered = (
        ("public", _PUBLIC_DECL_PATTERN),
        ("internal", _INTERNAL_DECL_PATTERN),
        ("private", _PRIVATE_DECL_PATTERN),
    )
    for label, pattern in ordered:
        if pattern.search(line):
            return label
    return None
def _declaration_name(line: str, visibility: str) -> str | None:
    """Extract the declared type name from ``line`` for the given ``visibility``.

    Returns ``None`` when that visibility's pattern does not match the line.

    Raises:
        KeyError: ``visibility`` is not "public", "internal" or "private".
    """
    pattern = {
        "public": _PUBLIC_DECL_PATTERN,
        "internal": _INTERNAL_DECL_PATTERN,
        "private": _PRIVATE_DECL_PATTERN,
    }[visibility]
    found = pattern.search(line)
    if found is None:
        return None
    return found.group(1)
+def _has_preceding_xml_doc(lines: list[str], index: int) -> bool:
+    """Walk backwards past attributes / blank lines. Return True if the first
+    non-attribute / non-blank line above ``index`` starts with ``///``.
+    """
+    j = index - 1
+    while j >= 0:
+        stripped = lines[j].strip()
+        if stripped.startswith("[") and stripped.endswith("]"):
+            j -= 1
+            continue
+        if stripped == "":
+            j -= 1
+            continue
+        return stripped.startswith("///")
+    return False
+# Expose count_file_loc for components to use via adapter.
+CSharpAdapter.count_file_loc = staticmethod(count_file_loc)  # type: ignore[attr-defined]