PyPI - python-code-quality - Versions diffs - 0.1.16__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

python-code-quality 0.1.16__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

py_cq/__init__.py +3 -4
py_cq/api.py +248 -0
py_cq/cli.py +216 -90
py_cq/config/config.toml +95 -0
py_cq/context_hash.py +18 -8
py_cq/execution_engine.py +191 -26
py_cq/language_detector.py +4 -1
py_cq/llm_formatter.py +200 -18
py_cq/localtypes.py +53 -7
py_cq/parsers/__init__.py +1 -1
py_cq/parsers/banditparser.py +42 -19
py_cq/parsers/common.py +184 -15
py_cq/parsers/compileparser.py +9 -4
py_cq/parsers/complexityparser.py +38 -9
py_cq/parsers/coverageparser.py +184 -70
py_cq/parsers/exitcodeparser.py +11 -2
py_cq/parsers/halsteadparser.py +41 -20
py_cq/parsers/interrogateparser.py +261 -25
py_cq/parsers/linecountparser.py +10 -2
py_cq/parsers/maintainabilityparser.py +32 -9
py_cq/parsers/pytestparser.py +77 -20
py_cq/parsers/regexcountparser.py +13 -3
py_cq/parsers/ruffparser.py +160 -16
py_cq/parsers/typarser.py +175 -43
py_cq/parsers/vultureparser.py +22 -16
py_cq/table_formatter.py +16 -2
py_cq/tool_registry.py +7 -6
{python_code_quality-0.1.16.dist-info → python_code_quality-0.2.2.dist-info}/METADATA +88 -3
python_code_quality-0.2.2.dist-info/RECORD +35 -0
{python_code_quality-0.1.16.dist-info → python_code_quality-0.2.2.dist-info}/WHEEL +1 -1
py_cq/config/config.yaml +0 -94
python_code_quality-0.1.16.dist-info/RECORD +0 -34
{python_code_quality-0.1.16.dist-info → python_code_quality-0.2.2.dist-info}/entry_points.txt +0 -0

py_cq/parsers/banditparser.py CHANGED Viewed

@@ -7,9 +7,16 @@ logistic-variant score stored under the ``security`` metric key.
 """
 import json
+import logging
 from py_cq.localtypes import AbstractParser, RawResult, ToolResult
-from py_cq.parsers.common import format_source_context, score_logistic_variant
+from py_cq.parsers.common import (
+    extract_first_issue,
+    format_source_context,
+    score_logistic_variant,
+)
+log = logging.getLogger("cq")
 _SEVERITY_WEIGHT = {"HIGH": 5, "MEDIUM": 2, "LOW": 1}
@@ -18,12 +25,26 @@ class BanditParser(AbstractParser):
     """Parses raw JSON output from ``bandit -f json`` into a ToolResult."""
     def parse(self, raw_result: RawResult) -> ToolResult:
+        """Parses the raw bandit JSON output into a ToolResult."""
         try:
             data = json.loads(raw_result.stdout)
         except (json.JSONDecodeError, ValueError):
-            return ToolResult(raw=raw_result, metrics={"security": 1.0})
+            log.warning(
+                "bandit output is not valid JSON (return_code=%s). Reporting degraded score.",
+                raw_result.return_code,
+            )
+            degraded = 0.0 if raw_result.return_code != 0 else 0.5
+            return ToolResult(raw=raw_result, metrics={"security": degraded})
         if not isinstance(data, dict):
-            return ToolResult(raw=raw_result, metrics={"security": 1.0})
+            log.warning("bandit output is not a JSON object. Reporting degraded score.")
+            return ToolResult(raw=raw_result, metrics={"security": 0.5})
+        totals = data.get("metrics", {}).get("_totals", {})
+        log.debug(
+            "bandit scanned %d LOC across %d files",
+            totals.get("loc", 0),
+            len(data.get("metrics", {})) - 1,
+        )
         files: dict[str, list] = {}
         weighted = 0
@@ -32,29 +53,31 @@ class BanditParser(AbstractParser):
             if "/.venv/" in path or "/site-packages/" in path:
                 continue
             severity = issue.get("issue_severity", "LOW")
-            files.setdefault(path, []).append({
-                "line": issue.get("line_number", 0),
-                "code": issue.get("test_id", ""),
-                "severity": severity,
-                "confidence": issue.get("issue_confidence", ""),
-                "message": issue.get("issue_text", ""),
-            })
+            files.setdefault(path, []).append(
+                {
+                    "line": issue.get("line_number", 0),
+                    "code": issue.get("test_id", ""),
+                    "severity": severity,
+                    "confidence": issue.get("issue_confidence", ""),
+                    "message": issue.get("issue_text", ""),
+                }
+            )
             weighted += _SEVERITY_WEIGHT.get(severity, 1)
         score = score_logistic_variant(weighted, scale_factor=10)
         return ToolResult(raw=raw_result, metrics={"security": score}, details=files)
-    def format_llm_message(self, tr: ToolResult, *, context_lines: int = 15) -> str:
-        if not tr.details:
-            return "bandit reported issues (no details available)"
-        file, issues = next(iter(tr.details.items()))
-        if not isinstance(issues, list) or not issues:
-            return "bandit reported issues (no details available)"
-        issue = issues[0]
-        if not isinstance(issue, dict):
+    def format_llm_message(
+        self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
+    ) -> str:
+        """Formats the bandit result into a LLM-friendly message string."""
+        result = extract_first_issue(tr.details)
+        if result is None:
             return "bandit reported issues (no details available)"
+        file, issue = result
         line = issue.get("line", "?")
         code = issue.get("code", "")
         severity = issue.get("severity", "")
         message = issue.get("message", "")
-        return f"`{file}:{line}` — **{code}** [{severity}]: {message}{format_source_context(file, line, count=context_lines)}"
+        return f"{file}:{line} - {code}: [{severity}] {message}{format_source_context(file, line, count=context_lines)}"

py_cq/parsers/common.py CHANGED Viewed

@@ -12,6 +12,7 @@ performance metrics or error scores:
 Both functions return a float and can be used directly in downstream analytics,
 visualisation or decision-making pipelines."""
+import json
 import re
 from pathlib import Path
@@ -19,14 +20,18 @@ from pathlib import Path
 def read_source_lines(file_path: str, line: int, count: int = 5) -> str:
     """Return up to `count` source lines starting at the given 1-based line number."""
     try:
-        all_lines = Path(file_path).read_text(encoding="utf-8").splitlines()
+        all_lines = (
+            Path(file_path).read_text(encoding="utf-8", errors="replace").splitlines()
+        )
         start = max(0, line - 1)
         return "\n".join(all_lines[start : start + count])
     except (OSError, ValueError):
         return ""
-def format_source_context(file: str, line: int | str, context: int = 3, count: int = 8) -> str:
+def format_source_context(
+    file: str, line: int | str, context: int = 3, count: int = 8
+) -> str:
     """Return a fenced python code block for the source around `line`, or '' if unavailable.
     Stops before spilling into the next top-level ``def`` or ``class`` definition.
@@ -51,12 +56,39 @@ def format_source_context(file: str, line: int | str, context: int = 3, count: i
     return f"\n```python\n{src}\n```"
-_PYTHON_KEYWORDS = frozenset([
-    "if", "elif", "else", "for", "while", "with", "assert", "return",
-    "raise", "import", "from", "class", "def", "lambda", "yield",
-    "del", "pass", "break", "continue", "not", "and", "or", "in", "is",
-    "print", "super", "type", "len", "range",
-])
+_PYTHON_KEYWORDS = frozenset(
+    [
+        "if",
+        "elif",
+        "else",
+        "for",
+        "while",
+        "with",
+        "assert",
+        "return",
+        "raise",
+        "import",
+        "from",
+        "class",
+        "def",
+        "lambda",
+        "yield",
+        "del",
+        "pass",
+        "break",
+        "continue",
+        "not",
+        "and",
+        "or",
+        "in",
+        "is",
+        "print",
+        "super",
+        "type",
+        "len",
+        "range",
+    ]
+)
 def extract_callee_name(source_line: str) -> str | None:
@@ -71,7 +103,7 @@ def extract_callee_name(source_line: str) -> str | None:
     if "=" in stripped and not stripped.startswith(("assert", "return")):
         rhs = stripped.split("=", 1)[1].strip()
     m = re.search(r"\b([a-zA-Z_]\w*)\s*\(", rhs)
-    if m and m.group(1) not in _PYTHON_KEYWORDS:
+    if m and m.group(1) not in _PYTHON_KEYWORDS and len(m.group(1)) > 1:
         return m.group(1)
     return None
@@ -89,7 +121,21 @@ def _find_project_root(hint_file: str) -> Path:
     return root
-def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tuple[str, str]:
+_SKIP_DIRS = {
+    ".venv",
+    "venv",
+    "__pycache__",
+    ".git",
+    "node_modules",
+    ".tox",
+    "dist",
+    "build",
+}
+def find_in_project(
+    func_name: str, hint_file: str, max_lines: int = 10
+) -> tuple[str, str]:
     """Find func_name definition in project files; same file first, then project-wide.
     Returns ``(file_path, code_block)`` for the first match, or ``("", "")`` if not found.
@@ -99,6 +145,8 @@ def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tupl
         return hint_file, result
     root = _find_project_root(hint_file)
     for py_file in sorted(root.rglob("*.py")):
+        if any(part in _SKIP_DIRS for part in py_file.parts):
+            continue
         if py_file.resolve() == Path(hint_file).resolve():
             continue
         r = find_function_source(str(py_file), func_name, max_lines=max_lines)
@@ -108,11 +156,20 @@ def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tupl
 def _relative_path(path: str) -> str:
-    """Return path relative to cwd, normalised to forward slashes."""
+    """Return path relative to project root if possible, otherwise absolute. Forward slashes."""
     try:
-        return str(Path(path).relative_to(Path.cwd())).replace("\\", "/")
-    except ValueError:
+        resolved = Path(path).resolve()
+    except (OSError, ValueError):
         return path.replace("\\", "/")
+    try:
+        return resolved.relative_to(_find_project_root(path)).as_posix()
+    except ValueError:
+        return resolved.as_posix()
+def format_issue_header(file: str, line: int, code: str, message: str) -> str:
+    """Return a clean single-line issue header: path:line - CODE: message."""
+    return f"{_relative_path(file)}:{line} - {code}: {message}"
 def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -> str:
@@ -120,7 +177,7 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
     Output format::
-        Callee `func_name` — `relative/path/to/file.py`
+        Callee `func_name` - `relative/path/to/file.py`
         ```python
         N: def func_name(...):
         ...
@@ -134,6 +191,60 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
     return f"\n`{func_name}` is defined at: `{_relative_path(callee_file)}{line_ref}`{code_block}"
+def enclosing_function_range(file: str, line: int) -> tuple[int, int] | None:
+    """Return (start_line, end_line) 1-based for the function enclosing `line`, or None."""
+    try:
+        all_lines = Path(file).read_text(encoding="utf-8").splitlines()
+    except (OSError, ValueError):
+        return None
+    if line < 1 or line > len(all_lines):
+        return None
+    target_idx = line - 1
+    target_indent = len(all_lines[target_idx]) - len(all_lines[target_idx].lstrip())
+    def_re = re.compile(r"^(\s*)(?:async\s+)?def\s+")
+    start_idx = baseline_indent = None
+    for i in range(target_idx - 1, -1, -1):
+        m = def_re.match(all_lines[i])
+        if m:
+            indent = len(m.group(1))
+            if indent < target_indent:
+                start_idx, baseline_indent = i, indent
+                break
+    if start_idx is None or baseline_indent is None:
+        return None
+    end_idx = start_idx
+    in_body = ":" in all_lines[start_idx].split("#")[0]
+    for i, ln in enumerate(all_lines[start_idx + 1 :], start=start_idx + 1):
+        stripped = ln.lstrip()
+        if not in_body:
+            if ":" in ln.split("#")[0]:
+                in_body = True
+            end_idx = i
+        else:
+            if stripped and len(ln) - len(stripped) <= baseline_indent:
+                break
+            end_idx = i
+    return (start_idx + 1, end_idx + 1)
+def find_enclosing_function(file: str, line: int, max_lines: int = 50) -> str:
+    """Return a fenced python block for the function enclosing 1-based `line`, or '' if not found."""
+    r = enclosing_function_range(file, line)
+    if r is None:
+        return ""
+    start_line, end_line = r
+    try:
+        all_lines = Path(file).read_text(encoding="utf-8").splitlines()
+    except (OSError, ValueError):
+        return ""
+    start_idx = start_line - 1
+    collected = list(all_lines[start_idx : min(end_line, start_idx + max_lines)])
+    while collected and not collected[-1].strip():
+        collected.pop()
+    numbered = "\n".join(f"{start_line + i}: {ln}" for i, ln in enumerate(collected))
+    return f"\n```python\n{numbered}\n```"
 def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
     """Return a fenced python block for the body of func_name, or '' if unavailable."""
     try:
@@ -151,11 +262,30 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
         return ""
     start_idx, baseline_indent = match_result
     collected = [all_lines[start_idx]]
+    in_docstring = False
+    docstring_marker: str | None = None
+    past_docstring = False
     for line in all_lines[start_idx + 1 :]:
         stripped = line.lstrip()
         indent = len(line) - len(stripped)
         if stripped and indent <= baseline_indent:
             break
+        if not past_docstring:
+            if not in_docstring:
+                quote = next(
+                    (q for q in ('"""', "'''") if stripped.startswith(q)), None
+                )
+                if quote:
+                    in_docstring = quote not in stripped[3:]
+                    past_docstring = not in_docstring
+                    docstring_marker = quote
+                    continue
+                past_docstring = bool(stripped)
+            else:
+                if docstring_marker and docstring_marker in stripped:
+                    in_docstring = False
+                    past_docstring = True
+                continue
         collected.append(line)
         if len(collected) >= max_lines:
             break
@@ -165,8 +295,47 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
     return f"\n```python\n{numbered}\n```"
+def resolve_path(base: str, rel_file: str) -> str:
+    """Return (base / rel_file) as a posix string; return rel_file unchanged if absolute or base is empty."""
+    if not base or not rel_file:
+        return rel_file
+    try:
+        p = Path(rel_file)
+        if p.is_absolute():
+            return rel_file
+        return (Path(base) / rel_file).as_posix()
+    except (OSError, ValueError):
+        return rel_file
+def parse_json_dict(stdout: str) -> dict | None:
+    """Parse stdout as a JSON object; return None if invalid or not a dict."""
+    try:
+        data = json.loads(stdout)
+    except (json.JSONDecodeError, ValueError):
+        return None
+    return data if isinstance(data, dict) else None
+def extract_first_issue(details: dict) -> tuple[str, dict] | None:
+    """Return (file, issue) from the first list-typed entry in details, or None."""
+    if not details:
+        return None
+    file, issues = next(iter(details.items()))
+    if not isinstance(issues, list) or not issues:
+        return None
+    issue = issues[0]
+    return (file, issue) if isinstance(issue, dict) else None
 def inv_normalize(value: float, max_value: float) -> float:
-    """Returns the inverse normalized value of `value` relative to `max_value`."""
+    """Returns the inverse normalized value of `value` relative to `max_value`.
+    When *max_value* is zero the result is defined as 1.0 (no deviation from a
+    zero-sized reference).
+    """
+    if max_value == 0:
+        return 1.0
     return (max_value - min(value, max_value)) / max_value

py_cq/parsers/compileparser.py CHANGED Viewed

@@ -26,7 +26,7 @@ class CompileParser(AbstractParser):
         events and error messages. For each file that emits an error, it extracts
         the line number, source snippet, error type, and help text, normalizes the
         file path, and stores this information in a dictionary keyed by file path.
-        It then computes a failure ratio (failed files ÷ total compilations) and
+        It then computes a failure ratio (failed files / total compilations) and
         derives a compile score via ``score_logistic_variant``.  The original
         ``stdout`` is cleaned of ``Listing`` lines and back-slash path separators
         are replaced with forward slashes.  A ``ToolResult`` containing the raw
@@ -122,7 +122,9 @@ class CompileParser(AbstractParser):
             tr.details["failed_files"] = failed_files
         return tr
-    def format_llm_message(self, tr: ToolResult, *, context_lines: int = 15) -> str:
+    def format_llm_message(
+        self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
+    ) -> str:
         """Return the first compilation failure as a defect description."""
         failed = tr.details.get("failed_files", {})
         if not failed:
@@ -131,12 +133,15 @@ class CompileParser(AbstractParser):
         line = info.get("line", "?")
         typ = info.get("type", "Error")
         help_msg = info.get("help", "")
-        code_block = format_source_context(file, line, count=context_lines) or (f"\n```python\n{info['src']}\n```" if info.get("src") else "")
+        code_block = format_source_context(file, line, count=context_lines) or (
+            f"\n```python\n{info['src']}\n```" if info.get("src") else ""
+        )
         callee = ""
         src_line = info.get("src", "")
         if src_line:
             from py_cq.parsers.common import extract_callee_name, format_callee_context
             func_name = extract_callee_name(src_line)
             if func_name:
                 callee = format_callee_context(func_name, file)
-        return f"`{file}:{line}` — **{typ}**: {help_msg}{code_block}{callee}"
+        return f"{file}:{line} - {typ}: {help_msg}{code_block}{callee}"

py_cq/parsers/complexityparser.py CHANGED Viewed

@@ -1,9 +1,11 @@
 """Provides a `ComplexityParser` that converts raw complexity-analysis output into structured `ToolResult` objects for downstream use."""
-import json
 from py_cq.localtypes import AbstractParser, RawResult, ToolResult
-from py_cq.parsers.common import score_logistic_variant
+from py_cq.parsers.common import (
+    find_function_source,
+    parse_json_dict,
+    score_logistic_variant,
+)
 class ComplexityParser(AbstractParser):
@@ -64,12 +66,8 @@ class ComplexityParser(AbstractParser):
             >>> result.metrics["simplicity"]
             0.4"""
         tr = ToolResult(raw=raw_result)
-        try:
-            data = json.loads(raw_result.stdout)
-        except (json.JSONDecodeError, ValueError):
-            tr.metrics["simplicity"] = 0.0
-            return tr
-        if not isinstance(data, dict):
+        data = parse_json_dict(raw_result.stdout)
+        if data is None:
             tr.metrics["simplicity"] = 0.0
             return tr
         score = 0
@@ -91,3 +89,34 @@ class ComplexityParser(AbstractParser):
                 }
         tr.metrics["simplicity"] = score / num_items if num_items > 0 else 0.0
         return tr
+    def format_llm_message(
+        self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
+    ) -> str:
+        """Formats the LLM message based on the tool result."""
+        worst_file = worst_func = worst_rank = None
+        worst_score = 1.0
+        for file, funcs in tr.details.items():
+            if not isinstance(funcs, dict):
+                continue
+            for func_name, data in funcs.items():
+                score = data.get("simplicity", 1.0)
+                if score < worst_score:
+                    worst_score = score
+                    worst_file = file
+                    worst_func = func_name
+                    worst_rank = data.get("rank", "F")
+        if worst_file is None or worst_func is None:
+            if tr.metrics:
+                metric_name, value = next(iter(tr.metrics.items()))
+                return f"**{metric_name}** score: {value:.3f}"
+            return "No complexity details available"
+        source = find_function_source(worst_file, worst_func, max_lines=context_lines)
+        header = f"`{worst_file}::{worst_func}` - cyclomatic complexity rank **{worst_rank}**"
+        parts = [header]
+        if source:
+            parts.append(source)
+        parts.append(
+            "Cyclomatic complexity is too high. Break this function into smaller, single-purpose helpers."
+        )
+        return "\n\n".join(parts)

python-code-quality 0.1.16__py3-none-any.whl → 0.2.2__py3-none-any.whl

python-code-quality 0.1.16__py3-none-any.whl → 0.2.2__py3-none-any.whl