python-code-quality 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_cq/__init__.py +10 -0
- py_cq/cli.py +229 -0
- py_cq/config/__init__.py +27 -0
- py_cq/config/tools.yaml +97 -0
- py_cq/context_hash.py +81 -0
- py_cq/execution_engine.py +160 -0
- py_cq/llm_formatter.py +47 -0
- py_cq/localtypes.py +135 -0
- py_cq/main.py +12 -0
- py_cq/metric_aggregator.py +14 -0
- py_cq/parsers/__init__.py +0 -0
- py_cq/parsers/banditparser.py +52 -0
- py_cq/parsers/common.py +87 -0
- py_cq/parsers/compileparser.py +134 -0
- py_cq/parsers/complexityparser.py +86 -0
- py_cq/parsers/coverageparser.py +88 -0
- py_cq/parsers/halsteadparser.py +174 -0
- py_cq/parsers/interrogateparser.py +58 -0
- py_cq/parsers/maintainabilityparser.py +63 -0
- py_cq/parsers/pytestparser.py +81 -0
- py_cq/parsers/ruffparser.py +61 -0
- py_cq/parsers/typarser.py +65 -0
- py_cq/parsers/vultureparser.py +48 -0
- py_cq/py.typed +0 -0
- py_cq/storage.py +27 -0
- py_cq/tool_registry.py +36 -0
- python_code_quality-0.1.4.dist-info/METADATA +188 -0
- python_code_quality-0.1.4.dist-info/RECORD +31 -0
- python_code_quality-0.1.4.dist-info/WHEEL +4 -0
- python_code_quality-0.1.4.dist-info/entry_points.txt +2 -0
- python_code_quality-0.1.4.dist-info/licenses/LICENSE +21 -0
py_cq/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
"""Provides a simple greeting function that returns a friendly message.
|
|
2
|
+
|
|
3
|
+
The module defines a single function, `hello`, which returns the string
|
|
4
|
+
`'Hello from py_cq!'`. It can serve as a minimal example, placeholder, or
|
|
5
|
+
testing stub in larger applications."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def hello() -> str:
    """Return the fixed greeting string ``'Hello from py_cq!'``."""
    greeting = "Hello from py_cq!"
    return greeting
|
py_cq/cli.py
ADDED
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
"""CLI for static analysis of Python projects.
|
|
2
|
+
|
|
3
|
+
Provides a Typer command `check` that accepts a path to a Python file or project
|
|
4
|
+
directory, executes a suite of static analysis tools, aggregates their
|
|
5
|
+
results, and outputs the data either as JSON or as a human-readable Rich
|
|
6
|
+
table. The command supports configurable logging, cache clearing,
|
|
7
|
+
score-only output, and optional parallel execution to accelerate
|
|
8
|
+
analysis.
|
|
9
|
+
|
|
10
|
+
Helper functions such as `format_as_table` convert the aggregated tool
|
|
11
|
+
results into a Rich Table for convenient console display.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import copy
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import tomllib
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import typer
|
|
22
|
+
from rich.console import Console
|
|
23
|
+
from rich.logging import RichHandler
|
|
24
|
+
from rich.table import Table
|
|
25
|
+
|
|
26
|
+
from py_cq.config import DEFAULT_STORAGE_FILE, load_user_config
|
|
27
|
+
from py_cq.execution_engine import _cache as tool_cache
|
|
28
|
+
from py_cq.execution_engine import run_tools
|
|
29
|
+
from py_cq.localtypes import CombinedToolResults, ToolConfig
|
|
30
|
+
from py_cq.metric_aggregator import aggregate_metrics
|
|
31
|
+
from py_cq.storage import save_result
|
|
32
|
+
from py_cq.tool_registry import tool_registry
|
|
33
|
+
|
|
34
|
+
logging.basicConfig(
|
|
35
|
+
level="INFO",
|
|
36
|
+
format="%(message)s",
|
|
37
|
+
datefmt="[%X]",
|
|
38
|
+
handlers=[RichHandler(markup=True)],
|
|
39
|
+
)
|
|
40
|
+
log = logging.getLogger("cq")
|
|
41
|
+
app = typer.Typer(
|
|
42
|
+
epilog=(
|
|
43
|
+
"Examples:\n\n"
|
|
44
|
+
" cq check . # full table with all metrics (default)\n\n"
|
|
45
|
+
" cq check . -o llm # top defect as markdown (primary LLM workflow)\n\n"
|
|
46
|
+
" cq check . -o score # numeric score only\n\n"
|
|
47
|
+
" cq config . # show effective tool configuration"
|
|
48
|
+
),
|
|
49
|
+
)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _apply_user_config(base: dict[str, ToolConfig], user_cfg: dict) -> dict[str, ToolConfig]:
|
|
53
|
+
"""Return a modified copy of base with user overrides applied.
|
|
54
|
+
|
|
55
|
+
Supports:
|
|
56
|
+
- ``disable``: list of tool IDs to remove
|
|
57
|
+
- ``thresholds.<tool_id>.warning`` / ``.error``: override per-tool thresholds
|
|
58
|
+
"""
|
|
59
|
+
registry = {k: copy.copy(v) for k, v in base.items()}
|
|
60
|
+
for tool_id in user_cfg.get("disable", []):
|
|
61
|
+
registry.pop(tool_id, None)
|
|
62
|
+
for tool_id, thresholds in user_cfg.get("thresholds", {}).items():
|
|
63
|
+
if tool_id in registry:
|
|
64
|
+
if "warning" in thresholds:
|
|
65
|
+
registry[tool_id].warning_threshold = float(thresholds["warning"])
|
|
66
|
+
if "error" in thresholds:
|
|
67
|
+
registry[tool_id].error_threshold = float(thresholds["error"])
|
|
68
|
+
return registry
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class OutputMode(str, Enum):
    """Enum of output types.

    Inherits from ``str`` so members compare equal to their literal values,
    which lets Typer parse and display them directly on the command line.
    """

    TABLE = "table"  # full Rich table with all metrics (default)
    SCORE = "score"  # numeric overall score only
    JSON = "json"    # aggregated results as a JSON document
    LLM = "llm"      # top defect rendered as a markdown prompt
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@app.callback()
def callback():
    """CQ - Code Quality Analysis Tool."""


# Shared Rich console used by the `check` and `config` commands below.
console = Console()
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@app.command()
def check(
    path: str = typer.Argument(".", help="Path to Python file or project directory"),
    output: OutputMode = typer.Option(
        OutputMode.TABLE, "--output", "-o", help="Output mode: table (default), score, json, llm"
    ),
    log_level: str = typer.Option(
        "CRITICAL",
        "--log-level",
        help="Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL)",
    ),
    out_file: str = typer.Option(
        DEFAULT_STORAGE_FILE,
        "--out-file",
        help="File path to save results in table mode",
    ),
    clear_cache: bool = typer.Option(
        False, "--clear-cache", help="Clear cached tool results before running"
    ),
    workers: int = typer.Option(
        0, "--workers", help="Max parallel workers (default: one per tool, use 1 for sequential)"
    ),
):
    """Run static analysis on a Python file or project directory."""
    # Validate the target: must be a .py file, or a directory with pyproject.toml.
    path_obj = Path(path)
    if not path_obj.exists():
        raise typer.BadParameter(f"Path does not exist: {path}")
    if path_obj.is_file():
        if path_obj.suffix != ".py":
            raise typer.BadParameter(f"File must be a Python file (.py): {path}")
    elif path_obj.is_dir():
        if not (path_obj / "pyproject.toml").exists():
            raise typer.BadParameter(f"Directory must contain pyproject.toml: {path}")
    log.setLevel(log_level)
    # Merge [tool.cq] overrides from the project's pyproject.toml into the registry.
    effective_registry = _apply_user_config(tool_registry, load_user_config(path_obj))
    if clear_cache:
        tool_cache.clear()
    tool_results = run_tools(effective_registry.values(), path, workers)
    for tr in tool_results:
        log.debug(json.dumps(tr.to_dict(), indent=2))
    combined_metrics = aggregate_metrics(path=path, metrics=tool_results)
    if output == OutputMode.SCORE:
        console.print(combined_metrics.score)
    elif output == OutputMode.JSON:
        console.print(json.dumps(combined_metrics.to_dict(), indent=2))
    elif output == OutputMode.LLM:
        # Silence logging so the markdown prompt is the only thing printed.
        log.setLevel("CRITICAL")
        # Imported lazily; only needed for this output mode.
        from py_cq.llm_formatter import format_for_llm

        console.print(format_for_llm(effective_registry, combined_metrics))
    else:
        # Table mode: persist the aggregated results, then render them.
        save_result(combined_tool_results=combined_metrics, file_name=out_file)
        console.print(format_as_table(combined_metrics, effective_registry))
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@app.command()
def config(
    path: str = typer.Argument(".", help="Path to Python file or project directory"),
) -> None:
    """Show the effective tool configuration for a project."""
    # A file path resolves to its parent directory's pyproject.toml.
    path_obj = Path(path).resolve()
    toml_path = (
        path_obj.parent / "pyproject.toml"
        if path_obj.is_file()
        else path_obj / "pyproject.toml"
    )

    # Determine where (if anywhere) user overrides come from, for display.
    if not toml_path.exists():
        status_text = "[yellow]file not found[/yellow]"
        user_cfg: dict = {}
    else:
        with toml_path.open("rb") as f:
            toml_data = tomllib.load(f)
        cq_section = toml_data.get("tool", {}).get("cq")
        if cq_section is None:
            status_text = "[yellow]no [tool.cq] section[/yellow]"
            user_cfg = {}
        else:
            status_text = "[green]merged from [tool.cq][/green]"
            user_cfg = cq_section

    console.print(f"Config: [bold]{toml_path}[/bold] ({status_text})\n")

    effective_registry = _apply_user_config(tool_registry, user_cfg)
    # Tools removed by a `disable` entry are still listed, just marked disabled.
    disabled_ids = set(tool_registry.keys()) - set(effective_registry.keys())

    table = Table()
    table.add_column("Tool", style="cyan")
    table.add_column("Priority", justify="right")
    table.add_column("Warning", justify="right")
    table.add_column("Error", justify="right")
    table.add_column("Status", justify="center")

    for tool_id in sorted(tool_registry, key=lambda t: tool_registry[t].priority):
        # Fall back to the default config for disabled tools so their
        # original thresholds are still shown.
        tc = effective_registry.get(tool_id, tool_registry[tool_id])
        is_disabled = tool_id in disabled_ids
        status = "[red]disabled[/red]" if is_disabled else "[green]enabled[/green]"
        table.add_row(
            tc.name,
            str(tc.priority),
            f"{tc.warning_threshold:.2f}",
            f"{tc.error_threshold:.2f}",
            status,
        )

    console.print(table)
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def format_as_table(data: CombinedToolResults, registry: dict[str, ToolConfig]) -> Table:
    """Format combined tool results into a Rich Table.

    Args:
        data (CombinedToolResults): Aggregated tool results, including the path,
            individual tool results, and the overall score.
        registry (dict[str, ToolConfig]): Effective tool configurations, used to
            look up each tool's warning/error thresholds.

    Returns:
        rich.table.Table: A Rich table with columns ``Tool``, ``Time``, ``Metric``,
        ``Score`` and ``Status``. Each metric row gets a status label based on
        the tool's thresholds. The table is titled with the data path and ends
        with a row showing the overall score.

    Raises:
        LookupError: If a tool result has no matching entry in *registry*.

    Example:
        >>> table = format_as_table(combined_results, effective_registry)
        >>> console.print(table)
    """
    table = Table(title=f"[bold green]{data.path}[/]", width=80)
    table.add_column("Tool", justify="left", no_wrap=True)
    table.add_column("Time", justify="right", style="dim")
    table.add_column("Metric", justify="right", style="cyan", no_wrap=True)
    table.add_column("Score", style="magenta")
    table.add_column("Status")
    for tr in data.tool_results:
        tool_name = tr.raw.tool_name
        # Fail with a clear message instead of a bare StopIteration when the
        # result's tool is missing from the registry. (Renamed from `config`
        # to avoid shadowing the `config` CLI command.)
        tool_config = next((t for t in registry.values() if t.name == tool_name), None)
        if tool_config is None:
            raise LookupError(f"No tool configuration found for {tool_name!r}")
        for i, (metric_name, value) in enumerate(tr.metrics.items()):
            if value < tool_config.error_threshold:
                status = "[bold red]Error[/]"
            elif value < tool_config.warning_threshold:
                status = "[yellow]Warning[/]"
            else:
                status = "[green]OK[/]"
            # Show the tool's runtime only on its first metric row.
            time_str = f"{tr.duration_s:.2f}s" if i == 0 else ""
            table.add_row(tool_name, time_str, metric_name, f"{value:0.3f}", status)
    table.add_row("", "", "[bold]Score[/]", f"[bold]{data.score:0.3f}[/]", "")
    return table
|
py_cq/config/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Default storage path and user config loader."""
|
|
2
|
+
|
|
3
|
+
import tomllib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
DEFAULT_STORAGE_FILE = ".cq.json"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def load_user_config(project_path: Path) -> dict:
|
|
10
|
+
"""Read [tool.cq] from pyproject.toml at project_path, if present.
|
|
11
|
+
|
|
12
|
+
Args:
|
|
13
|
+
project_path: Path to the project directory or a .py file.
|
|
14
|
+
|
|
15
|
+
Returns:
|
|
16
|
+
The contents of [tool.cq] as a plain dict, or {} if absent.
|
|
17
|
+
"""
|
|
18
|
+
toml_path = (
|
|
19
|
+
project_path.parent / "pyproject.toml"
|
|
20
|
+
if project_path.is_file()
|
|
21
|
+
else project_path / "pyproject.toml"
|
|
22
|
+
)
|
|
23
|
+
if not toml_path.exists():
|
|
24
|
+
return {}
|
|
25
|
+
with toml_path.open("rb") as f:
|
|
26
|
+
data = tomllib.load(f)
|
|
27
|
+
return data.get("tool", {}).get("cq", {})
|
py_cq/config/tools.yaml
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
tools:
|
|
2
|
+
|
|
3
|
+
compilation:
|
|
4
|
+
name: "compile"
|
|
5
|
+
command: "{python} -m compileall -r 10 -j 8 {context_path} -x .*venv"
|
|
6
|
+
parser: "CompileParser"
|
|
7
|
+
priority: 1
|
|
8
|
+
warning_threshold: 0.9999
|
|
9
|
+
error_threshold: 0.9999
|
|
10
|
+
|
|
11
|
+
bandit:
|
|
12
|
+
name: "bandit"
|
|
13
|
+
command: "{python} -m bandit -r {context_path} -f json -q -s B101 --severity-level medium --exclude ./.venv,./tests/"
|
|
14
|
+
parser: "BanditParser"
|
|
15
|
+
priority: 2
|
|
16
|
+
warning_threshold: 0.9999
|
|
17
|
+
error_threshold: 0.8
|
|
18
|
+
|
|
19
|
+
ruff:
|
|
20
|
+
name: "ruff"
|
|
21
|
+
command: "{python} -m ruff check --output-format concise --no-cache {context_path}"
|
|
22
|
+
parser: "RuffParser"
|
|
23
|
+
priority: 3
|
|
24
|
+
warning_threshold: 0.9999
|
|
25
|
+
error_threshold: 0.9
|
|
26
|
+
|
|
27
|
+
ty:
|
|
28
|
+
name: "ty"
|
|
29
|
+
command: "{python} -m ty check --output-format concise --color never {context_path}"
|
|
30
|
+
parser: "TyParser"
|
|
31
|
+
priority: 4
|
|
32
|
+
warning_threshold: 0.9999
|
|
33
|
+
error_threshold: 0.8
|
|
34
|
+
run_in_target_env: true
|
|
35
|
+
extra_deps:
|
|
36
|
+
- ty
|
|
37
|
+
|
|
38
|
+
pytest:
|
|
39
|
+
name: "pytest"
|
|
40
|
+
command: "{python} -m pytest -v {context_path}"
|
|
41
|
+
parser: "PytestParser"
|
|
42
|
+
priority: 5
|
|
43
|
+
warning_threshold: 0.7
|
|
44
|
+
error_threshold: 0.5
|
|
45
|
+
run_in_target_env: true
|
|
46
|
+
|
|
47
|
+
coverage:
|
|
48
|
+
name: "coverage"
|
|
49
|
+
command: "{python} -m coverage run -m pytest {context_path} && {python} -m coverage report"
|
|
50
|
+
parser: "CoverageParser"
|
|
51
|
+
priority: 6
|
|
52
|
+
warning_threshold: 0.9
|
|
53
|
+
error_threshold: 0.5
|
|
54
|
+
run_in_target_env: true
|
|
55
|
+
extra_deps:
|
|
56
|
+
- coverage
|
|
57
|
+
- pytest
|
|
58
|
+
|
|
59
|
+
complexity:
|
|
60
|
+
name: "radon cc"
|
|
61
|
+
command: "{python} -m radon cc --json {context_path}"
|
|
62
|
+
parser: "ComplexityParser"
|
|
63
|
+
priority: 7
|
|
64
|
+
warning_threshold: 0.6
|
|
65
|
+
error_threshold: 0.4
|
|
66
|
+
|
|
67
|
+
maintainability:
|
|
68
|
+
name: "radon mi"
|
|
69
|
+
command: "{python} -m radon mi -s --json {context_path}"
|
|
70
|
+
parser: "MaintainabilityParser"
|
|
71
|
+
priority: 8
|
|
72
|
+
warning_threshold: 0.6
|
|
73
|
+
error_threshold: 0.4
|
|
74
|
+
|
|
75
|
+
halstead:
|
|
76
|
+
name: "radon hal"
|
|
77
|
+
command: "{python} -m radon hal -f --json {context_path}"
|
|
78
|
+
parser: "HalsteadParser"
|
|
79
|
+
priority: 9
|
|
80
|
+
warning_threshold: 0.5
|
|
81
|
+
error_threshold: 0.3
|
|
82
|
+
|
|
83
|
+
vulture:
|
|
84
|
+
name: "vulture"
|
|
85
|
+
command: "{python} -m vulture {context_path} --min-confidence 80 --exclude .venv,dist,.*_cache,docs,.git"
|
|
86
|
+
parser: "VultureParser"
|
|
87
|
+
priority: 10
|
|
88
|
+
warning_threshold: 0.9999
|
|
89
|
+
error_threshold: 0.8
|
|
90
|
+
|
|
91
|
+
interrogate:
|
|
92
|
+
name: "interrogate"
|
|
93
|
+
command: "{python} -m interrogate {context_path} -v --fail-under 0"
|
|
94
|
+
parser: "InterrogateParser"
|
|
95
|
+
priority: 11
|
|
96
|
+
warning_threshold: 0.8
|
|
97
|
+
error_threshold: 0.3
|
py_cq/context_hash.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Utilities for computing cryptographic signatures and context hashes.
|
|
2
|
+
|
|
3
|
+
This module offers two lightweight helpers that simplify integrity checks and
|
|
4
|
+
change-detection in larger systems:
|
|
5
|
+
|
|
6
|
+
* **`get_sigs(path)`** - Recursively scans a directory tree and returns a list of
|
|
7
|
+
signature strings for all Python files, ignoring virtual-environment and
|
|
8
|
+
cache directories. Each signature encodes the file path, its size in bytes,
|
|
9
|
+
and its last-modified timestamp (`st_mtime`).
|
|
10
|
+
|
|
11
|
+
* **`get_context_hash(path)`** - Computes an MD5 digest that uniquely identifies
|
|
12
|
+
a file or directory. For a file it hashes its path, size, and modification
|
|
13
|
+
time; for a directory it aggregates the signatures of all contained files.
|
|
14
|
+
|
|
15
|
+
These functions provide deterministic fingerprints that can be used for
|
|
16
|
+
file integrity verification, caching, and change-detection logic.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import hashlib
|
|
20
|
+
import os
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_sigs(path: str) -> list[str]:
    """Recursively collect signatures for all Python files under *path*.

    The signature format is ``<file_path>:<size_bytes>:<mtime>`` where ``mtime``
    is the last-modification timestamp from ``os.stat``. The traversal skips
    directories named ``.venv``, ``venv`` and ``__pycache__``.

    Entries are visited in sorted name order so the resulting list — and any
    hash derived from it (see ``get_context_hash``) — is deterministic;
    ``os.scandir`` yields entries in arbitrary, filesystem-dependent order.

    Args:
        path (str): The root directory to scan.

    Returns:
        list[str]: A signature string for each ``.py`` file found.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        PermissionError: If the process cannot access a directory or file.

    Example:
        >>> get_sigs('/tmp/project')
        ['/tmp/project/main.py:1024:1680000000.0', ...]
    """
    skip_dirs = {".venv", "venv", "__pycache__"}
    items: list[str] = []
    with os.scandir(path) as entries:
        # Sort for a deterministic traversal order (scandir order is arbitrary,
        # which previously made directory hashes unstable across runs/hosts).
        for entry in sorted(entries, key=lambda e: e.name):
            if entry.is_file() and entry.name.endswith(".py"):
                stat_info = entry.stat()
                items.append(f"{entry.path}:{stat_info.st_size}:{stat_info.st_mtime}")
            if entry.is_dir() and entry.name not in skip_dirs:
                items.extend(get_sigs(entry.path))
    return items
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def get_context_hash(path: str) -> str:
    """Compute an MD5 hash that uniquely identifies a file or directory.

    The hash is derived from a signature string. For a file, the signature is
    its path, size, and modification time. For a directory, it is the
    concatenation of the signatures of all contained files (recursively, via
    ``get_sigs``). Any other path hashes the literal string ``"empty"``.

    Args:
        path (str): The filesystem path to hash.

    Returns:
        str: The hexadecimal MD5 digest.

    Raises:
        OSError: If the file or directory cannot be accessed.

    Example:
        >>> get_context_hash('/tmp/example.txt')
        '5d41402abc4b2a76b9719d911017c592'
    """
    if os.path.isfile(path):
        stat_info = os.stat(path)
        sig = f"{path}:{stat_info.st_size}:{stat_info.st_mtime}"
    elif os.path.isdir(path):
        sig = "".join(get_sigs(path=path))
    else:
        # Missing/special paths hash a fixed sentinel.
        sig = "empty"
    return hashlib.md5(sig.encode("utf-8")).hexdigest()  # nosec
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""Utilities for executing tools and caching their results.
|
|
2
|
+
|
|
3
|
+
This module provides helper functions to run command-line tools while
|
|
4
|
+
automatically caching their output. The key capabilities are:
|
|
5
|
+
|
|
6
|
+
* ``run_tool`` - executes a single tool configuration, captures its
|
|
7
|
+
stdout/stderr/return code, and records a timestamp.
|
|
8
|
+
* ``run_tools`` - runs many tool configurations, optionally in parallel,
|
|
9
|
+
and returns the parsed results.
|
|
10
|
+
|
|
11
|
+
All functions are designed for reuse in data-processing pipelines
|
|
12
|
+
where tool invocations may be expensive and should be avoided
|
|
13
|
+
when a cached result already exists."""
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
import shutil
|
|
17
|
+
import subprocess
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
from collections.abc import Collection
|
|
21
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import cast
|
|
24
|
+
|
|
25
|
+
import diskcache
|
|
26
|
+
|
|
27
|
+
from py_cq.context_hash import get_context_hash
|
|
28
|
+
from py_cq.localtypes import RawResult, ToolConfig, ToolResult
|
|
29
|
+
|
|
30
|
+
log = logging.getLogger("cq")
|
|
31
|
+
|
|
32
|
+
_cache = diskcache.Cache(Path.home() / ".cache" / "cq")
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _find_project_root(path: Path) -> Path | None:
|
|
36
|
+
"""Walk up from path to find the nearest directory containing pyproject.toml."""
|
|
37
|
+
for parent in [path] + list(path.parents):
|
|
38
|
+
candidate = parent if parent.is_dir() else parent.parent
|
|
39
|
+
if (candidate / "pyproject.toml").exists():
|
|
40
|
+
return candidate
|
|
41
|
+
return None
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def run_tool(tool_config: ToolConfig, context_path: str) -> RawResult:
    """Runs a tool defined by its configuration and returns the execution result.

    Results are cached in the module-level disk cache, keyed on the fully
    formatted command plus a content hash of ``context_path``; re-running
    against an unchanged context returns the cached ``RawResult`` without
    spawning a subprocess.

    Args:
        tool_config (ToolConfig): Configuration object containing the tool's name and a
            command template.
        context_path (str): Filesystem path that will be substituted into the command
            template via ``context_path`` formatting.

    Returns:
        RawResult: An object holding the tool name, the command that was executed,
        standard output, standard error, the process return code, and a timestamp
        of when the command finished.

    Example:
        >>> result = run_tool(my_tool_config, "/tmp/context")
        >>> result.return_code
        0"""
    python = sys.executable
    path = context_path
    if tool_config.run_in_target_env:
        # Run inside the target project's own environment via `uv run` so the
        # tool sees the project's dependencies; fall back to our interpreter
        # when uv is not installed.
        uv = shutil.which("uv")
        if uv:
            resolved = Path(context_path).resolve()
            if resolved.is_dir():
                abs_dir = str(resolved)
                path = "."
            else:
                # Single file: anchor uv at the nearest enclosing project root,
                # or the file's own directory if none is found.
                project_root = _find_project_root(resolved)
                abs_dir = str(project_root) if project_root else str(resolved.parent)
                path = str(resolved)
            # `--with` injects ad-hoc extra dependencies into the uv run.
            with_flags = " ".join(f"--with {dep}" for dep in tool_config.extra_deps)
            python = f'"{uv}" run --directory "{abs_dir}" {with_flags}'.rstrip()
    command = tool_config.command.format(context_path=path, python=python)
    # Cache key combines the exact command with a hash of the target's contents,
    # so either a code change or a different command invalidates the entry.
    cache_key = f"{command}:{get_context_hash(context_path)}"
    if cache_key in _cache:
        log.info(f"Cache hit: {command}")
        return cast(RawResult, _cache[cache_key])
    log.info(f"Running: {command}")
    result = subprocess.run(command, capture_output=True, text=True, shell=True)  # nosec
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    raw_result = RawResult(
        tool_name=tool_config.name,
        command=command,
        stdout=result.stdout,
        stderr=result.stderr,
        return_code=result.returncode,
        timestamp=timestamp,
    )
    _cache[cache_key] = raw_result
    return raw_result
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def run_tools(tool_configs: Collection[ToolConfig], path: str, max_workers: int = 0) -> list[ToolResult]:
    """Run multiple tools and return their parsed results.

    Each tool in *tool_configs* is executed via :func:`run_tool` on the file or
    directory at *path*, and its raw output is parsed by the tool's
    ``parser_class`` into a :class:`ToolResult`. Tools run concurrently in a
    :class:`concurrent.futures.ThreadPoolExecutor`. Exceptions raised by an
    individual tool are logged via ``log.error`` and that tool's result is
    omitted from the returned list.

    Args:
        tool_configs (Collection[ToolConfig]): Tool configuration objects. Each
            must expose ``name``, ``priority``, a ``parser_class`` callable, and
            whatever else :func:`run_tool` requires.
        path (str): Path to the input file or directory the tools should analyze.
        max_workers (int, optional): Maximum number of worker threads. ``0``
            (the default) uses one worker per tool; ``1`` runs sequentially.

    Returns:
        list[ToolResult]: Parsed results ordered by ascending tool priority.

    Example:
        >>> results = run_tools(registry.values(), "/path/to/project", max_workers=4)
    """

    def _run_and_parse(tool_config: ToolConfig) -> tuple[int, ToolResult]:
        # Time the whole run+parse cycle so cached runs report near-zero durations.
        t0 = time.perf_counter()
        raw_result = run_tool(tool_config, path)
        tr = tool_config.parser_class().parse(raw_result)
        tr.duration_s = time.perf_counter() - t0
        return tool_config.priority, tr

    if not tool_configs:
        return []
    t_start = time.perf_counter()
    prioritized: list[tuple[int, ToolResult]] = []
    with ThreadPoolExecutor(max_workers=max_workers or len(tool_configs)) as executor:
        future_to_tool = {
            executor.submit(_run_and_parse, tool_config): tool_config
            for tool_config in tool_configs
        }
        for future in as_completed(future_to_tool):
            tool_config = future_to_tool[future]
            try:
                prioritized.append(future.result())
            except Exception as exc:
                log.error(f"{tool_config.name} generated an exception: {exc}")
    log.info(f"run_tools elapsed: {time.perf_counter() - t_start:.2f}s")
    # Sort on priority only: sorting the bare tuples would fall back to
    # comparing ToolResult objects on priority ties and raise TypeError.
    return [tr for _, tr in sorted(prioritized, key=lambda item: item[0])]
|
py_cq/llm_formatter.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Format the most important code quality defect as a markdown prompt for LLM consumption."""
|
|
2
|
+
|
|
3
|
+
import sys
|
|
4
|
+
|
|
5
|
+
from py_cq.localtypes import CombinedToolResults, ToolConfig
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _severity(score: float, config: ToolConfig) -> int:
|
|
9
|
+
"""Return 0 (error), 1 (warning), or 2 (ok) for a given score and tool config."""
|
|
10
|
+
if score < config.error_threshold:
|
|
11
|
+
return 0
|
|
12
|
+
if score < config.warning_threshold:
|
|
13
|
+
return 1
|
|
14
|
+
return 2
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def format_for_llm(
    tool_configs: dict,
    combined: CombinedToolResults,
    cq_invocation: str | None = None,
) -> str:
    """Return a markdown prompt describing the single most important defect.

    Args:
        tool_configs (dict): Mapping of tool IDs to ToolConfig objects (the
            effective registry).
        combined (CombinedToolResults): Aggregated results for all tools.
        cq_invocation (str | None): Command string the reader is told to re-run
            after fixing; defaults to reconstructing it from ``sys.argv``.

    Returns:
        str: A markdown prompt for the worst defect, or a "no issues" message
        when every metric clears its tool's warning threshold.
    """
    # Tool results reference configs by tool *name*, not by registry key.
    by_name = {tc.name: tc for tc in tool_configs.values()}

    # Keep only tools whose worst metric falls below the warning threshold,
    # ordered by: errors before warnings, then tool priority, then how low
    # the worst metric is. The walrus also drops tools missing from the registry.
    failing = sorted(
        [
            tr for tr in combined.tool_results
            if tr.metrics and (cfg := by_name.get(tr.raw.tool_name)) and min(tr.metrics.values()) < cfg.warning_threshold
        ],
        key=lambda tr: (
            _severity(min(tr.metrics.values()), by_name[tr.raw.tool_name]),
            by_name[tr.raw.tool_name].priority,
            min(tr.metrics.values()),
        ),
    )
    if not failing:
        return f"# No issues found\n\nOverall score: **{combined.score:.3f} / 1.0**"

    worst = failing[0]
    config = by_name[worst.raw.tool_name]
    # Delegate the defect description to the tool's own parser.
    defect_md = config.parser_class().format_llm_message(worst)
    if cq_invocation is None:
        cq_invocation = "cq " + " ".join(sys.argv[1:])
    return (
        f"{defect_md}\n\n"
        f"Please fix only this issue. After fixing, run `{cq_invocation}` to verify."
    )
|