python-code-quality 0.1.16__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_cq/__init__.py +3 -4
- py_cq/api.py +248 -0
- py_cq/cli.py +216 -90
- py_cq/config/config.toml +95 -0
- py_cq/context_hash.py +18 -8
- py_cq/execution_engine.py +191 -26
- py_cq/language_detector.py +4 -1
- py_cq/llm_formatter.py +200 -18
- py_cq/localtypes.py +53 -7
- py_cq/parsers/__init__.py +1 -1
- py_cq/parsers/banditparser.py +42 -19
- py_cq/parsers/common.py +184 -15
- py_cq/parsers/compileparser.py +9 -4
- py_cq/parsers/complexityparser.py +38 -9
- py_cq/parsers/coverageparser.py +184 -70
- py_cq/parsers/exitcodeparser.py +11 -2
- py_cq/parsers/halsteadparser.py +41 -20
- py_cq/parsers/interrogateparser.py +261 -25
- py_cq/parsers/linecountparser.py +10 -2
- py_cq/parsers/maintainabilityparser.py +32 -9
- py_cq/parsers/pytestparser.py +77 -20
- py_cq/parsers/regexcountparser.py +13 -3
- py_cq/parsers/ruffparser.py +160 -16
- py_cq/parsers/typarser.py +175 -43
- py_cq/parsers/vultureparser.py +22 -16
- py_cq/table_formatter.py +16 -2
- py_cq/tool_registry.py +7 -6
- {python_code_quality-0.1.16.dist-info → python_code_quality-0.2.2.dist-info}/METADATA +88 -3
- python_code_quality-0.2.2.dist-info/RECORD +35 -0
- {python_code_quality-0.1.16.dist-info → python_code_quality-0.2.2.dist-info}/WHEEL +1 -1
- py_cq/config/config.yaml +0 -94
- python_code_quality-0.1.16.dist-info/RECORD +0 -34
- {python_code_quality-0.1.16.dist-info → python_code_quality-0.2.2.dist-info}/entry_points.txt +0 -0
py_cq/execution_engine.py
CHANGED
|
@@ -13,9 +13,12 @@ where tool invocations may be expensive and should be avoided
|
|
|
13
13
|
when a cached result already exists."""
|
|
14
14
|
|
|
15
15
|
import logging
|
|
16
|
+
import os
|
|
17
|
+
import shlex
|
|
16
18
|
import shutil
|
|
17
19
|
import subprocess
|
|
18
20
|
import sys
|
|
21
|
+
import tempfile
|
|
19
22
|
import time
|
|
20
23
|
from collections.abc import Collection
|
|
21
24
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
@@ -29,7 +32,9 @@ from py_cq.localtypes import RawResult, ToolConfig, ToolResult
|
|
|
29
32
|
|
|
30
33
|
log = logging.getLogger("cq")
|
|
31
34
|
|
|
32
|
-
_cache = Cache(
|
|
35
|
+
_cache = Cache(
|
|
36
|
+
Path.home() / ".cache" / "cq", size_limit=100 * 1024 * 1024, disk=JSONDisk
|
|
37
|
+
)
|
|
33
38
|
|
|
34
39
|
|
|
35
40
|
def _find_project_root(path: Path) -> Path | None:
|
|
@@ -53,17 +58,58 @@ def _dep_in_venv(dep: str, project_root: Path) -> bool:
|
|
|
53
58
|
return False
|
|
54
59
|
|
|
55
60
|
|
|
56
|
-
def
|
|
61
|
+
def _compute_scan_targets(
|
|
62
|
+
context_path: str,
|
|
63
|
+
scan_exclude_names: list[str],
|
|
64
|
+
user_excludes: list[str] | None = None,
|
|
65
|
+
) -> str:
|
|
66
|
+
"""Return space-separated quoted absolute paths for bandit-style scanning.
|
|
67
|
+
|
|
68
|
+
When context_path is a directory, enumerates its top-level children and
|
|
69
|
+
omits any whose name is in scan_exclude_names or user_excludes. When it's
|
|
70
|
+
a file, returns just that file. Falls back to the root itself if all
|
|
71
|
+
children are excluded.
|
|
72
|
+
"""
|
|
73
|
+
root = Path(context_path).resolve()
|
|
74
|
+
if not root.is_dir():
|
|
75
|
+
return f'"{root}"'
|
|
76
|
+
excluded = set(scan_exclude_names) | {Path(e).name for e in (user_excludes or [])}
|
|
77
|
+
targets = [str(p) for p in sorted(root.iterdir()) if p.name not in excluded]
|
|
78
|
+
paths = targets if targets else [str(root)]
|
|
79
|
+
return " ".join(f'"{p}"' for p in paths)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _build_exclude_str(
|
|
83
|
+
exclude_format: str, excludes: list[str], **extra_vars: str
|
|
84
|
+
) -> str:
|
|
85
|
+
"""Builds an exclude string from a list of excludes and a format string."""
|
|
86
|
+
|
|
57
87
|
if not exclude_format or not excludes:
|
|
58
88
|
return ""
|
|
59
89
|
parts = []
|
|
60
90
|
for exc in excludes:
|
|
61
91
|
abs_posix_path = Path(exc).resolve().as_posix()
|
|
62
|
-
|
|
92
|
+
abs_native_path = str(Path(exc).resolve())
|
|
93
|
+
# shlex.quote prevents shell injection via exclude paths
|
|
94
|
+
parts.append(
|
|
95
|
+
exclude_format.format(
|
|
96
|
+
path=shlex.quote(exc),
|
|
97
|
+
abs_posix_path=shlex.quote(abs_posix_path),
|
|
98
|
+
abs_native_path=shlex.quote(abs_native_path),
|
|
99
|
+
**{k: shlex.quote(v) for k, v in extra_vars.items()},
|
|
100
|
+
)
|
|
101
|
+
)
|
|
63
102
|
return "".join(parts)
|
|
64
103
|
|
|
65
104
|
|
|
66
|
-
def run_tool(
|
|
105
|
+
def run_tool(
|
|
106
|
+
tool_config: ToolConfig,
|
|
107
|
+
context_path: str,
|
|
108
|
+
excludes: list[str] | None = None,
|
|
109
|
+
*,
|
|
110
|
+
precomputed_hash: str | None = None,
|
|
111
|
+
project_tag: str | None = None,
|
|
112
|
+
) -> RawResult:
|
|
67
113
|
"""Runs a tool defined by its configuration and returns the execution result.
|
|
68
114
|
|
|
69
115
|
Args:
|
|
@@ -83,6 +129,8 @@ def run_tool(tool_config: ToolConfig, context_path: str, excludes: list[str] | N
|
|
|
83
129
|
0"""
|
|
84
130
|
python = sys.executable
|
|
85
131
|
path = str(Path(context_path))
|
|
132
|
+
run_env = None
|
|
133
|
+
project_dir = ""
|
|
86
134
|
if tool_config.run_in_target_env:
|
|
87
135
|
uv = shutil.which("uv")
|
|
88
136
|
if uv:
|
|
@@ -94,21 +142,102 @@ def run_tool(tool_config: ToolConfig, context_path: str, excludes: list[str] | N
|
|
|
94
142
|
project_root = _find_project_root(resolved)
|
|
95
143
|
abs_dir = str(project_root) if project_root else str(resolved.parent)
|
|
96
144
|
path = str(resolved)
|
|
145
|
+
project_dir = Path(abs_dir).as_posix()
|
|
97
146
|
project_root_path = Path(abs_dir)
|
|
98
|
-
missing_deps = [
|
|
99
|
-
|
|
147
|
+
missing_deps = [
|
|
148
|
+
d
|
|
149
|
+
for d in tool_config.extra_deps
|
|
150
|
+
if not _dep_in_venv(d, project_root_path)
|
|
151
|
+
]
|
|
152
|
+
# Quote deps with shlex.quote to prevent injection via extra_deps.
|
|
153
|
+
# The uv path and abs_dir use standard double-quoting which is
|
|
154
|
+
# compatible with both POSIX and MSYS bash on Windows.
|
|
155
|
+
with_flags = " ".join(f"--with {shlex.quote(dep)}" for dep in missing_deps)
|
|
100
156
|
no_sync = "--no-sync" if sys.executable.startswith(abs_dir) else ""
|
|
101
|
-
python =
|
|
157
|
+
python = (
|
|
158
|
+
f'"{uv}" run {no_sync} --directory "{abs_dir}" {with_flags}'.strip()
|
|
159
|
+
)
|
|
160
|
+
# Strip venv env vars so the target project's environment is used cleanly.
|
|
161
|
+
# VIRTUAL_ENV pointing to cq's own venv would cause uv to warn and can
|
|
162
|
+
# corrupt the subprocess's sys.path, mixing packages from both projects.
|
|
163
|
+
run_env = {
|
|
164
|
+
k: v
|
|
165
|
+
for k, v in os.environ.items()
|
|
166
|
+
if k not in ("VIRTUAL_ENV", "PYTHONHOME", "PYTHONPATH")
|
|
167
|
+
}
|
|
102
168
|
abs_context_path = str(Path(context_path).resolve())
|
|
169
|
+
abs_context_path_posix = Path(context_path).resolve().as_posix()
|
|
170
|
+
native_sep = os.sep
|
|
171
|
+
if not project_dir:
|
|
172
|
+
project_dir = (
|
|
173
|
+
Path(abs_context_path).as_posix()
|
|
174
|
+
if Path(abs_context_path).is_dir()
|
|
175
|
+
else Path(abs_context_path).parent.as_posix()
|
|
176
|
+
)
|
|
103
177
|
input_path_posix = Path(context_path).as_posix().rstrip("/")
|
|
104
|
-
exclude = _build_exclude_str(
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
178
|
+
exclude = _build_exclude_str(
|
|
179
|
+
tool_config.exclude_format,
|
|
180
|
+
excludes or [],
|
|
181
|
+
input_path_posix=input_path_posix,
|
|
182
|
+
abs_context_path_posix=abs_context_path_posix,
|
|
183
|
+
)
|
|
184
|
+
scan_targets = _compute_scan_targets(
|
|
185
|
+
context_path, tool_config.scan_exclude_names, excludes
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
command = tool_config.command.format(
|
|
189
|
+
context_path=path,
|
|
190
|
+
abs_context_path=abs_context_path,
|
|
191
|
+
abs_context_path_posix=abs_context_path_posix,
|
|
192
|
+
input_path_posix=input_path_posix,
|
|
193
|
+
native_sep=native_sep,
|
|
194
|
+
scan_targets=scan_targets,
|
|
195
|
+
python=python,
|
|
196
|
+
exclude=exclude,
|
|
197
|
+
)
|
|
198
|
+
context_hash = (
|
|
199
|
+
precomputed_hash
|
|
200
|
+
if precomputed_hash is not None
|
|
201
|
+
else get_context_hash(context_path)
|
|
202
|
+
)
|
|
203
|
+
cache_key = f"{command}:{context_hash}"
|
|
204
|
+
|
|
205
|
+
t_cache0 = time.perf_counter()
|
|
206
|
+
cached = _cache.get(cache_key)
|
|
207
|
+
t_cache = time.perf_counter() - t_cache0
|
|
208
|
+
if cached is not None:
|
|
209
|
+
log.debug(
|
|
210
|
+
f"{tool_config.name}: [CACHE HIT] cache={t_cache * 1000:.1f}ms {command}"
|
|
211
|
+
)
|
|
212
|
+
return RawResult(**cast(dict[str, Any], cached))
|
|
213
|
+
|
|
214
|
+
# shell=True is required because commands use shell features (&&, |) and
|
|
215
|
+
# variable substitution ({python} expands to a compound uv command).
|
|
216
|
+
# All user-supplied values (context_path, excludes) are properly quoted
|
|
217
|
+
# via shlex.quote() to prevent injection - see _build_exclude_str and
|
|
218
|
+
# the uv command assembly above.
|
|
219
|
+
if run_env is None:
|
|
220
|
+
run_env = dict(os.environ)
|
|
221
|
+
_fd, coverage_tmp = tempfile.mkstemp(prefix=".coverage.cq.")
|
|
222
|
+
os.close(_fd)
|
|
223
|
+
run_env["COVERAGE_FILE"] = coverage_tmp
|
|
224
|
+
t_sub0 = time.perf_counter()
|
|
225
|
+
try:
|
|
226
|
+
result = subprocess.run(
|
|
227
|
+
command,
|
|
228
|
+
capture_output=True,
|
|
229
|
+
text=True,
|
|
230
|
+
shell=True,
|
|
231
|
+
encoding="utf-8",
|
|
232
|
+
errors="replace",
|
|
233
|
+
env=run_env,
|
|
234
|
+
) # nosec
|
|
235
|
+
finally:
|
|
236
|
+
Path(coverage_tmp).unlink(missing_ok=True)
|
|
237
|
+
t_sub = time.perf_counter() - t_sub0
|
|
238
|
+
log.debug(
|
|
239
|
+
f"{tool_config.name}: [MISS] cache={t_cache * 1000:.1f}ms tool={t_sub * 1000:.0f}ms: {command}"
|
|
240
|
+
)
|
|
112
241
|
timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
|
|
113
242
|
raw_result = RawResult(
|
|
114
243
|
tool_name=tool_config.name,
|
|
@@ -117,12 +246,20 @@ def run_tool(tool_config: ToolConfig, context_path: str, excludes: list[str] | N
|
|
|
117
246
|
stderr=result.stderr,
|
|
118
247
|
return_code=result.returncode,
|
|
119
248
|
timestamp=timestamp,
|
|
249
|
+
project_path=project_dir,
|
|
120
250
|
)
|
|
121
|
-
_cache.set(cache_key, raw_result.to_dict(), expire=5 * 24 * 60 * 60)
|
|
251
|
+
_cache.set(cache_key, raw_result.to_dict(), expire=5 * 24 * 60 * 60, tag=project_tag)
|
|
122
252
|
return raw_result
|
|
123
253
|
|
|
124
254
|
|
|
125
|
-
def run_tools(
|
|
255
|
+
def run_tools(
|
|
256
|
+
tool_configs: Collection[ToolConfig],
|
|
257
|
+
path: str,
|
|
258
|
+
max_workers: int = 0,
|
|
259
|
+
early_exit: bool = False,
|
|
260
|
+
excludes: list[str] | None = None,
|
|
261
|
+
project_root: str | None = None,
|
|
262
|
+
) -> list[ToolResult]:
|
|
126
263
|
"""Run multiple tools and return their parsed results.
|
|
127
264
|
|
|
128
265
|
Runs each tool specified in *tool_configs* on the file or directory at
|
|
@@ -162,39 +299,67 @@ def run_tools(tool_configs: Collection[ToolConfig], path: str, max_workers: int
|
|
|
162
299
|
... ToolConfig(name='scan', parser_class=ScanParser),
|
|
163
300
|
... ]
|
|
164
301
|
>>> results = run_tools(configs, '/path/to/project', parallel=True)"""
|
|
302
|
+
if not tool_configs:
|
|
303
|
+
return []
|
|
304
|
+
t_start = time.perf_counter()
|
|
305
|
+
t_hash0 = time.perf_counter()
|
|
306
|
+
root = project_root or str(Path(path).resolve())
|
|
307
|
+
shared_hash = get_context_hash(root)
|
|
308
|
+
log.debug(f"context_hash: {(time.perf_counter() - t_hash0) * 1000:.1f}ms {shared_hash}")
|
|
309
|
+
|
|
310
|
+
sentinel_key = f"_project_hash:{root}"
|
|
311
|
+
prev_hash = _cache.get(sentinel_key)
|
|
312
|
+
if prev_hash is not None and prev_hash != shared_hash:
|
|
313
|
+
evicted = _cache.evict(root)
|
|
314
|
+
log.debug(f"project changed: evicted {evicted} stale cache entries for {root}")
|
|
315
|
+
_cache.set(sentinel_key, shared_hash, expire=5 * 24 * 60 * 60, tag=root)
|
|
316
|
+
|
|
165
317
|
def _run_and_parse(tool_config: ToolConfig) -> tuple[int, ToolResult]:
|
|
166
318
|
t0 = time.perf_counter()
|
|
167
|
-
raw_result = run_tool(tool_config, path, excludes)
|
|
319
|
+
raw_result = run_tool(tool_config, path, excludes, precomputed_hash=shared_hash, project_tag=root)
|
|
168
320
|
tr = tool_config.parser_class(tool_config.parser_config).parse(raw_result)
|
|
169
321
|
tr.duration_s = time.perf_counter() - t0
|
|
170
322
|
return tool_config.order, tr
|
|
171
323
|
|
|
172
|
-
if not tool_configs:
|
|
173
|
-
return []
|
|
174
|
-
t_start = time.perf_counter()
|
|
175
324
|
prioritized: list[tuple[int, ToolResult]] = []
|
|
176
325
|
if early_exit:
|
|
177
|
-
|
|
326
|
+
sorted_configs = sorted(tool_configs, key=lambda tc: tc.order)
|
|
327
|
+
n_total = len(sorted_configs)
|
|
328
|
+
for i, tool_config in enumerate(sorted_configs):
|
|
178
329
|
try:
|
|
179
330
|
prioritized.append(_run_and_parse(tool_config))
|
|
180
331
|
except Exception as exc:
|
|
181
|
-
log.error(f"{tool_config.name} generated an exception: {exc}")
|
|
332
|
+
log.error(f"{tool_config.name} generated an exception: {exc} {exc.__traceback__}")
|
|
333
|
+
n_skipped = n_total - i - 1
|
|
334
|
+
if n_skipped:
|
|
335
|
+
remaining = ", ".join(tc.name for tc in sorted_configs[i + 1 :])
|
|
336
|
+
log.warning(f"Early exit: skipped {n_skipped} tool(s): {remaining}")
|
|
182
337
|
break
|
|
183
338
|
_, tr = prioritized[-1]
|
|
184
339
|
if tr.metrics and min(tr.metrics.values()) < tool_config.error_threshold:
|
|
340
|
+
n_skipped = n_total - i - 1
|
|
341
|
+
if n_skipped:
|
|
342
|
+
remaining = ", ".join(tc.name for tc in sorted_configs[i + 1 :])
|
|
343
|
+
log.debug(
|
|
344
|
+
f"Error threshold hit at {tool_config.name}: skipped {n_skipped} tool(s): {remaining}"
|
|
345
|
+
)
|
|
185
346
|
break
|
|
186
|
-
log.info(f"run_tools elapsed: {time.perf_counter() - t_start:.2f}s")
|
|
347
|
+
log.info(f"cq run_tools elapsed: {time.perf_counter() - t_start:.2f}s")
|
|
187
348
|
return [tr for _, tr in sorted(prioritized)]
|
|
188
349
|
with ThreadPoolExecutor(max_workers=max_workers or len(tool_configs)) as executor:
|
|
189
350
|
future_to_tool = {
|
|
190
351
|
executor.submit(_run_and_parse, tool_config): tool_config
|
|
191
352
|
for tool_config in tool_configs
|
|
192
353
|
}
|
|
354
|
+
timings: list[tuple[int, str, float]] = []
|
|
193
355
|
for future in as_completed(future_to_tool):
|
|
194
356
|
tool_config = future_to_tool[future]
|
|
195
357
|
try:
|
|
196
|
-
|
|
358
|
+
order, tr = future.result()
|
|
359
|
+
prioritized.append((order, tr))
|
|
360
|
+
timings.append((order, tool_config.name, tr.duration_s))
|
|
197
361
|
except Exception as exc:
|
|
198
362
|
log.error(f"{tool_config.name} generated an exception: {exc}")
|
|
199
|
-
|
|
363
|
+
per_tool = ", ".join(f"{name}={dur:.2f}s" for _, name, dur in sorted(timings))
|
|
364
|
+
log.debug(f"run_tools elapsed: {time.perf_counter() - t_start:.2f}s [{per_tool}]")
|
|
200
365
|
return [tr for _, tr in sorted(prioritized)]
|
py_cq/language_detector.py
CHANGED
|
@@ -4,7 +4,10 @@ from pathlib import Path
|
|
|
4
4
|
|
|
5
5
|
# Ordered: first match wins. Python is listed first so it takes priority.
|
|
6
6
|
_MARKERS: list[tuple[str, list[str]]] = [
|
|
7
|
-
(
|
|
7
|
+
(
|
|
8
|
+
"python",
|
|
9
|
+
["pyproject.toml", "setup.py", "setup.cfg", "requirements.txt", "Pipfile"],
|
|
10
|
+
),
|
|
8
11
|
("typescript", ["tsconfig.json", "package.json"]),
|
|
9
12
|
("rust", ["Cargo.toml"]),
|
|
10
13
|
("go", ["go.mod"]),
|
py_cq/llm_formatter.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"""Format the most important code quality defect as a markdown prompt for LLM consumption."""
|
|
2
2
|
|
|
3
3
|
import sys
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import cast
|
|
4
6
|
|
|
5
|
-
from py_cq.localtypes import CombinedToolResults, ToolConfig
|
|
7
|
+
from py_cq.localtypes import CombinedToolResults, Fingerprint, ToolConfig, ToolResult
|
|
6
8
|
|
|
7
9
|
|
|
8
10
|
def _severity(score: float, config: ToolConfig) -> int:
|
|
@@ -14,19 +16,91 @@ def _severity(score: float, config: ToolConfig) -> int:
|
|
|
14
16
|
return 2
|
|
15
17
|
|
|
16
18
|
|
|
17
|
-
def
|
|
19
|
+
def _single_issue_slices(
|
|
20
|
+
tr: ToolResult,
|
|
21
|
+
limit: int,
|
|
22
|
+
silence: list[str] | None = None,
|
|
23
|
+
project_root: Path | None = None,
|
|
24
|
+
) -> list[ToolResult]:
|
|
25
|
+
"""Return up to `limit` ToolResults each containing one issue from tr.details.
|
|
26
|
+
|
|
27
|
+
Returns empty list (not [tr]) when silence specs filter out all issues."""
|
|
28
|
+
silence_set = set(silence or [])
|
|
29
|
+
slices: list[ToolResult] = []
|
|
30
|
+
has_list = any(isinstance(v, list) for v in tr.details.values())
|
|
31
|
+
|
|
32
|
+
if has_list:
|
|
33
|
+
for file, issues in tr.details.items():
|
|
34
|
+
if isinstance(issues, list):
|
|
35
|
+
for issue in issues:
|
|
36
|
+
candidate = ToolResult(
|
|
37
|
+
raw=tr.raw,
|
|
38
|
+
metrics=tr.metrics,
|
|
39
|
+
details={file: [issue]},
|
|
40
|
+
project_path=tr.project_path,
|
|
41
|
+
)
|
|
42
|
+
if (
|
|
43
|
+
_fingerprint_from_slice(
|
|
44
|
+
tr.raw.tool_name, candidate, project_root
|
|
45
|
+
)
|
|
46
|
+
in silence_set
|
|
47
|
+
):
|
|
48
|
+
continue
|
|
49
|
+
slices.append(candidate)
|
|
50
|
+
if len(slices) >= limit:
|
|
51
|
+
break
|
|
52
|
+
else:
|
|
53
|
+
# Non-list details: sort so files with failures (pytest-style) come first, then by coverage ascending
|
|
54
|
+
def _dict_sort_key(v: object) -> tuple[int, float, float]:
|
|
55
|
+
if not isinstance(v, dict):
|
|
56
|
+
return (0, 0.0, 1.0)
|
|
57
|
+
d = cast("dict[str, object]", v)
|
|
58
|
+
failures = sum(
|
|
59
|
+
1
|
|
60
|
+
for val in d.values()
|
|
61
|
+
if isinstance(val, str) and val in ("FAILED", "ERROR")
|
|
62
|
+
)
|
|
63
|
+
cov_val = d.get("coverage", 0)
|
|
64
|
+
coverage = float(cov_val) if isinstance(cov_val, (int, float, str)) else 0.0
|
|
65
|
+
sm_val = d.get("smallness", 1.0)
|
|
66
|
+
smallness = float(sm_val) if isinstance(sm_val, (int, float)) else 1.0
|
|
67
|
+
return (-failures, coverage, smallness)
|
|
68
|
+
|
|
69
|
+
items = sorted(tr.details.items(), key=lambda x: _dict_sort_key(x[1]))
|
|
70
|
+
for file, data in items:
|
|
71
|
+
candidate = ToolResult(
|
|
72
|
+
raw=tr.raw,
|
|
73
|
+
metrics=tr.metrics,
|
|
74
|
+
details={file: data},
|
|
75
|
+
project_path=tr.project_path,
|
|
76
|
+
)
|
|
77
|
+
if (
|
|
78
|
+
_fingerprint_from_slice(tr.raw.tool_name, candidate, project_root)
|
|
79
|
+
in silence_set
|
|
80
|
+
):
|
|
81
|
+
continue
|
|
82
|
+
slices.append(candidate)
|
|
83
|
+
if len(slices) >= limit:
|
|
84
|
+
break
|
|
85
|
+
return slices[:limit] or ([] if silence_set else [tr])
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _select_top_issue(
|
|
18
89
|
tool_configs: dict,
|
|
19
90
|
combined: CombinedToolResults,
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
91
|
+
limit: int,
|
|
92
|
+
silence: list[str],
|
|
93
|
+
project_root: Path | None = None,
|
|
94
|
+
):
|
|
95
|
+
"""Return (worst, slices, config, parser) for the top failing tool, or None if all pass."""
|
|
24
96
|
by_name = {tc.name: tc for tc in tool_configs.values()}
|
|
25
|
-
|
|
26
97
|
failing = sorted(
|
|
27
98
|
[
|
|
28
|
-
tr
|
|
29
|
-
|
|
99
|
+
tr
|
|
100
|
+
for tr in combined.tool_results
|
|
101
|
+
if tr.metrics
|
|
102
|
+
and (cfg := by_name.get(tr.raw.tool_name))
|
|
103
|
+
and min(tr.metrics.values()) < cfg.warning_threshold
|
|
30
104
|
],
|
|
31
105
|
key=lambda tr: (
|
|
32
106
|
_severity(min(tr.metrics.values()), by_name[tr.raw.tool_name]),
|
|
@@ -34,15 +108,123 @@ def format_for_llm(
|
|
|
34
108
|
min(tr.metrics.values()),
|
|
35
109
|
),
|
|
36
110
|
)
|
|
37
|
-
|
|
111
|
+
|
|
112
|
+
for candidate in failing:
|
|
113
|
+
slices = _single_issue_slices(candidate, limit, silence, project_root)
|
|
114
|
+
if slices:
|
|
115
|
+
config = by_name[candidate.raw.tool_name]
|
|
116
|
+
return candidate, slices, config, config.parser_class()
|
|
117
|
+
return None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _build_message(
|
|
121
|
+
slices, parser, context_lines: int, limit: int, hint: bool, cq_invocation
|
|
122
|
+
) -> str:
|
|
123
|
+
parts = [
|
|
124
|
+
parser.format_llm_message(s, context_lines=context_lines, limit=limit)
|
|
125
|
+
for s in slices
|
|
126
|
+
]
|
|
127
|
+
n = len(parts)
|
|
128
|
+
close = "Please fix only this issue." if n == 1 else f"Please fix these {n} issues."
|
|
129
|
+
body = "\n\n---\n\n".join(parts) + f"\n\n{close}"
|
|
130
|
+
if hint:
|
|
131
|
+
if cq_invocation is None:
|
|
132
|
+
cq_invocation = "cq " + " ".join(sys.argv[1:])
|
|
133
|
+
body += f" After fixing, run `{cq_invocation}` to verify."
|
|
134
|
+
return body
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _fingerprint_from_slice(
|
|
138
|
+
tool_name: str, tr: ToolResult, project_root: Path | None = None
|
|
139
|
+
) -> str:
|
|
140
|
+
"""Return fingerprint string for a single-issue ToolResult slice."""
|
|
141
|
+
root = project_root.resolve() if project_root else None
|
|
142
|
+
project_str = root.as_posix() if root else ""
|
|
143
|
+
for file, issues in tr.details.items():
|
|
144
|
+
if root:
|
|
145
|
+
p = Path(file)
|
|
146
|
+
resolved = (root / p).resolve() if not p.is_absolute() else p.resolve()
|
|
147
|
+
try:
|
|
148
|
+
path_str = resolved.relative_to(root).as_posix()
|
|
149
|
+
except ValueError:
|
|
150
|
+
path_str = resolved.as_posix()
|
|
151
|
+
else:
|
|
152
|
+
path_str = Path(file).as_posix()
|
|
153
|
+
if isinstance(issues, list) and issues:
|
|
154
|
+
first = issues[0]
|
|
155
|
+
line = str(first.get("line", "")) if isinstance(first, dict) else ""
|
|
156
|
+
code = first.get("code", "") if isinstance(first, dict) else ""
|
|
157
|
+
fp = Fingerprint(
|
|
158
|
+
tool=tool_name, project=project_str, path=path_str, line=line, code=code
|
|
159
|
+
)
|
|
160
|
+
elif isinstance(issues, dict):
|
|
161
|
+
str_vals = [v for v in issues.values() if isinstance(v, str)]
|
|
162
|
+
if str_vals and all(v not in ("FAILED", "ERROR") for v in str_vals):
|
|
163
|
+
continue
|
|
164
|
+
fp = Fingerprint(tool=tool_name, project=project_str, path=path_str)
|
|
165
|
+
else:
|
|
166
|
+
fp = Fingerprint(tool=tool_name, project=project_str, path="")
|
|
167
|
+
return str(fp)
|
|
168
|
+
return tool_name
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def format_for_llm(
|
|
172
|
+
tool_configs: dict,
|
|
173
|
+
combined: CombinedToolResults,
|
|
174
|
+
cq_invocation: str | None = None,
|
|
175
|
+
context_lines: int = 15,
|
|
176
|
+
hint: bool = False,
|
|
177
|
+
limit: int = 1,
|
|
178
|
+
silence: list[str] | None = None,
|
|
179
|
+
project_root: Path | None = None,
|
|
180
|
+
) -> str:
|
|
181
|
+
"""Return a markdown prompt describing the top `limit` defects from the worst-scoring tool."""
|
|
182
|
+
result = _select_top_issue(
|
|
183
|
+
tool_configs, combined, limit, silence or [], project_root
|
|
184
|
+
)
|
|
185
|
+
if result is None:
|
|
38
186
|
return f"# No issues found\n\nOverall score: **{combined.score:.3f} / 1.0**"
|
|
187
|
+
_, slices, _, parser = result
|
|
188
|
+
return _build_message(slices, parser, context_lines, limit, hint, cq_invocation)
|
|
39
189
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
190
|
+
|
|
191
|
+
def format_for_llm_json(
|
|
192
|
+
tool_configs: dict,
|
|
193
|
+
combined: CombinedToolResults,
|
|
194
|
+
cq_invocation: str | None = None,
|
|
195
|
+
context_lines: int = 15,
|
|
196
|
+
hint: bool = False,
|
|
197
|
+
limit: int = 1,
|
|
198
|
+
silence: list[str] | None = None,
|
|
199
|
+
project_root: Path | None = None,
|
|
200
|
+
) -> dict:
|
|
201
|
+
"""Like format_for_llm but returns a dict with id, file, project, and message for automation use."""
|
|
202
|
+
message = format_for_llm(
|
|
203
|
+
tool_configs,
|
|
204
|
+
combined,
|
|
205
|
+
cq_invocation,
|
|
206
|
+
context_lines,
|
|
207
|
+
hint,
|
|
208
|
+
limit,
|
|
209
|
+
silence,
|
|
210
|
+
project_root,
|
|
211
|
+
)
|
|
212
|
+
project = project_root.as_posix() if project_root else None
|
|
213
|
+
result = _select_top_issue(
|
|
214
|
+
tool_configs, combined, limit, silence or [], project_root
|
|
48
215
|
)
|
|
216
|
+
if result is None:
|
|
217
|
+
return {"id": None, "file": None, "project": project, "message": message}
|
|
218
|
+
worst, slices, _, _ = result
|
|
219
|
+
issue_id = _fingerprint_from_slice(worst.raw.tool_name, slices[0], project_root)
|
|
220
|
+
raw_file = next(iter(slices[0].details), "")
|
|
221
|
+
if project_root and raw_file:
|
|
222
|
+
try:
|
|
223
|
+
file: str | None = (
|
|
224
|
+
Path(raw_file).resolve().relative_to(project_root).as_posix() or None
|
|
225
|
+
)
|
|
226
|
+
except ValueError:
|
|
227
|
+
file = Path(raw_file).as_posix() or None
|
|
228
|
+
else:
|
|
229
|
+
file = Path(raw_file).as_posix() or None
|
|
230
|
+
return {"id": issue_id, "file": file, "project": project, "message": message}
|
py_cq/localtypes.py
CHANGED
|
@@ -8,6 +8,35 @@ from dataclasses import dataclass, field
|
|
|
8
8
|
from typing import Any
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
@dataclass
|
|
12
|
+
class Fingerprint:
|
|
13
|
+
"""Stable identity for a single reported issue.
|
|
14
|
+
|
|
15
|
+
String form: ``tool::project::path[::line[::code]]`` (trailing empty fields omitted).
|
|
16
|
+
``project`` is an absolute path; ``path`` is relative to it.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
tool: str
|
|
20
|
+
project: str # absolute path to project root
|
|
21
|
+
path: str # path relative to project
|
|
22
|
+
line: str = ""
|
|
23
|
+
code: str = ""
|
|
24
|
+
|
|
25
|
+
def __str__(self) -> str:
|
|
26
|
+
parts = [self.tool, self.project, self.path, self.line, self.code]
|
|
27
|
+
while parts and not parts[-1]:
|
|
28
|
+
parts.pop()
|
|
29
|
+
return "::".join(parts)
|
|
30
|
+
|
|
31
|
+
@classmethod
|
|
32
|
+
def from_string(cls, s: str) -> "Fingerprint":
|
|
33
|
+
parts = s.split("::")
|
|
34
|
+
parts += [""] * (5 - len(parts))
|
|
35
|
+
return cls(
|
|
36
|
+
tool=parts[0], project=parts[1], path=parts[2], line=parts[3], code=parts[4]
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
|
|
11
40
|
@dataclass
|
|
12
41
|
class ToolConfig:
|
|
13
42
|
"""Represents the configuration for an analysis tool, including its name, command, parser class, context path, order, and thresholds for warnings and errors."""
|
|
@@ -20,9 +49,17 @@ class ToolConfig:
|
|
|
20
49
|
warning_threshold: float = 0.7 # Yellow warning if below this
|
|
21
50
|
error_threshold: float = 0.5 # Red error if below this
|
|
22
51
|
run_in_target_env: bool = False # If True, run in target project's env via uv
|
|
23
|
-
extra_deps: list[str] = field(
|
|
52
|
+
extra_deps: list[str] = field(
|
|
53
|
+
default_factory=list
|
|
54
|
+
) # Extra deps to inject via uv --with
|
|
24
55
|
parser_config: dict[str, Any] = field(default_factory=dict)
|
|
25
|
-
exclude_format: str =
|
|
56
|
+
exclude_format: str = (
|
|
57
|
+
"" # Per-path template for --exclude injection, e.g. " --exclude {path}"
|
|
58
|
+
)
|
|
59
|
+
scan_exclude_names: list[str] = field(
|
|
60
|
+
default_factory=list
|
|
61
|
+
) # Top-level dir/file names to omit from {scan_targets}
|
|
62
|
+
skip_for_file: bool = False # If True, skip when context_path is a single file
|
|
26
63
|
|
|
27
64
|
|
|
28
65
|
@dataclass
|
|
@@ -38,6 +75,7 @@ class RawResult:
|
|
|
38
75
|
stderr: str = ""
|
|
39
76
|
return_code: int = 0
|
|
40
77
|
timestamp: str = "" # For tracking when the analysis ran
|
|
78
|
+
project_path: str = "" # Absolute path to the target project root
|
|
41
79
|
|
|
42
80
|
def to_dict(self):
|
|
43
81
|
"""Returns a dictionary containing the tool name, command, stdout, stderr, return code, and timestamp."""
|
|
@@ -48,6 +86,7 @@ class RawResult:
|
|
|
48
86
|
"stderr": self.stderr,
|
|
49
87
|
"return_code": self.return_code,
|
|
50
88
|
"timestamp": self.timestamp,
|
|
89
|
+
"project_path": self.project_path,
|
|
51
90
|
}
|
|
52
91
|
|
|
53
92
|
|
|
@@ -61,10 +100,9 @@ class ToolResult:
|
|
|
61
100
|
data into a plain dictionary."""
|
|
62
101
|
|
|
63
102
|
metrics: dict[str, float] = field(default_factory=dict)
|
|
64
|
-
details: dict[str, Any] = field(
|
|
65
|
-
default_factory=dict
|
|
66
|
-
) # Additional details about the metric
|
|
103
|
+
details: dict[str, Any] = field(default_factory=dict)
|
|
67
104
|
raw: RawResult = field(default_factory=RawResult)
|
|
105
|
+
project_path: str = ""
|
|
68
106
|
duration_s: float = 0.0
|
|
69
107
|
|
|
70
108
|
def __post_init__(self):
|
|
@@ -80,6 +118,7 @@ class ToolResult:
|
|
|
80
118
|
"tool_name": self.raw.tool_name,
|
|
81
119
|
"metrics": self.metrics,
|
|
82
120
|
"details": self.details,
|
|
121
|
+
"project_path": self.project_path,
|
|
83
122
|
"duration_s": self.duration_s,
|
|
84
123
|
}
|
|
85
124
|
|
|
@@ -102,7 +141,12 @@ class CombinedToolResults:
|
|
|
102
141
|
self.tool_results = tool_results
|
|
103
142
|
self.path = path
|
|
104
143
|
scored = [tr for tr in tool_results if tr.metrics]
|
|
105
|
-
self.score =
|
|
144
|
+
self.score = (
|
|
145
|
+
sum(sum(tr.metrics.values()) / len(tr.metrics) for tr in scored)
|
|
146
|
+
/ len(scored)
|
|
147
|
+
if scored
|
|
148
|
+
else 0.0
|
|
149
|
+
)
|
|
106
150
|
|
|
107
151
|
score: float = 0.0
|
|
108
152
|
path: str = ""
|
|
@@ -129,7 +173,9 @@ class AbstractParser(ABC):
|
|
|
129
173
|
"""Converts raw tool output into a structured ToolResult."""
|
|
130
174
|
pass
|
|
131
175
|
|
|
132
|
-
def format_llm_message(
|
|
176
|
+
def format_llm_message(
|
|
177
|
+
self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
|
|
178
|
+
) -> str:
|
|
133
179
|
"""Return a single-defect description for LLM consumption.
|
|
134
180
|
|
|
135
181
|
Default implementation reports the worst metric by name and score.
|
py_cq/parsers/__init__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
"""Tool Response parsers"""
|
|
1
|
+
"""Tool Response parsers"""
|