python-code-quality 0.1.15__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- py_cq/__init__.py +3 -4
- py_cq/api.py +248 -0
- py_cq/cli.py +218 -129
- py_cq/config/config.toml +95 -0
- py_cq/context_hash.py +18 -8
- py_cq/execution_engine.py +182 -26
- py_cq/language_detector.py +4 -1
- py_cq/llm_formatter.py +200 -18
- py_cq/localtypes.py +53 -7
- py_cq/main.py +1 -1
- py_cq/parsers/__init__.py +1 -1
- py_cq/parsers/banditparser.py +43 -14
- py_cq/parsers/common.py +187 -25
- py_cq/parsers/compileparser.py +21 -9
- py_cq/parsers/complexityparser.py +40 -4
- py_cq/parsers/coverageparser.py +184 -70
- py_cq/parsers/exitcodeparser.py +11 -2
- py_cq/parsers/halsteadparser.py +42 -14
- py_cq/parsers/interrogateparser.py +261 -25
- py_cq/parsers/linecountparser.py +10 -2
- py_cq/parsers/maintainabilityparser.py +34 -4
- py_cq/parsers/pytestparser.py +77 -20
- py_cq/parsers/regexcountparser.py +13 -3
- py_cq/parsers/ruffparser.py +160 -12
- py_cq/parsers/typarser.py +175 -39
- py_cq/parsers/vultureparser.py +22 -12
- py_cq/table_formatter.py +43 -0
- py_cq/tool_registry.py +7 -6
- {python_code_quality-0.1.15.dist-info → python_code_quality-0.2.1.dist-info}/METADATA +88 -3
- python_code_quality-0.2.1.dist-info/RECORD +35 -0
- {python_code_quality-0.1.15.dist-info → python_code_quality-0.2.1.dist-info}/WHEEL +1 -1
- py_cq/config/config.yaml +0 -94
- python_code_quality-0.1.15.dist-info/RECORD +0 -33
- {python_code_quality-0.1.15.dist-info → python_code_quality-0.2.1.dist-info}/entry_points.txt +0 -0
py_cq/parsers/banditparser.py
CHANGED
|
@@ -7,9 +7,16 @@ logistic-variant score stored under the ``security`` metric key.
|
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
9
|
import json
|
|
10
|
+
import logging
|
|
10
11
|
|
|
11
12
|
from py_cq.localtypes import AbstractParser, RawResult, ToolResult
|
|
12
|
-
from py_cq.parsers.common import
|
|
13
|
+
from py_cq.parsers.common import (
|
|
14
|
+
extract_first_issue,
|
|
15
|
+
format_source_context,
|
|
16
|
+
score_logistic_variant,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
log = logging.getLogger("cq")
|
|
13
20
|
|
|
14
21
|
_SEVERITY_WEIGHT = {"HIGH": 5, "MEDIUM": 2, "LOW": 1}
|
|
15
22
|
|
|
@@ -18,10 +25,26 @@ class BanditParser(AbstractParser):
|
|
|
18
25
|
"""Parses raw JSON output from ``bandit -f json`` into a ToolResult."""
|
|
19
26
|
|
|
20
27
|
def parse(self, raw_result: RawResult) -> ToolResult:
|
|
28
|
+
"""Parses the raw bandit JSON output into a ToolResult."""
|
|
21
29
|
try:
|
|
22
30
|
data = json.loads(raw_result.stdout)
|
|
23
31
|
except (json.JSONDecodeError, ValueError):
|
|
24
|
-
|
|
32
|
+
log.warning(
|
|
33
|
+
"bandit output is not valid JSON (return_code=%s). Reporting degraded score.",
|
|
34
|
+
raw_result.return_code,
|
|
35
|
+
)
|
|
36
|
+
degraded = 0.0 if raw_result.return_code != 0 else 0.5
|
|
37
|
+
return ToolResult(raw=raw_result, metrics={"security": degraded})
|
|
38
|
+
if not isinstance(data, dict):
|
|
39
|
+
log.warning("bandit output is not a JSON object. Reporting degraded score.")
|
|
40
|
+
return ToolResult(raw=raw_result, metrics={"security": 0.5})
|
|
41
|
+
|
|
42
|
+
totals = data.get("metrics", {}).get("_totals", {})
|
|
43
|
+
log.debug(
|
|
44
|
+
"bandit scanned %d LOC across %d files",
|
|
45
|
+
totals.get("loc", 0),
|
|
46
|
+
len(data.get("metrics", {})) - 1,
|
|
47
|
+
)
|
|
25
48
|
|
|
26
49
|
files: dict[str, list] = {}
|
|
27
50
|
weighted = 0
|
|
@@ -30,25 +53,31 @@ class BanditParser(AbstractParser):
|
|
|
30
53
|
if "/.venv/" in path or "/site-packages/" in path:
|
|
31
54
|
continue
|
|
32
55
|
severity = issue.get("issue_severity", "LOW")
|
|
33
|
-
files.setdefault(path, []).append(
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
56
|
+
files.setdefault(path, []).append(
|
|
57
|
+
{
|
|
58
|
+
"line": issue.get("line_number", 0),
|
|
59
|
+
"code": issue.get("test_id", ""),
|
|
60
|
+
"severity": severity,
|
|
61
|
+
"confidence": issue.get("issue_confidence", ""),
|
|
62
|
+
"message": issue.get("issue_text", ""),
|
|
63
|
+
}
|
|
64
|
+
)
|
|
40
65
|
weighted += _SEVERITY_WEIGHT.get(severity, 1)
|
|
41
66
|
|
|
42
67
|
score = score_logistic_variant(weighted, scale_factor=10)
|
|
43
68
|
return ToolResult(raw=raw_result, metrics={"security": score}, details=files)
|
|
44
69
|
|
|
45
|
-
def format_llm_message(
|
|
46
|
-
|
|
70
|
+
def format_llm_message(
|
|
71
|
+
self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
|
|
72
|
+
) -> str:
|
|
73
|
+
"""Formats the bandit result into a LLM-friendly message string."""
|
|
74
|
+
|
|
75
|
+
result = extract_first_issue(tr.details)
|
|
76
|
+
if result is None:
|
|
47
77
|
return "bandit reported issues (no details available)"
|
|
48
|
-
file,
|
|
49
|
-
issue = issues[0]
|
|
78
|
+
file, issue = result
|
|
50
79
|
line = issue.get("line", "?")
|
|
51
80
|
code = issue.get("code", "")
|
|
52
81
|
severity = issue.get("severity", "")
|
|
53
82
|
message = issue.get("message", "")
|
|
54
|
-
return f"
|
|
83
|
+
return f"{file}:{line} - {code}: [{severity}] {message}{format_source_context(file, line, count=context_lines)}"
|
py_cq/parsers/common.py
CHANGED
|
@@ -12,21 +12,26 @@ performance metrics or error scores:
|
|
|
12
12
|
Both functions return a float and can be used directly in downstream analytics,
|
|
13
13
|
visualisation or decision-making pipelines."""
|
|
14
14
|
|
|
15
|
+
import json
|
|
16
|
+
import re
|
|
15
17
|
from pathlib import Path
|
|
16
18
|
|
|
17
19
|
|
|
18
20
|
def read_source_lines(file_path: str, line: int, count: int = 5) -> str:
|
|
19
21
|
"""Return up to `count` source lines starting at the given 1-based line number."""
|
|
20
|
-
from pathlib import Path
|
|
21
22
|
try:
|
|
22
|
-
all_lines =
|
|
23
|
+
all_lines = (
|
|
24
|
+
Path(file_path).read_text(encoding="utf-8", errors="replace").splitlines()
|
|
25
|
+
)
|
|
23
26
|
start = max(0, line - 1)
|
|
24
27
|
return "\n".join(all_lines[start : start + count])
|
|
25
|
-
except OSError:
|
|
28
|
+
except (OSError, ValueError):
|
|
26
29
|
return ""
|
|
27
30
|
|
|
28
31
|
|
|
29
|
-
def format_source_context(
|
|
32
|
+
def format_source_context(
|
|
33
|
+
file: str, line: int | str, context: int = 3, count: int = 8
|
|
34
|
+
) -> str:
|
|
30
35
|
"""Return a fenced python code block for the source around `line`, or '' if unavailable.
|
|
31
36
|
|
|
32
37
|
Stops before spilling into the next top-level ``def`` or ``class`` definition.
|
|
@@ -51,12 +56,39 @@ def format_source_context(file: str, line: int | str, context: int = 3, count: i
|
|
|
51
56
|
return f"\n```python\n{src}\n```"
|
|
52
57
|
|
|
53
58
|
|
|
54
|
-
_PYTHON_KEYWORDS = frozenset(
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
59
|
+
_PYTHON_KEYWORDS = frozenset(
|
|
60
|
+
[
|
|
61
|
+
"if",
|
|
62
|
+
"elif",
|
|
63
|
+
"else",
|
|
64
|
+
"for",
|
|
65
|
+
"while",
|
|
66
|
+
"with",
|
|
67
|
+
"assert",
|
|
68
|
+
"return",
|
|
69
|
+
"raise",
|
|
70
|
+
"import",
|
|
71
|
+
"from",
|
|
72
|
+
"class",
|
|
73
|
+
"def",
|
|
74
|
+
"lambda",
|
|
75
|
+
"yield",
|
|
76
|
+
"del",
|
|
77
|
+
"pass",
|
|
78
|
+
"break",
|
|
79
|
+
"continue",
|
|
80
|
+
"not",
|
|
81
|
+
"and",
|
|
82
|
+
"or",
|
|
83
|
+
"in",
|
|
84
|
+
"is",
|
|
85
|
+
"print",
|
|
86
|
+
"super",
|
|
87
|
+
"type",
|
|
88
|
+
"len",
|
|
89
|
+
"range",
|
|
90
|
+
]
|
|
91
|
+
)
|
|
60
92
|
|
|
61
93
|
|
|
62
94
|
def extract_callee_name(source_line: str) -> str | None:
|
|
@@ -66,19 +98,17 @@ def extract_callee_name(source_line: str) -> str | None:
|
|
|
66
98
|
``func`` rather than the variable on the left. Python keywords and
|
|
67
99
|
built-ins listed in ``_PYTHON_KEYWORDS`` are excluded.
|
|
68
100
|
"""
|
|
69
|
-
import re
|
|
70
101
|
stripped = source_line.strip()
|
|
71
102
|
rhs = stripped
|
|
72
103
|
if "=" in stripped and not stripped.startswith(("assert", "return")):
|
|
73
104
|
rhs = stripped.split("=", 1)[1].strip()
|
|
74
105
|
m = re.search(r"\b([a-zA-Z_]\w*)\s*\(", rhs)
|
|
75
|
-
if m and m.group(1) not in _PYTHON_KEYWORDS:
|
|
106
|
+
if m and m.group(1) not in _PYTHON_KEYWORDS and len(m.group(1)) > 1:
|
|
76
107
|
return m.group(1)
|
|
77
108
|
return None
|
|
78
109
|
|
|
79
110
|
|
|
80
111
|
def _find_project_root(hint_file: str) -> Path:
|
|
81
|
-
from pathlib import Path
|
|
82
112
|
root = Path(hint_file).resolve().parent
|
|
83
113
|
current = root
|
|
84
114
|
for _ in range(8):
|
|
@@ -91,17 +121,32 @@ def _find_project_root(hint_file: str) -> Path:
|
|
|
91
121
|
return root
|
|
92
122
|
|
|
93
123
|
|
|
94
|
-
|
|
124
|
+
_SKIP_DIRS = {
|
|
125
|
+
".venv",
|
|
126
|
+
"venv",
|
|
127
|
+
"__pycache__",
|
|
128
|
+
".git",
|
|
129
|
+
"node_modules",
|
|
130
|
+
".tox",
|
|
131
|
+
"dist",
|
|
132
|
+
"build",
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def find_in_project(
|
|
137
|
+
func_name: str, hint_file: str, max_lines: int = 10
|
|
138
|
+
) -> tuple[str, str]:
|
|
95
139
|
"""Find func_name definition in project files; same file first, then project-wide.
|
|
96
140
|
|
|
97
141
|
Returns ``(file_path, code_block)`` for the first match, or ``("", "")`` if not found.
|
|
98
142
|
"""
|
|
99
|
-
from pathlib import Path
|
|
100
143
|
result = find_function_source(hint_file, func_name, max_lines=max_lines)
|
|
101
144
|
if result:
|
|
102
145
|
return hint_file, result
|
|
103
146
|
root = _find_project_root(hint_file)
|
|
104
147
|
for py_file in sorted(root.rglob("*.py")):
|
|
148
|
+
if any(part in _SKIP_DIRS for part in py_file.parts):
|
|
149
|
+
continue
|
|
105
150
|
if py_file.resolve() == Path(hint_file).resolve():
|
|
106
151
|
continue
|
|
107
152
|
r = find_function_source(str(py_file), func_name, max_lines=max_lines)
|
|
@@ -111,12 +156,20 @@ def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tupl
|
|
|
111
156
|
|
|
112
157
|
|
|
113
158
|
def _relative_path(path: str) -> str:
|
|
114
|
-
"""Return path relative to
|
|
115
|
-
from pathlib import Path
|
|
159
|
+
"""Return path relative to project root if possible, otherwise absolute. Forward slashes."""
|
|
116
160
|
try:
|
|
117
|
-
|
|
118
|
-
except ValueError:
|
|
161
|
+
resolved = Path(path).resolve()
|
|
162
|
+
except (OSError, ValueError):
|
|
119
163
|
return path.replace("\\", "/")
|
|
164
|
+
try:
|
|
165
|
+
return resolved.relative_to(_find_project_root(path)).as_posix()
|
|
166
|
+
except ValueError:
|
|
167
|
+
return resolved.as_posix()
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def format_issue_header(file: str, line: int, code: str, message: str) -> str:
    """Build the single-line issue header ``path:line - CODE: message``.

    The ``file`` argument is passed through ``_relative_path`` before it is
    embedded; ``line``, ``code`` and ``message`` are interpolated as given.
    """
    location = f"{_relative_path(file)}:{line}"
    return f"{location} - {code}: {message}"
|
|
120
173
|
|
|
121
174
|
|
|
122
175
|
def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -> str:
|
|
@@ -124,13 +177,12 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
|
|
|
124
177
|
|
|
125
178
|
Output format::
|
|
126
179
|
|
|
127
|
-
Callee `func_name`
|
|
180
|
+
Callee `func_name` - `relative/path/to/file.py`
|
|
128
181
|
```python
|
|
129
182
|
N: def func_name(...):
|
|
130
183
|
...
|
|
131
184
|
```
|
|
132
185
|
"""
|
|
133
|
-
import re
|
|
134
186
|
callee_file, code_block = find_in_project(func_name, hint_file, max_lines=max_lines)
|
|
135
187
|
if not code_block:
|
|
136
188
|
return ""
|
|
@@ -139,14 +191,66 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
|
|
|
139
191
|
return f"\n`{func_name}` is defined at: `{_relative_path(callee_file)}{line_ref}`{code_block}"
|
|
140
192
|
|
|
141
193
|
|
|
194
|
+
def enclosing_function_range(file: str, line: int) -> tuple[int, int] | None:
|
|
195
|
+
"""Return (start_line, end_line) 1-based for the function enclosing `line`, or None."""
|
|
196
|
+
try:
|
|
197
|
+
all_lines = Path(file).read_text(encoding="utf-8").splitlines()
|
|
198
|
+
except (OSError, ValueError):
|
|
199
|
+
return None
|
|
200
|
+
if line < 1 or line > len(all_lines):
|
|
201
|
+
return None
|
|
202
|
+
target_idx = line - 1
|
|
203
|
+
target_indent = len(all_lines[target_idx]) - len(all_lines[target_idx].lstrip())
|
|
204
|
+
def_re = re.compile(r"^(\s*)(?:async\s+)?def\s+")
|
|
205
|
+
start_idx = baseline_indent = None
|
|
206
|
+
for i in range(target_idx - 1, -1, -1):
|
|
207
|
+
m = def_re.match(all_lines[i])
|
|
208
|
+
if m:
|
|
209
|
+
indent = len(m.group(1))
|
|
210
|
+
if indent < target_indent:
|
|
211
|
+
start_idx, baseline_indent = i, indent
|
|
212
|
+
break
|
|
213
|
+
if start_idx is None or baseline_indent is None:
|
|
214
|
+
return None
|
|
215
|
+
end_idx = start_idx
|
|
216
|
+
in_body = ":" in all_lines[start_idx].split("#")[0]
|
|
217
|
+
for i, ln in enumerate(all_lines[start_idx + 1 :], start=start_idx + 1):
|
|
218
|
+
stripped = ln.lstrip()
|
|
219
|
+
if not in_body:
|
|
220
|
+
if ":" in ln.split("#")[0]:
|
|
221
|
+
in_body = True
|
|
222
|
+
end_idx = i
|
|
223
|
+
else:
|
|
224
|
+
if stripped and len(ln) - len(stripped) <= baseline_indent:
|
|
225
|
+
break
|
|
226
|
+
end_idx = i
|
|
227
|
+
return (start_idx + 1, end_idx + 1)
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def find_enclosing_function(file: str, line: int, max_lines: int = 50) -> str:
    """Render the function that encloses 1-based `line` as a fenced python block.

    The span comes from ``enclosing_function_range``. At most `max_lines`
    lines are taken from the start of that span, trailing blank lines are
    dropped, and every remaining line is prefixed with its 1-based line number.
    Returns '' when no span is found or the file cannot be read.
    """
    span = enclosing_function_range(file, line)
    if span is None:
        return ""
    first_line, last_line = span
    try:
        text = Path(file).read_text(encoding="utf-8")
    except (OSError, ValueError):
        return ""
    source_lines = text.splitlines()
    lo = first_line - 1
    hi = min(last_line, lo + max_lines)
    body = source_lines[lo:hi]
    # Drop blank lines left at the tail of the slice.
    while body and not body[-1].strip():
        del body[-1]
    numbered = "\n".join(
        f"{number}: {content}" for number, content in enumerate(body, start=first_line)
    )
    return f"\n```python\n{numbered}\n```"
|
|
246
|
+
|
|
247
|
+
|
|
142
248
|
def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
|
|
143
249
|
"""Return a fenced python block for the body of func_name, or '' if unavailable."""
|
|
144
|
-
from pathlib import Path
|
|
145
250
|
try:
|
|
146
251
|
all_lines = Path(file).read_text(encoding="utf-8").splitlines()
|
|
147
|
-
except OSError:
|
|
252
|
+
except (OSError, ValueError):
|
|
148
253
|
return ""
|
|
149
|
-
import re
|
|
150
254
|
pattern = re.compile(rf"^(\s*)(?:async\s+)?def\s+{re.escape(func_name)}\s*\(")
|
|
151
255
|
match_result: tuple[int, int] | None = None
|
|
152
256
|
for i, line in enumerate(all_lines):
|
|
@@ -158,11 +262,30 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
|
|
|
158
262
|
return ""
|
|
159
263
|
start_idx, baseline_indent = match_result
|
|
160
264
|
collected = [all_lines[start_idx]]
|
|
265
|
+
in_docstring = False
|
|
266
|
+
docstring_marker: str | None = None
|
|
267
|
+
past_docstring = False
|
|
161
268
|
for line in all_lines[start_idx + 1 :]:
|
|
162
269
|
stripped = line.lstrip()
|
|
163
270
|
indent = len(line) - len(stripped)
|
|
164
271
|
if stripped and indent <= baseline_indent:
|
|
165
272
|
break
|
|
273
|
+
if not past_docstring:
|
|
274
|
+
if not in_docstring:
|
|
275
|
+
quote = next(
|
|
276
|
+
(q for q in ('"""', "'''") if stripped.startswith(q)), None
|
|
277
|
+
)
|
|
278
|
+
if quote:
|
|
279
|
+
in_docstring = quote not in stripped[3:]
|
|
280
|
+
past_docstring = not in_docstring
|
|
281
|
+
docstring_marker = quote
|
|
282
|
+
continue
|
|
283
|
+
past_docstring = bool(stripped)
|
|
284
|
+
else:
|
|
285
|
+
if docstring_marker and docstring_marker in stripped:
|
|
286
|
+
in_docstring = False
|
|
287
|
+
past_docstring = True
|
|
288
|
+
continue
|
|
166
289
|
collected.append(line)
|
|
167
290
|
if len(collected) >= max_lines:
|
|
168
291
|
break
|
|
@@ -172,8 +295,47 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
|
|
|
172
295
|
return f"\n```python\n{numbered}\n```"
|
|
173
296
|
|
|
174
297
|
|
|
298
|
+
def resolve_path(base: str, rel_file: str) -> str:
    """Join `rel_file` onto `base` and return the result with forward slashes.

    `rel_file` is returned unchanged when either argument is empty, when
    `rel_file` is already absolute, or when path handling raises
    ``OSError`` / ``ValueError``.
    """
    if not (base and rel_file):
        return rel_file
    try:
        if Path(rel_file).is_absolute():
            return rel_file
        joined = Path(base).joinpath(rel_file)
        return joined.as_posix()
    except (OSError, ValueError):
        return rel_file
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def parse_json_dict(stdout: str) -> dict | None:
|
|
312
|
+
"""Parse stdout as a JSON object; return None if invalid or not a dict."""
|
|
313
|
+
try:
|
|
314
|
+
data = json.loads(stdout)
|
|
315
|
+
except (json.JSONDecodeError, ValueError):
|
|
316
|
+
return None
|
|
317
|
+
return data if isinstance(data, dict) else None
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def extract_first_issue(details: dict) -> tuple[str, dict] | None:
|
|
321
|
+
"""Return (file, issue) from the first list-typed entry in details, or None."""
|
|
322
|
+
if not details:
|
|
323
|
+
return None
|
|
324
|
+
file, issues = next(iter(details.items()))
|
|
325
|
+
if not isinstance(issues, list) or not issues:
|
|
326
|
+
return None
|
|
327
|
+
issue = issues[0]
|
|
328
|
+
return (file, issue) if isinstance(issue, dict) else None
|
|
329
|
+
|
|
330
|
+
|
|
175
331
|
def inv_normalize(value: float, max_value: float) -> float:
|
|
176
|
-
"""Returns the inverse normalized value of `value` relative to `max_value`.
|
|
332
|
+
"""Returns the inverse normalized value of `value` relative to `max_value`.
|
|
333
|
+
|
|
334
|
+
When *max_value* is zero the result is defined as 1.0 (no deviation from a
|
|
335
|
+
zero-sized reference).
|
|
336
|
+
"""
|
|
337
|
+
if max_value == 0:
|
|
338
|
+
return 1.0
|
|
177
339
|
return (max_value - min(value, max_value)) / max_value
|
|
178
340
|
|
|
179
341
|
|
py_cq/parsers/compileparser.py
CHANGED
|
@@ -26,7 +26,7 @@ class CompileParser(AbstractParser):
|
|
|
26
26
|
events and error messages. For each file that emits an error, it extracts
|
|
27
27
|
the line number, source snippet, error type, and help text, normalizes the
|
|
28
28
|
file path, and stores this information in a dictionary keyed by file path.
|
|
29
|
-
It then computes a failure ratio (failed files
|
|
29
|
+
It then computes a failure ratio (failed files / total compilations) and
|
|
30
30
|
derives a compile score via ``score_logistic_variant``. The original
|
|
31
31
|
``stdout`` is cleaned of ``Listing`` lines and back-slash path separators
|
|
32
32
|
are replaced with forward slashes. A ``ToolResult`` containing the raw
|
|
@@ -64,7 +64,10 @@ class CompileParser(AbstractParser):
|
|
|
64
64
|
compilations += 1
|
|
65
65
|
elif line.startswith("*** File "):
|
|
66
66
|
# This indicates a compilation error
|
|
67
|
-
|
|
67
|
+
parts = line.split('"')
|
|
68
|
+
if len(parts) < 2:
|
|
69
|
+
continue
|
|
70
|
+
file_path = parts[1]
|
|
68
71
|
current_error = {"file": file_path, "error": line}
|
|
69
72
|
elif current_error and line.strip():
|
|
70
73
|
# Append additional error context
|
|
@@ -80,17 +83,21 @@ class CompileParser(AbstractParser):
|
|
|
80
83
|
error_info = {}
|
|
81
84
|
# Extract line number if present
|
|
82
85
|
if "line " in error_lines[0]:
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
+
try:
|
|
87
|
+
error_info["line"] = int(
|
|
88
|
+
error_lines[0].split("line ")[1].split(",")[0]
|
|
89
|
+
)
|
|
90
|
+
except ValueError:
|
|
91
|
+
pass
|
|
86
92
|
# Get source code context if available
|
|
87
93
|
if len(error_lines) > 1:
|
|
88
94
|
error_info["src"] = error_lines[1].strip()
|
|
89
95
|
if len(error_lines) > 3:
|
|
90
96
|
if "Error:" in error_lines[3]:
|
|
91
97
|
error_parts = error_lines[3].split(":")
|
|
98
|
+
type_tokens = error_parts[0].strip().split()
|
|
92
99
|
error_info["type"] = (
|
|
93
|
-
|
|
100
|
+
type_tokens[-1] if type_tokens else "Unknown"
|
|
94
101
|
) # Gets "SyntaxError"
|
|
95
102
|
error_info["help"] = ",".join(
|
|
96
103
|
error_parts[1:]
|
|
@@ -115,7 +122,9 @@ class CompileParser(AbstractParser):
|
|
|
115
122
|
tr.details["failed_files"] = failed_files
|
|
116
123
|
return tr
|
|
117
124
|
|
|
118
|
-
def format_llm_message(
|
|
125
|
+
def format_llm_message(
|
|
126
|
+
self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
|
|
127
|
+
) -> str:
|
|
119
128
|
"""Return the first compilation failure as a defect description."""
|
|
120
129
|
failed = tr.details.get("failed_files", {})
|
|
121
130
|
if not failed:
|
|
@@ -124,12 +133,15 @@ class CompileParser(AbstractParser):
|
|
|
124
133
|
line = info.get("line", "?")
|
|
125
134
|
typ = info.get("type", "Error")
|
|
126
135
|
help_msg = info.get("help", "")
|
|
127
|
-
code_block = format_source_context(file, line, count=context_lines) or (
|
|
136
|
+
code_block = format_source_context(file, line, count=context_lines) or (
|
|
137
|
+
f"\n```python\n{info['src']}\n```" if info.get("src") else ""
|
|
138
|
+
)
|
|
128
139
|
callee = ""
|
|
129
140
|
src_line = info.get("src", "")
|
|
130
141
|
if src_line:
|
|
131
142
|
from py_cq.parsers.common import extract_callee_name, format_callee_context
|
|
143
|
+
|
|
132
144
|
func_name = extract_callee_name(src_line)
|
|
133
145
|
if func_name:
|
|
134
146
|
callee = format_callee_context(func_name, file)
|
|
135
|
-
return f"
|
|
147
|
+
return f"{file}:{line} - {typ}: {help_msg}{code_block}{callee}"
|
|
@@ -1,9 +1,11 @@
|
|
|
1
1
|
"""Provides a `ComplexityParser` that converts raw complexity-analysis output into structured `ToolResult` objects for downstream use."""
|
|
2
2
|
|
|
3
|
-
import json
|
|
4
|
-
|
|
5
3
|
from py_cq.localtypes import AbstractParser, RawResult, ToolResult
|
|
6
|
-
from py_cq.parsers.common import
|
|
4
|
+
from py_cq.parsers.common import (
|
|
5
|
+
find_function_source,
|
|
6
|
+
parse_json_dict,
|
|
7
|
+
score_logistic_variant,
|
|
8
|
+
)
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
class ComplexityParser(AbstractParser):
|
|
@@ -64,7 +66,10 @@ class ComplexityParser(AbstractParser):
|
|
|
64
66
|
>>> result.metrics["simplicity"]
|
|
65
67
|
0.4"""
|
|
66
68
|
tr = ToolResult(raw=raw_result)
|
|
67
|
-
data =
|
|
69
|
+
data = parse_json_dict(raw_result.stdout)
|
|
70
|
+
if data is None:
|
|
71
|
+
tr.metrics["simplicity"] = 0.0
|
|
72
|
+
return tr
|
|
68
73
|
score = 0
|
|
69
74
|
num_items = 0
|
|
70
75
|
max_complexity = 30
|
|
@@ -84,3 +89,34 @@ class ComplexityParser(AbstractParser):
|
|
|
84
89
|
}
|
|
85
90
|
tr.metrics["simplicity"] = score / num_items if num_items > 0 else 0.0
|
|
86
91
|
return tr
|
|
92
|
+
|
|
93
|
+
def format_llm_message(
    self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
) -> str:
    """Describe the least simple function recorded in `tr.details`.

    Every per-function entry with a ``simplicity`` value strictly below 1.0
    is a candidate; the lowest value wins and the earliest entry wins ties.
    The message is a header naming the file, function and rank, the function
    source from ``find_function_source`` (when it returns any, limited to
    `context_lines` lines), and a fixed refactoring hint.

    Without a candidate the first metric in `tr.metrics` is reported, or a
    fixed placeholder when there are no metrics. `limit` is not used here.
    """
    candidates = [
        (entry.get("simplicity", 1.0), path, name, entry)
        for path, funcs in tr.details.items()
        if isinstance(funcs, dict)
        for name, entry in funcs.items()
    ]
    below_perfect = [item for item in candidates if item[0] < 1.0]

    worst_file = worst_func = worst_rank = None
    if below_perfect:
        # min() keeps the earliest of equally low scores.
        _, worst_file, worst_func, worst_entry = min(
            below_perfect, key=lambda item: item[0]
        )
        worst_rank = worst_entry.get("rank", "F")

    if worst_file is None or worst_func is None:
        if not tr.metrics:
            return "No complexity details available"
        metric_name, value = next(iter(tr.metrics.items()))
        return f"**{metric_name}** score: {value:.3f}"

    sections = [
        f"`{worst_file}::{worst_func}` - cyclomatic complexity rank **{worst_rank}**"
    ]
    snippet = find_function_source(worst_file, worst_func, max_lines=context_lines)
    if snippet:
        sections.append(snippet)
    sections.append(
        "Cyclomatic complexity is too high. Break this function into smaller, single-purpose helpers."
    )
    return "\n\n".join(sections)
|