python-code-quality 0.1.16__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,16 @@ logistic-variant score stored under the ``security`` metric key.
7
7
  """
8
8
 
9
9
  import json
10
+ import logging
10
11
 
11
12
  from py_cq.localtypes import AbstractParser, RawResult, ToolResult
12
- from py_cq.parsers.common import format_source_context, score_logistic_variant
13
+ from py_cq.parsers.common import (
14
+ extract_first_issue,
15
+ format_source_context,
16
+ score_logistic_variant,
17
+ )
18
+
19
+ log = logging.getLogger("cq")
13
20
 
14
21
  _SEVERITY_WEIGHT = {"HIGH": 5, "MEDIUM": 2, "LOW": 1}
15
22
 
@@ -18,12 +25,26 @@ class BanditParser(AbstractParser):
18
25
  """Parses raw JSON output from ``bandit -f json`` into a ToolResult."""
19
26
 
20
27
  def parse(self, raw_result: RawResult) -> ToolResult:
28
+ """Parses the raw bandit JSON output into a ToolResult."""
21
29
  try:
22
30
  data = json.loads(raw_result.stdout)
23
31
  except (json.JSONDecodeError, ValueError):
24
- return ToolResult(raw=raw_result, metrics={"security": 1.0})
32
+ log.warning(
33
+ "bandit output is not valid JSON (return_code=%s). Reporting degraded score.",
34
+ raw_result.return_code,
35
+ )
36
+ degraded = 0.0 if raw_result.return_code != 0 else 0.5
37
+ return ToolResult(raw=raw_result, metrics={"security": degraded})
25
38
  if not isinstance(data, dict):
26
- return ToolResult(raw=raw_result, metrics={"security": 1.0})
39
+ log.warning("bandit output is not a JSON object. Reporting degraded score.")
40
+ return ToolResult(raw=raw_result, metrics={"security": 0.5})
41
+
42
+ totals = data.get("metrics", {}).get("_totals", {})
43
+ log.debug(
44
+ "bandit scanned %d LOC across %d files",
45
+ totals.get("loc", 0),
46
+ len(data.get("metrics", {})) - 1,
47
+ )
27
48
 
28
49
  files: dict[str, list] = {}
29
50
  weighted = 0
@@ -32,29 +53,31 @@ class BanditParser(AbstractParser):
32
53
  if "/.venv/" in path or "/site-packages/" in path:
33
54
  continue
34
55
  severity = issue.get("issue_severity", "LOW")
35
- files.setdefault(path, []).append({
36
- "line": issue.get("line_number", 0),
37
- "code": issue.get("test_id", ""),
38
- "severity": severity,
39
- "confidence": issue.get("issue_confidence", ""),
40
- "message": issue.get("issue_text", ""),
41
- })
56
+ files.setdefault(path, []).append(
57
+ {
58
+ "line": issue.get("line_number", 0),
59
+ "code": issue.get("test_id", ""),
60
+ "severity": severity,
61
+ "confidence": issue.get("issue_confidence", ""),
62
+ "message": issue.get("issue_text", ""),
63
+ }
64
+ )
42
65
  weighted += _SEVERITY_WEIGHT.get(severity, 1)
43
66
 
44
67
  score = score_logistic_variant(weighted, scale_factor=10)
45
68
  return ToolResult(raw=raw_result, metrics={"security": score}, details=files)
46
69
 
47
- def format_llm_message(self, tr: ToolResult, *, context_lines: int = 15) -> str:
48
- if not tr.details:
49
- return "bandit reported issues (no details available)"
50
- file, issues = next(iter(tr.details.items()))
51
- if not isinstance(issues, list) or not issues:
52
- return "bandit reported issues (no details available)"
53
- issue = issues[0]
54
- if not isinstance(issue, dict):
70
+ def format_llm_message(
71
+ self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
72
+ ) -> str:
73
+ """Formats the bandit result into a LLM-friendly message string."""
74
+
75
+ result = extract_first_issue(tr.details)
76
+ if result is None:
55
77
  return "bandit reported issues (no details available)"
78
+ file, issue = result
56
79
  line = issue.get("line", "?")
57
80
  code = issue.get("code", "")
58
81
  severity = issue.get("severity", "")
59
82
  message = issue.get("message", "")
60
- return f"`{file}:{line}` **{code}** [{severity}]: {message}{format_source_context(file, line, count=context_lines)}"
83
+ return f"{file}:{line} - {code}: [{severity}] {message}{format_source_context(file, line, count=context_lines)}"
py_cq/parsers/common.py CHANGED
@@ -12,6 +12,7 @@ performance metrics or error scores:
12
12
  Both functions return a float and can be used directly in downstream analytics,
13
13
  visualisation or decision-making pipelines."""
14
14
 
15
+ import json
15
16
  import re
16
17
  from pathlib import Path
17
18
 
@@ -19,14 +20,18 @@ from pathlib import Path
19
20
  def read_source_lines(file_path: str, line: int, count: int = 5) -> str:
20
21
  """Return up to `count` source lines starting at the given 1-based line number."""
21
22
  try:
22
- all_lines = Path(file_path).read_text(encoding="utf-8").splitlines()
23
+ all_lines = (
24
+ Path(file_path).read_text(encoding="utf-8", errors="replace").splitlines()
25
+ )
23
26
  start = max(0, line - 1)
24
27
  return "\n".join(all_lines[start : start + count])
25
28
  except (OSError, ValueError):
26
29
  return ""
27
30
 
28
31
 
29
- def format_source_context(file: str, line: int | str, context: int = 3, count: int = 8) -> str:
32
+ def format_source_context(
33
+ file: str, line: int | str, context: int = 3, count: int = 8
34
+ ) -> str:
30
35
  """Return a fenced python code block for the source around `line`, or '' if unavailable.
31
36
 
32
37
  Stops before spilling into the next top-level ``def`` or ``class`` definition.
@@ -51,12 +56,39 @@ def format_source_context(file: str, line: int | str, context: int = 3, count: i
51
56
  return f"\n```python\n{src}\n```"
52
57
 
53
58
 
54
- _PYTHON_KEYWORDS = frozenset([
55
- "if", "elif", "else", "for", "while", "with", "assert", "return",
56
- "raise", "import", "from", "class", "def", "lambda", "yield",
57
- "del", "pass", "break", "continue", "not", "and", "or", "in", "is",
58
- "print", "super", "type", "len", "range",
59
- ])
59
+ _PYTHON_KEYWORDS = frozenset(
60
+ [
61
+ "if",
62
+ "elif",
63
+ "else",
64
+ "for",
65
+ "while",
66
+ "with",
67
+ "assert",
68
+ "return",
69
+ "raise",
70
+ "import",
71
+ "from",
72
+ "class",
73
+ "def",
74
+ "lambda",
75
+ "yield",
76
+ "del",
77
+ "pass",
78
+ "break",
79
+ "continue",
80
+ "not",
81
+ "and",
82
+ "or",
83
+ "in",
84
+ "is",
85
+ "print",
86
+ "super",
87
+ "type",
88
+ "len",
89
+ "range",
90
+ ]
91
+ )
60
92
 
61
93
 
62
94
  def extract_callee_name(source_line: str) -> str | None:
@@ -71,7 +103,7 @@ def extract_callee_name(source_line: str) -> str | None:
71
103
  if "=" in stripped and not stripped.startswith(("assert", "return")):
72
104
  rhs = stripped.split("=", 1)[1].strip()
73
105
  m = re.search(r"\b([a-zA-Z_]\w*)\s*\(", rhs)
74
- if m and m.group(1) not in _PYTHON_KEYWORDS:
106
+ if m and m.group(1) not in _PYTHON_KEYWORDS and len(m.group(1)) > 1:
75
107
  return m.group(1)
76
108
  return None
77
109
 
@@ -89,7 +121,21 @@ def _find_project_root(hint_file: str) -> Path:
89
121
  return root
90
122
 
91
123
 
92
- def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tuple[str, str]:
124
+ _SKIP_DIRS = {
125
+ ".venv",
126
+ "venv",
127
+ "__pycache__",
128
+ ".git",
129
+ "node_modules",
130
+ ".tox",
131
+ "dist",
132
+ "build",
133
+ }
134
+
135
+
136
+ def find_in_project(
137
+ func_name: str, hint_file: str, max_lines: int = 10
138
+ ) -> tuple[str, str]:
93
139
  """Find func_name definition in project files; same file first, then project-wide.
94
140
 
95
141
  Returns ``(file_path, code_block)`` for the first match, or ``("", "")`` if not found.
@@ -99,6 +145,8 @@ def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tupl
99
145
  return hint_file, result
100
146
  root = _find_project_root(hint_file)
101
147
  for py_file in sorted(root.rglob("*.py")):
148
+ if any(part in _SKIP_DIRS for part in py_file.parts):
149
+ continue
102
150
  if py_file.resolve() == Path(hint_file).resolve():
103
151
  continue
104
152
  r = find_function_source(str(py_file), func_name, max_lines=max_lines)
@@ -108,11 +156,20 @@ def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tupl
108
156
 
109
157
 
110
158
  def _relative_path(path: str) -> str:
111
- """Return path relative to cwd, normalised to forward slashes."""
159
+ """Return path relative to project root if possible, otherwise absolute. Forward slashes."""
112
160
  try:
113
- return str(Path(path).relative_to(Path.cwd())).replace("\\", "/")
114
- except ValueError:
161
+ resolved = Path(path).resolve()
162
+ except (OSError, ValueError):
115
163
  return path.replace("\\", "/")
164
+ try:
165
+ return resolved.relative_to(_find_project_root(path)).as_posix()
166
+ except ValueError:
167
+ return resolved.as_posix()
168
+
169
+
170
+ def format_issue_header(file: str, line: int, code: str, message: str) -> str:
171
+ """Return a clean single-line issue header: path:line - CODE: message."""
172
+ return f"{_relative_path(file)}:{line} - {code}: {message}"
116
173
 
117
174
 
118
175
  def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -> str:
@@ -120,7 +177,7 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
120
177
 
121
178
  Output format::
122
179
 
123
- Callee `func_name` `relative/path/to/file.py`
180
+ Callee `func_name` - `relative/path/to/file.py`
124
181
  ```python
125
182
  N: def func_name(...):
126
183
  ...
@@ -134,6 +191,60 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
134
191
  return f"\n`{func_name}` is defined at: `{_relative_path(callee_file)}{line_ref}`{code_block}"
135
192
 
136
193
 
194
+ def enclosing_function_range(file: str, line: int) -> tuple[int, int] | None:
195
+ """Return (start_line, end_line) 1-based for the function enclosing `line`, or None."""
196
+ try:
197
+ all_lines = Path(file).read_text(encoding="utf-8").splitlines()
198
+ except (OSError, ValueError):
199
+ return None
200
+ if line < 1 or line > len(all_lines):
201
+ return None
202
+ target_idx = line - 1
203
+ target_indent = len(all_lines[target_idx]) - len(all_lines[target_idx].lstrip())
204
+ def_re = re.compile(r"^(\s*)(?:async\s+)?def\s+")
205
+ start_idx = baseline_indent = None
206
+ for i in range(target_idx - 1, -1, -1):
207
+ m = def_re.match(all_lines[i])
208
+ if m:
209
+ indent = len(m.group(1))
210
+ if indent < target_indent:
211
+ start_idx, baseline_indent = i, indent
212
+ break
213
+ if start_idx is None or baseline_indent is None:
214
+ return None
215
+ end_idx = start_idx
216
+ in_body = ":" in all_lines[start_idx].split("#")[0]
217
+ for i, ln in enumerate(all_lines[start_idx + 1 :], start=start_idx + 1):
218
+ stripped = ln.lstrip()
219
+ if not in_body:
220
+ if ":" in ln.split("#")[0]:
221
+ in_body = True
222
+ end_idx = i
223
+ else:
224
+ if stripped and len(ln) - len(stripped) <= baseline_indent:
225
+ break
226
+ end_idx = i
227
+ return (start_idx + 1, end_idx + 1)
228
+
229
+
230
+ def find_enclosing_function(file: str, line: int, max_lines: int = 50) -> str:
231
+ """Return a fenced python block for the function enclosing 1-based `line`, or '' if not found."""
232
+ r = enclosing_function_range(file, line)
233
+ if r is None:
234
+ return ""
235
+ start_line, end_line = r
236
+ try:
237
+ all_lines = Path(file).read_text(encoding="utf-8").splitlines()
238
+ except (OSError, ValueError):
239
+ return ""
240
+ start_idx = start_line - 1
241
+ collected = list(all_lines[start_idx : min(end_line, start_idx + max_lines)])
242
+ while collected and not collected[-1].strip():
243
+ collected.pop()
244
+ numbered = "\n".join(f"{start_line + i}: {ln}" for i, ln in enumerate(collected))
245
+ return f"\n```python\n{numbered}\n```"
246
+
247
+
137
248
  def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
138
249
  """Return a fenced python block for the body of func_name, or '' if unavailable."""
139
250
  try:
@@ -151,11 +262,30 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
151
262
  return ""
152
263
  start_idx, baseline_indent = match_result
153
264
  collected = [all_lines[start_idx]]
265
+ in_docstring = False
266
+ docstring_marker: str | None = None
267
+ past_docstring = False
154
268
  for line in all_lines[start_idx + 1 :]:
155
269
  stripped = line.lstrip()
156
270
  indent = len(line) - len(stripped)
157
271
  if stripped and indent <= baseline_indent:
158
272
  break
273
+ if not past_docstring:
274
+ if not in_docstring:
275
+ quote = next(
276
+ (q for q in ('"""', "'''") if stripped.startswith(q)), None
277
+ )
278
+ if quote:
279
+ in_docstring = quote not in stripped[3:]
280
+ past_docstring = not in_docstring
281
+ docstring_marker = quote
282
+ continue
283
+ past_docstring = bool(stripped)
284
+ else:
285
+ if docstring_marker and docstring_marker in stripped:
286
+ in_docstring = False
287
+ past_docstring = True
288
+ continue
159
289
  collected.append(line)
160
290
  if len(collected) >= max_lines:
161
291
  break
@@ -165,8 +295,47 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
165
295
  return f"\n```python\n{numbered}\n```"
166
296
 
167
297
 
298
+ def resolve_path(base: str, rel_file: str) -> str:
299
+ """Return (base / rel_file) as a posix string; return rel_file unchanged if absolute or base is empty."""
300
+ if not base or not rel_file:
301
+ return rel_file
302
+ try:
303
+ p = Path(rel_file)
304
+ if p.is_absolute():
305
+ return rel_file
306
+ return (Path(base) / rel_file).as_posix()
307
+ except (OSError, ValueError):
308
+ return rel_file
309
+
310
+
311
+ def parse_json_dict(stdout: str) -> dict | None:
312
+ """Parse stdout as a JSON object; return None if invalid or not a dict."""
313
+ try:
314
+ data = json.loads(stdout)
315
+ except (json.JSONDecodeError, ValueError):
316
+ return None
317
+ return data if isinstance(data, dict) else None
318
+
319
+
320
+ def extract_first_issue(details: dict) -> tuple[str, dict] | None:
321
+ """Return (file, issue) from the first list-typed entry in details, or None."""
322
+ if not details:
323
+ return None
324
+ file, issues = next(iter(details.items()))
325
+ if not isinstance(issues, list) or not issues:
326
+ return None
327
+ issue = issues[0]
328
+ return (file, issue) if isinstance(issue, dict) else None
329
+
330
+
168
331
  def inv_normalize(value: float, max_value: float) -> float:
169
- """Returns the inverse normalized value of `value` relative to `max_value`."""
332
+ """Returns the inverse normalized value of `value` relative to `max_value`.
333
+
334
+ When *max_value* is zero the result is defined as 1.0 (no deviation from a
335
+ zero-sized reference).
336
+ """
337
+ if max_value == 0:
338
+ return 1.0
170
339
  return (max_value - min(value, max_value)) / max_value
171
340
 
172
341
 
@@ -26,7 +26,7 @@ class CompileParser(AbstractParser):
26
26
  events and error messages. For each file that emits an error, it extracts
27
27
  the line number, source snippet, error type, and help text, normalizes the
28
28
  file path, and stores this information in a dictionary keyed by file path.
29
- It then computes a failure ratio (failed files ÷ total compilations) and
29
+ It then computes a failure ratio (failed files / total compilations) and
30
30
  derives a compile score via ``score_logistic_variant``. The original
31
31
  ``stdout`` is cleaned of ``Listing`` lines and back-slash path separators
32
32
  are replaced with forward slashes. A ``ToolResult`` containing the raw
@@ -122,7 +122,9 @@ class CompileParser(AbstractParser):
122
122
  tr.details["failed_files"] = failed_files
123
123
  return tr
124
124
 
125
- def format_llm_message(self, tr: ToolResult, *, context_lines: int = 15) -> str:
125
+ def format_llm_message(
126
+ self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
127
+ ) -> str:
126
128
  """Return the first compilation failure as a defect description."""
127
129
  failed = tr.details.get("failed_files", {})
128
130
  if not failed:
@@ -131,12 +133,15 @@ class CompileParser(AbstractParser):
131
133
  line = info.get("line", "?")
132
134
  typ = info.get("type", "Error")
133
135
  help_msg = info.get("help", "")
134
- code_block = format_source_context(file, line, count=context_lines) or (f"\n```python\n{info['src']}\n```" if info.get("src") else "")
136
+ code_block = format_source_context(file, line, count=context_lines) or (
137
+ f"\n```python\n{info['src']}\n```" if info.get("src") else ""
138
+ )
135
139
  callee = ""
136
140
  src_line = info.get("src", "")
137
141
  if src_line:
138
142
  from py_cq.parsers.common import extract_callee_name, format_callee_context
143
+
139
144
  func_name = extract_callee_name(src_line)
140
145
  if func_name:
141
146
  callee = format_callee_context(func_name, file)
142
- return f"`{file}:{line}` **{typ}**: {help_msg}{code_block}{callee}"
147
+ return f"{file}:{line} - {typ}: {help_msg}{code_block}{callee}"
@@ -1,9 +1,11 @@
1
1
  """Provides a `ComplexityParser` that converts raw complexity-analysis output into structured `ToolResult` objects for downstream use."""
2
2
 
3
- import json
4
-
5
3
  from py_cq.localtypes import AbstractParser, RawResult, ToolResult
6
- from py_cq.parsers.common import score_logistic_variant
4
+ from py_cq.parsers.common import (
5
+ find_function_source,
6
+ parse_json_dict,
7
+ score_logistic_variant,
8
+ )
7
9
 
8
10
 
9
11
  class ComplexityParser(AbstractParser):
@@ -64,12 +66,8 @@ class ComplexityParser(AbstractParser):
64
66
  >>> result.metrics["simplicity"]
65
67
  0.4"""
66
68
  tr = ToolResult(raw=raw_result)
67
- try:
68
- data = json.loads(raw_result.stdout)
69
- except (json.JSONDecodeError, ValueError):
70
- tr.metrics["simplicity"] = 0.0
71
- return tr
72
- if not isinstance(data, dict):
69
+ data = parse_json_dict(raw_result.stdout)
70
+ if data is None:
73
71
  tr.metrics["simplicity"] = 0.0
74
72
  return tr
75
73
  score = 0
@@ -91,3 +89,34 @@ class ComplexityParser(AbstractParser):
91
89
  }
92
90
  tr.metrics["simplicity"] = score / num_items if num_items > 0 else 0.0
93
91
  return tr
92
+
93
+ def format_llm_message(
94
+ self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
95
+ ) -> str:
96
+ """Formats the LLM message based on the tool result."""
97
+ worst_file = worst_func = worst_rank = None
98
+ worst_score = 1.0
99
+ for file, funcs in tr.details.items():
100
+ if not isinstance(funcs, dict):
101
+ continue
102
+ for func_name, data in funcs.items():
103
+ score = data.get("simplicity", 1.0)
104
+ if score < worst_score:
105
+ worst_score = score
106
+ worst_file = file
107
+ worst_func = func_name
108
+ worst_rank = data.get("rank", "F")
109
+ if worst_file is None or worst_func is None:
110
+ if tr.metrics:
111
+ metric_name, value = next(iter(tr.metrics.items()))
112
+ return f"**{metric_name}** score: {value:.3f}"
113
+ return "No complexity details available"
114
+ source = find_function_source(worst_file, worst_func, max_lines=context_lines)
115
+ header = f"`{worst_file}::{worst_func}` - cyclomatic complexity rank **{worst_rank}**"
116
+ parts = [header]
117
+ if source:
118
+ parts.append(source)
119
+ parts.append(
120
+ "Cyclomatic complexity is too high. Break this function into smaller, single-purpose helpers."
121
+ )
122
+ return "\n\n".join(parts)