python-code-quality 0.1.15__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,9 +7,16 @@ logistic-variant score stored under the ``security`` metric key.
7
7
  """
8
8
 
9
9
  import json
10
+ import logging
10
11
 
11
12
  from py_cq.localtypes import AbstractParser, RawResult, ToolResult
12
- from py_cq.parsers.common import format_source_context, score_logistic_variant
13
+ from py_cq.parsers.common import (
14
+ extract_first_issue,
15
+ format_source_context,
16
+ score_logistic_variant,
17
+ )
18
+
19
+ log = logging.getLogger("cq")
13
20
 
14
21
  _SEVERITY_WEIGHT = {"HIGH": 5, "MEDIUM": 2, "LOW": 1}
15
22
 
@@ -18,10 +25,26 @@ class BanditParser(AbstractParser):
18
25
  """Parses raw JSON output from ``bandit -f json`` into a ToolResult."""
19
26
 
20
27
  def parse(self, raw_result: RawResult) -> ToolResult:
28
+ """Parses the raw bandit JSON output into a ToolResult."""
21
29
  try:
22
30
  data = json.loads(raw_result.stdout)
23
31
  except (json.JSONDecodeError, ValueError):
24
- return ToolResult(raw=raw_result, metrics={"security": 1.0})
32
+ log.warning(
33
+ "bandit output is not valid JSON (return_code=%s). Reporting degraded score.",
34
+ raw_result.return_code,
35
+ )
36
+ degraded = 0.0 if raw_result.return_code != 0 else 0.5
37
+ return ToolResult(raw=raw_result, metrics={"security": degraded})
38
+ if not isinstance(data, dict):
39
+ log.warning("bandit output is not a JSON object. Reporting degraded score.")
40
+ return ToolResult(raw=raw_result, metrics={"security": 0.5})
41
+
42
+ totals = data.get("metrics", {}).get("_totals", {})
43
+ log.debug(
44
+ "bandit scanned %d LOC across %d files",
45
+ totals.get("loc", 0),
46
+ len(data.get("metrics", {})) - 1,
47
+ )
25
48
 
26
49
  files: dict[str, list] = {}
27
50
  weighted = 0
@@ -30,25 +53,31 @@ class BanditParser(AbstractParser):
30
53
  if "/.venv/" in path or "/site-packages/" in path:
31
54
  continue
32
55
  severity = issue.get("issue_severity", "LOW")
33
- files.setdefault(path, []).append({
34
- "line": issue.get("line_number", 0),
35
- "code": issue.get("test_id", ""),
36
- "severity": severity,
37
- "confidence": issue.get("issue_confidence", ""),
38
- "message": issue.get("issue_text", ""),
39
- })
56
+ files.setdefault(path, []).append(
57
+ {
58
+ "line": issue.get("line_number", 0),
59
+ "code": issue.get("test_id", ""),
60
+ "severity": severity,
61
+ "confidence": issue.get("issue_confidence", ""),
62
+ "message": issue.get("issue_text", ""),
63
+ }
64
+ )
40
65
  weighted += _SEVERITY_WEIGHT.get(severity, 1)
41
66
 
42
67
  score = score_logistic_variant(weighted, scale_factor=10)
43
68
  return ToolResult(raw=raw_result, metrics={"security": score}, details=files)
44
69
 
45
- def format_llm_message(self, tr: ToolResult, *, context_lines: int = 15) -> str:
46
- if not tr.details:
70
+ def format_llm_message(
71
+ self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
72
+ ) -> str:
73
+ """Formats the bandit result into a LLM-friendly message string."""
74
+
75
+ result = extract_first_issue(tr.details)
76
+ if result is None:
47
77
  return "bandit reported issues (no details available)"
48
- file, issues = next(iter(tr.details.items()))
49
- issue = issues[0]
78
+ file, issue = result
50
79
  line = issue.get("line", "?")
51
80
  code = issue.get("code", "")
52
81
  severity = issue.get("severity", "")
53
82
  message = issue.get("message", "")
54
- return f"`{file}:{line}` **{code}** [{severity}]: {message}{format_source_context(file, line, count=context_lines)}"
83
+ return f"{file}:{line} - {code}: [{severity}] {message}{format_source_context(file, line, count=context_lines)}"
py_cq/parsers/common.py CHANGED
@@ -12,21 +12,26 @@ performance metrics or error scores:
12
12
  Both functions return a float and can be used directly in downstream analytics,
13
13
  visualisation or decision-making pipelines."""
14
14
 
15
+ import json
16
+ import re
15
17
  from pathlib import Path
16
18
 
17
19
 
18
20
  def read_source_lines(file_path: str, line: int, count: int = 5) -> str:
19
21
  """Return up to `count` source lines starting at the given 1-based line number."""
20
- from pathlib import Path
21
22
  try:
22
- all_lines = Path(file_path).read_text(encoding="utf-8").splitlines()
23
+ all_lines = (
24
+ Path(file_path).read_text(encoding="utf-8", errors="replace").splitlines()
25
+ )
23
26
  start = max(0, line - 1)
24
27
  return "\n".join(all_lines[start : start + count])
25
- except OSError:
28
+ except (OSError, ValueError):
26
29
  return ""
27
30
 
28
31
 
29
- def format_source_context(file: str, line: int | str, context: int = 3, count: int = 8) -> str:
32
+ def format_source_context(
33
+ file: str, line: int | str, context: int = 3, count: int = 8
34
+ ) -> str:
30
35
  """Return a fenced python code block for the source around `line`, or '' if unavailable.
31
36
 
32
37
  Stops before spilling into the next top-level ``def`` or ``class`` definition.
@@ -51,12 +56,39 @@ def format_source_context(file: str, line: int | str, context: int = 3, count: i
51
56
  return f"\n```python\n{src}\n```"
52
57
 
53
58
 
54
- _PYTHON_KEYWORDS = frozenset([
55
- "if", "elif", "else", "for", "while", "with", "assert", "return",
56
- "raise", "import", "from", "class", "def", "lambda", "yield",
57
- "del", "pass", "break", "continue", "not", "and", "or", "in", "is",
58
- "print", "super", "type", "len", "range",
59
- ])
59
+ _PYTHON_KEYWORDS = frozenset(
60
+ [
61
+ "if",
62
+ "elif",
63
+ "else",
64
+ "for",
65
+ "while",
66
+ "with",
67
+ "assert",
68
+ "return",
69
+ "raise",
70
+ "import",
71
+ "from",
72
+ "class",
73
+ "def",
74
+ "lambda",
75
+ "yield",
76
+ "del",
77
+ "pass",
78
+ "break",
79
+ "continue",
80
+ "not",
81
+ "and",
82
+ "or",
83
+ "in",
84
+ "is",
85
+ "print",
86
+ "super",
87
+ "type",
88
+ "len",
89
+ "range",
90
+ ]
91
+ )
60
92
 
61
93
 
62
94
  def extract_callee_name(source_line: str) -> str | None:
@@ -66,19 +98,17 @@ def extract_callee_name(source_line: str) -> str | None:
66
98
  ``func`` rather than the variable on the left. Python keywords and
67
99
  built-ins listed in ``_PYTHON_KEYWORDS`` are excluded.
68
100
  """
69
- import re
70
101
  stripped = source_line.strip()
71
102
  rhs = stripped
72
103
  if "=" in stripped and not stripped.startswith(("assert", "return")):
73
104
  rhs = stripped.split("=", 1)[1].strip()
74
105
  m = re.search(r"\b([a-zA-Z_]\w*)\s*\(", rhs)
75
- if m and m.group(1) not in _PYTHON_KEYWORDS:
106
+ if m and m.group(1) not in _PYTHON_KEYWORDS and len(m.group(1)) > 1:
76
107
  return m.group(1)
77
108
  return None
78
109
 
79
110
 
80
111
  def _find_project_root(hint_file: str) -> Path:
81
- from pathlib import Path
82
112
  root = Path(hint_file).resolve().parent
83
113
  current = root
84
114
  for _ in range(8):
@@ -91,17 +121,32 @@ def _find_project_root(hint_file: str) -> Path:
91
121
  return root
92
122
 
93
123
 
94
- def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tuple[str, str]:
124
+ _SKIP_DIRS = {
125
+ ".venv",
126
+ "venv",
127
+ "__pycache__",
128
+ ".git",
129
+ "node_modules",
130
+ ".tox",
131
+ "dist",
132
+ "build",
133
+ }
134
+
135
+
136
+ def find_in_project(
137
+ func_name: str, hint_file: str, max_lines: int = 10
138
+ ) -> tuple[str, str]:
95
139
  """Find func_name definition in project files; same file first, then project-wide.
96
140
 
97
141
  Returns ``(file_path, code_block)`` for the first match, or ``("", "")`` if not found.
98
142
  """
99
- from pathlib import Path
100
143
  result = find_function_source(hint_file, func_name, max_lines=max_lines)
101
144
  if result:
102
145
  return hint_file, result
103
146
  root = _find_project_root(hint_file)
104
147
  for py_file in sorted(root.rglob("*.py")):
148
+ if any(part in _SKIP_DIRS for part in py_file.parts):
149
+ continue
105
150
  if py_file.resolve() == Path(hint_file).resolve():
106
151
  continue
107
152
  r = find_function_source(str(py_file), func_name, max_lines=max_lines)
@@ -111,12 +156,20 @@ def find_in_project(func_name: str, hint_file: str, max_lines: int = 10) -> tupl
111
156
 
112
157
 
113
158
  def _relative_path(path: str) -> str:
114
- """Return path relative to cwd, normalised to forward slashes."""
115
- from pathlib import Path
159
+ """Return path relative to project root if possible, otherwise absolute. Forward slashes."""
116
160
  try:
117
- return str(Path(path).relative_to(Path.cwd())).replace("\\", "/")
118
- except ValueError:
161
+ resolved = Path(path).resolve()
162
+ except (OSError, ValueError):
119
163
  return path.replace("\\", "/")
164
+ try:
165
+ return resolved.relative_to(_find_project_root(path)).as_posix()
166
+ except ValueError:
167
+ return resolved.as_posix()
168
+
169
+
170
+ def format_issue_header(file: str, line: int, code: str, message: str) -> str:
171
+ """Return a clean single-line issue header: path:line - CODE: message."""
172
+ return f"{_relative_path(file)}:{line} - {code}: {message}"
120
173
 
121
174
 
122
175
  def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -> str:
@@ -124,13 +177,12 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
124
177
 
125
178
  Output format::
126
179
 
127
- Callee `func_name` `relative/path/to/file.py`
180
+ Callee `func_name` - `relative/path/to/file.py`
128
181
  ```python
129
182
  N: def func_name(...):
130
183
  ...
131
184
  ```
132
185
  """
133
- import re
134
186
  callee_file, code_block = find_in_project(func_name, hint_file, max_lines=max_lines)
135
187
  if not code_block:
136
188
  return ""
@@ -139,14 +191,66 @@ def format_callee_context(func_name: str, hint_file: str, max_lines: int = 10) -
139
191
  return f"\n`{func_name}` is defined at: `{_relative_path(callee_file)}{line_ref}`{code_block}"
140
192
 
141
193
 
194
+ def enclosing_function_range(file: str, line: int) -> tuple[int, int] | None:
195
+ """Return (start_line, end_line) 1-based for the function enclosing `line`, or None."""
196
+ try:
197
+ all_lines = Path(file).read_text(encoding="utf-8").splitlines()
198
+ except (OSError, ValueError):
199
+ return None
200
+ if line < 1 or line > len(all_lines):
201
+ return None
202
+ target_idx = line - 1
203
+ target_indent = len(all_lines[target_idx]) - len(all_lines[target_idx].lstrip())
204
+ def_re = re.compile(r"^(\s*)(?:async\s+)?def\s+")
205
+ start_idx = baseline_indent = None
206
+ for i in range(target_idx - 1, -1, -1):
207
+ m = def_re.match(all_lines[i])
208
+ if m:
209
+ indent = len(m.group(1))
210
+ if indent < target_indent:
211
+ start_idx, baseline_indent = i, indent
212
+ break
213
+ if start_idx is None or baseline_indent is None:
214
+ return None
215
+ end_idx = start_idx
216
+ in_body = ":" in all_lines[start_idx].split("#")[0]
217
+ for i, ln in enumerate(all_lines[start_idx + 1 :], start=start_idx + 1):
218
+ stripped = ln.lstrip()
219
+ if not in_body:
220
+ if ":" in ln.split("#")[0]:
221
+ in_body = True
222
+ end_idx = i
223
+ else:
224
+ if stripped and len(ln) - len(stripped) <= baseline_indent:
225
+ break
226
+ end_idx = i
227
+ return (start_idx + 1, end_idx + 1)
228
+
229
+
230
+ def find_enclosing_function(file: str, line: int, max_lines: int = 50) -> str:
231
+ """Return a fenced python block for the function enclosing 1-based `line`, or '' if not found."""
232
+ r = enclosing_function_range(file, line)
233
+ if r is None:
234
+ return ""
235
+ start_line, end_line = r
236
+ try:
237
+ all_lines = Path(file).read_text(encoding="utf-8").splitlines()
238
+ except (OSError, ValueError):
239
+ return ""
240
+ start_idx = start_line - 1
241
+ collected = list(all_lines[start_idx : min(end_line, start_idx + max_lines)])
242
+ while collected and not collected[-1].strip():
243
+ collected.pop()
244
+ numbered = "\n".join(f"{start_line + i}: {ln}" for i, ln in enumerate(collected))
245
+ return f"\n```python\n{numbered}\n```"
246
+
247
+
142
248
  def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
143
249
  """Return a fenced python block for the body of func_name, or '' if unavailable."""
144
- from pathlib import Path
145
250
  try:
146
251
  all_lines = Path(file).read_text(encoding="utf-8").splitlines()
147
- except OSError:
252
+ except (OSError, ValueError):
148
253
  return ""
149
- import re
150
254
  pattern = re.compile(rf"^(\s*)(?:async\s+)?def\s+{re.escape(func_name)}\s*\(")
151
255
  match_result: tuple[int, int] | None = None
152
256
  for i, line in enumerate(all_lines):
@@ -158,11 +262,30 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
158
262
  return ""
159
263
  start_idx, baseline_indent = match_result
160
264
  collected = [all_lines[start_idx]]
265
+ in_docstring = False
266
+ docstring_marker: str | None = None
267
+ past_docstring = False
161
268
  for line in all_lines[start_idx + 1 :]:
162
269
  stripped = line.lstrip()
163
270
  indent = len(line) - len(stripped)
164
271
  if stripped and indent <= baseline_indent:
165
272
  break
273
+ if not past_docstring:
274
+ if not in_docstring:
275
+ quote = next(
276
+ (q for q in ('"""', "'''") if stripped.startswith(q)), None
277
+ )
278
+ if quote:
279
+ in_docstring = quote not in stripped[3:]
280
+ past_docstring = not in_docstring
281
+ docstring_marker = quote
282
+ continue
283
+ past_docstring = bool(stripped)
284
+ else:
285
+ if docstring_marker and docstring_marker in stripped:
286
+ in_docstring = False
287
+ past_docstring = True
288
+ continue
166
289
  collected.append(line)
167
290
  if len(collected) >= max_lines:
168
291
  break
@@ -172,8 +295,47 @@ def find_function_source(file: str, func_name: str, max_lines: int = 15) -> str:
172
295
  return f"\n```python\n{numbered}\n```"
173
296
 
174
297
 
298
+ def resolve_path(base: str, rel_file: str) -> str:
299
+ """Return (base / rel_file) as a posix string; return rel_file unchanged if absolute or base is empty."""
300
+ if not base or not rel_file:
301
+ return rel_file
302
+ try:
303
+ p = Path(rel_file)
304
+ if p.is_absolute():
305
+ return rel_file
306
+ return (Path(base) / rel_file).as_posix()
307
+ except (OSError, ValueError):
308
+ return rel_file
309
+
310
+
311
+ def parse_json_dict(stdout: str) -> dict | None:
312
+ """Parse stdout as a JSON object; return None if invalid or not a dict."""
313
+ try:
314
+ data = json.loads(stdout)
315
+ except (json.JSONDecodeError, ValueError):
316
+ return None
317
+ return data if isinstance(data, dict) else None
318
+
319
+
320
+ def extract_first_issue(details: dict) -> tuple[str, dict] | None:
321
+ """Return (file, issue) from the first list-typed entry in details, or None."""
322
+ if not details:
323
+ return None
324
+ file, issues = next(iter(details.items()))
325
+ if not isinstance(issues, list) or not issues:
326
+ return None
327
+ issue = issues[0]
328
+ return (file, issue) if isinstance(issue, dict) else None
329
+
330
+
175
331
  def inv_normalize(value: float, max_value: float) -> float:
176
- """Returns the inverse normalized value of `value` relative to `max_value`."""
332
+ """Returns the inverse normalized value of `value` relative to `max_value`.
333
+
334
+ When *max_value* is zero the result is defined as 1.0 (no deviation from a
335
+ zero-sized reference).
336
+ """
337
+ if max_value == 0:
338
+ return 1.0
177
339
  return (max_value - min(value, max_value)) / max_value
178
340
 
179
341
 
@@ -26,7 +26,7 @@ class CompileParser(AbstractParser):
26
26
  events and error messages. For each file that emits an error, it extracts
27
27
  the line number, source snippet, error type, and help text, normalizes the
28
28
  file path, and stores this information in a dictionary keyed by file path.
29
- It then computes a failure ratio (failed files ÷ total compilations) and
29
+ It then computes a failure ratio (failed files / total compilations) and
30
30
  derives a compile score via ``score_logistic_variant``. The original
31
31
  ``stdout`` is cleaned of ``Listing`` lines and back-slash path separators
32
32
  are replaced with forward slashes. A ``ToolResult`` containing the raw
@@ -64,7 +64,10 @@ class CompileParser(AbstractParser):
64
64
  compilations += 1
65
65
  elif line.startswith("*** File "):
66
66
  # This indicates a compilation error
67
- file_path = line.split('"')[1]
67
+ parts = line.split('"')
68
+ if len(parts) < 2:
69
+ continue
70
+ file_path = parts[1]
68
71
  current_error = {"file": file_path, "error": line}
69
72
  elif current_error and line.strip():
70
73
  # Append additional error context
@@ -80,17 +83,21 @@ class CompileParser(AbstractParser):
80
83
  error_info = {}
81
84
  # Extract line number if present
82
85
  if "line " in error_lines[0]:
83
- error_info["line"] = int(
84
- error_lines[0].split("line ")[1].split(",")[0]
85
- )
86
+ try:
87
+ error_info["line"] = int(
88
+ error_lines[0].split("line ")[1].split(",")[0]
89
+ )
90
+ except ValueError:
91
+ pass
86
92
  # Get source code context if available
87
93
  if len(error_lines) > 1:
88
94
  error_info["src"] = error_lines[1].strip()
89
95
  if len(error_lines) > 3:
90
96
  if "Error:" in error_lines[3]:
91
97
  error_parts = error_lines[3].split(":")
98
+ type_tokens = error_parts[0].strip().split()
92
99
  error_info["type"] = (
93
- error_parts[0].strip().split()[-1]
100
+ type_tokens[-1] if type_tokens else "Unknown"
94
101
  ) # Gets "SyntaxError"
95
102
  error_info["help"] = ",".join(
96
103
  error_parts[1:]
@@ -115,7 +122,9 @@ class CompileParser(AbstractParser):
115
122
  tr.details["failed_files"] = failed_files
116
123
  return tr
117
124
 
118
- def format_llm_message(self, tr: ToolResult, *, context_lines: int = 15) -> str:
125
+ def format_llm_message(
126
+ self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
127
+ ) -> str:
119
128
  """Return the first compilation failure as a defect description."""
120
129
  failed = tr.details.get("failed_files", {})
121
130
  if not failed:
@@ -124,12 +133,15 @@ class CompileParser(AbstractParser):
124
133
  line = info.get("line", "?")
125
134
  typ = info.get("type", "Error")
126
135
  help_msg = info.get("help", "")
127
- code_block = format_source_context(file, line, count=context_lines) or (f"\n```python\n{info['src']}\n```" if info.get("src") else "")
136
+ code_block = format_source_context(file, line, count=context_lines) or (
137
+ f"\n```python\n{info['src']}\n```" if info.get("src") else ""
138
+ )
128
139
  callee = ""
129
140
  src_line = info.get("src", "")
130
141
  if src_line:
131
142
  from py_cq.parsers.common import extract_callee_name, format_callee_context
143
+
132
144
  func_name = extract_callee_name(src_line)
133
145
  if func_name:
134
146
  callee = format_callee_context(func_name, file)
135
- return f"`{file}:{line}` **{typ}**: {help_msg}{code_block}{callee}"
147
+ return f"{file}:{line} - {typ}: {help_msg}{code_block}{callee}"
@@ -1,9 +1,11 @@
1
1
  """Provides a `ComplexityParser` that converts raw complexity-analysis output into structured `ToolResult` objects for downstream use."""
2
2
 
3
- import json
4
-
5
3
  from py_cq.localtypes import AbstractParser, RawResult, ToolResult
6
- from py_cq.parsers.common import score_logistic_variant
4
+ from py_cq.parsers.common import (
5
+ find_function_source,
6
+ parse_json_dict,
7
+ score_logistic_variant,
8
+ )
7
9
 
8
10
 
9
11
  class ComplexityParser(AbstractParser):
@@ -64,7 +66,10 @@ class ComplexityParser(AbstractParser):
64
66
  >>> result.metrics["simplicity"]
65
67
  0.4"""
66
68
  tr = ToolResult(raw=raw_result)
67
- data = json.loads(raw_result.stdout)
69
+ data = parse_json_dict(raw_result.stdout)
70
+ if data is None:
71
+ tr.metrics["simplicity"] = 0.0
72
+ return tr
68
73
  score = 0
69
74
  num_items = 0
70
75
  max_complexity = 30
@@ -84,3 +89,34 @@ class ComplexityParser(AbstractParser):
84
89
  }
85
90
  tr.metrics["simplicity"] = score / num_items if num_items > 0 else 0.0
86
91
  return tr
92
+
93
+ def format_llm_message(
94
+ self, tr: ToolResult, *, context_lines: int = 15, limit: int = 1
95
+ ) -> str:
96
+ """Formats the LLM message based on the tool result."""
97
+ worst_file = worst_func = worst_rank = None
98
+ worst_score = 1.0
99
+ for file, funcs in tr.details.items():
100
+ if not isinstance(funcs, dict):
101
+ continue
102
+ for func_name, data in funcs.items():
103
+ score = data.get("simplicity", 1.0)
104
+ if score < worst_score:
105
+ worst_score = score
106
+ worst_file = file
107
+ worst_func = func_name
108
+ worst_rank = data.get("rank", "F")
109
+ if worst_file is None or worst_func is None:
110
+ if tr.metrics:
111
+ metric_name, value = next(iter(tr.metrics.items()))
112
+ return f"**{metric_name}** score: {value:.3f}"
113
+ return "No complexity details available"
114
+ source = find_function_source(worst_file, worst_func, max_lines=context_lines)
115
+ header = f"`{worst_file}::{worst_func}` - cyclomatic complexity rank **{worst_rank}**"
116
+ parts = [header]
117
+ if source:
118
+ parts.append(source)
119
+ parts.append(
120
+ "Cyclomatic complexity is too high. Break this function into smaller, single-purpose helpers."
121
+ )
122
+ return "\n\n".join(parts)