qgis-plugin-analyzer 1.4.0__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. analyzer/__init__.py +2 -1
  2. analyzer/cli/__init__.py +14 -0
  3. analyzer/cli/app.py +147 -0
  4. analyzer/cli/base.py +93 -0
  5. analyzer/cli/commands/__init__.py +19 -0
  6. analyzer/cli/commands/analyze.py +47 -0
  7. analyzer/cli/commands/fix.py +58 -0
  8. analyzer/cli/commands/init.py +41 -0
  9. analyzer/cli/commands/list_rules.py +41 -0
  10. analyzer/cli/commands/security.py +46 -0
  11. analyzer/cli/commands/summary.py +52 -0
  12. analyzer/cli/commands/version.py +41 -0
  13. analyzer/cli.py +4 -281
  14. analyzer/commands.py +163 -0
  15. analyzer/engine.py +491 -245
  16. analyzer/fixer.py +206 -130
  17. analyzer/reporters/markdown_reporter.py +88 -14
  18. analyzer/reporters/summary_reporter.py +226 -49
  19. analyzer/rules/qgis_rules.py +3 -1
  20. analyzer/scanner.py +219 -711
  21. analyzer/secrets.py +84 -0
  22. analyzer/security_checker.py +85 -0
  23. analyzer/security_rules.py +127 -0
  24. analyzer/transformers.py +29 -8
  25. analyzer/utils/__init__.py +2 -0
  26. analyzer/utils/path_utils.py +53 -1
  27. analyzer/validators.py +90 -55
  28. analyzer/visitors/__init__.py +19 -0
  29. analyzer/visitors/base.py +75 -0
  30. analyzer/visitors/composite_visitor.py +73 -0
  31. analyzer/visitors/imports_visitor.py +85 -0
  32. analyzer/visitors/metrics_visitor.py +158 -0
  33. analyzer/visitors/security_visitor.py +52 -0
  34. analyzer/visitors/standards_visitor.py +284 -0
  35. {qgis_plugin_analyzer-1.4.0.dist-info → qgis_plugin_analyzer-1.6.0.dist-info}/METADATA +32 -10
  36. qgis_plugin_analyzer-1.6.0.dist-info/RECORD +52 -0
  37. {qgis_plugin_analyzer-1.4.0.dist-info → qgis_plugin_analyzer-1.6.0.dist-info}/WHEEL +1 -1
  38. qgis_plugin_analyzer-1.4.0.dist-info/RECORD +0 -30
  39. {qgis_plugin_analyzer-1.4.0.dist-info → qgis_plugin_analyzer-1.6.0.dist-info}/entry_points.txt +0 -0
  40. {qgis_plugin_analyzer-1.4.0.dist-info → qgis_plugin_analyzer-1.6.0.dist-info}/licenses/LICENSE +0 -0
  41. {qgis_plugin_analyzer-1.4.0.dist-info → qgis_plugin_analyzer-1.6.0.dist-info}/top_level.txt +0 -0
analyzer/scanner.py CHANGED
@@ -18,619 +18,66 @@
18
18
  # * *
19
19
  # ***************************************************************************/
20
20
 
21
+ """Module for scanning and auditing QGIS plugin Python files.
22
+
23
+ This module provides functionalities to analyze individual Python modules using AST,
24
+ check for security vulnerabilities, and audit against QGIS coding standards.
25
+ """
26
+
21
27
  import ast
22
28
  import pathlib
23
- import re
24
- from typing import Any, Dict, List, Optional
29
+ from typing import Any, Dict, List, Optional, TypedDict
25
30
 
26
- from .rules.qgis_rules import I18N_METHODS, get_qgis_audit_rules
31
+ from .rules.qgis_rules import get_qgis_audit_rules
32
+ from .secrets import SecretScanner
27
33
  from .utils.ast_utils import (
28
- calculate_complexity,
29
34
  calculate_module_complexity,
30
35
  check_main_guard,
31
36
  extract_classes_from_ast,
32
37
  extract_functions_from_ast,
33
38
  extract_imports_from_ast,
34
39
  )
40
+ from .visitors import QGISASTVisitor, QGISSecurityVisitor
35
41
 
42
+ # --- Types ---
36
43
 
37
- class QGISASTVisitor(ast.NodeVisitor):
38
- """AST visitor to detect QGIS-specific issues."""
39
-
40
- def __init__(self, rel_path: str, rules_config: Optional[Dict[str, Any]] = None) -> None:
41
- """Initializes the AST visitor for a specific file.
42
-
43
- Args:
44
- rel_path: Relative path to the file being analyzed.
45
- rules_config: Optional configuration for audit rules and severities.
46
- """
47
- self.rel_path = rel_path
48
- self.issues: List[Dict[str, Any]] = []
49
- self.rules_config = rules_config or {}
50
- self.class_methods_stack: List[Any] = [] # Actually Set[str] but allows flexibility
51
-
52
- # New metrics for research-based scoring
53
- self.docstring_styles: List[str] = [] # List of detected styles (Google, NumPy)
54
- self.type_hint_stats = {
55
- "total_parameters": 0,
56
- "annotated_parameters": 0,
57
- "has_return_hint": 0,
58
- "total_functions": 0,
59
- }
60
- self.docstring_stats = {"total_public_items": 0, "has_docstring": 0}
61
- self.i18n_methods = I18N_METHODS
62
-
63
- def _check_docstring_style(self, doc: Optional[str]) -> None:
64
- """Identifies Google or NumPy docstring styles within a string.
65
-
66
- Args:
67
- doc: The docstring content to analyze.
68
- """
69
- if not doc:
70
- return
71
- # Google: Args: or Returns: or Raises: as headers
72
- if re.search(r"\n\s*(Args|Returns|Raises|Yields):\s*\n", doc):
73
- self.docstring_styles.append("Google")
74
- # NumPy: Underlined headers
75
- elif re.search(r"\n(Parameters|Returns|Raises|Yields)\n\s*-{3,}", doc):
76
- self.docstring_styles.append("NumPy")
77
-
78
- def visit_Module(self, node: ast.Module) -> None:
79
- """Analyzes a module-level AST node for docstrings and other metrics.
80
-
81
- Args:
82
- node: The module node to analyze.
83
- """
84
- doc = ast.get_docstring(node)
85
- self.docstring_stats["total_public_items"] += 1
86
- if doc:
87
- self.docstring_stats["has_docstring"] += 1
88
- self._check_docstring_style(doc)
89
- elif self._should_report("MISSING_DOCSTRING"):
90
- self.issues.append(
91
- {
92
- "file": self.rel_path,
93
- "line": 1,
94
- "type": "MISSING_DOCSTRING",
95
- "severity": self._get_severity("MISSING_DOCSTRING"),
96
- "message": "Module is missing a docstring (PEP 257).",
97
- "code": "Module: " + self.rel_path,
98
- }
99
- )
100
- self.generic_visit(node)
101
-
102
- def _should_report(self, rule_id: str) -> bool:
103
- """Check if rule should be reported based on config."""
104
- severity = self.rules_config.get(rule_id, "warning")
105
- return bool(severity != "ignore")
106
-
107
- def _get_severity(self, rule_id: str) -> str:
108
- """Get configured severity for rule (maps to 'high', 'medium', 'low')."""
109
- config_severity = self.rules_config.get(rule_id, "warning")
110
- # Map config severity to internal severity
111
- severity_map = {
112
- "error": "high",
113
- "warning": "medium",
114
- "info": "low",
115
- }
116
- return severity_map.get(config_severity, "medium")
117
-
118
- def _check_obsolete_api(self, node: ast.Call) -> None:
119
- """Detects usage of obsolete QGIS APIs.
120
-
121
- Args:
122
- node: The function call node to analyze.
123
- """
124
- if isinstance(node.func, ast.Attribute) and node.func.attr == "writeAsVectorFormat":
125
- if self._should_report("OBSOLETE_API"):
126
- self.issues.append(
127
- {
128
- "file": self.rel_path,
129
- "line": node.lineno,
130
- "type": "OBSOLETE_API",
131
- "severity": self._get_severity("OBSOLETE_API"),
132
- "message": "Obsolete writeAsVectorFormat() usage. Use writeAsVectorFormatV3().",
133
- "code": ast.unparse(node),
134
- }
135
- )
136
-
137
- def _check_missing_i18n(self, node: ast.Call) -> None:
138
- """Detects untranslated UI strings in common PyQGIS methods.
139
-
140
- Args:
141
- node: The function call node to analyze.
142
- """
143
- if isinstance(node.func, ast.Attribute) and node.func.attr in self.i18n_methods:
144
- if (
145
- node.args
146
- and isinstance(node.args[0], ast.Constant)
147
- and isinstance(node.args[0].value, str)
148
- ):
149
- val = node.args[0].value
150
- if val.strip() and not val.startswith("%"):
151
- if self._should_report("MISSING_I18N"):
152
- self.issues.append(
153
- {
154
- "file": self.rel_path,
155
- "line": node.lineno,
156
- "type": "MISSING_I18N",
157
- "severity": self._get_severity("MISSING_I18N"),
158
- "message": f"Untranslated UI text string in '{node.func.attr}': '{val}'. Use self.tr().",
159
- "code": ast.unparse(node),
160
- }
161
- )
162
-
163
- def _check_missing_slot(self, node: ast.Call) -> None:
164
- """Heuristically detects potentially missing signal slots in signal connections.
165
-
166
- Args:
167
- node: The function call node to analyze.
168
- """
169
- if isinstance(node.func, ast.Attribute) and node.func.attr == "connect":
170
- if node.args:
171
- arg = node.args[0]
172
- if (
173
- isinstance(arg, ast.Attribute)
174
- and isinstance(arg.value, ast.Name)
175
- and arg.value.id == "self"
176
- ):
177
- slot = arg.attr
178
- if self.class_methods_stack:
179
- current_methods = self.class_methods_stack[-1]
180
- if slot not in current_methods:
181
- if self._should_report("POTENTIAL_MISSING_SLOT"):
182
- self.issues.append(
183
- {
184
- "file": self.rel_path,
185
- "line": node.lineno,
186
- "type": "POTENTIAL_MISSING_SLOT",
187
- "severity": self._get_severity("POTENTIAL_MISSING_SLOT"),
188
- "message": f"Connected slot 'self.{slot}' not found in class definitions. Verify it is defined or inherited.",
189
- }
190
- )
191
-
192
- def _check_unsafe_subprocess(self, node: ast.Call) -> None:
193
- """Detects potentially unsafe subprocess usage.
194
-
195
- Args:
196
- node: The function call node to analyze.
197
- """
198
- # Targets: subprocess.run, call, Popen, check_call, check_output
199
- is_subprocess = False
200
- if isinstance(node.func, ast.Attribute) and isinstance(node.func.value, ast.Name):
201
- if node.func.value.id == "subprocess" and node.func.attr in {
202
- "run",
203
- "call",
204
- "Popen",
205
- "check_call",
206
- "check_output",
207
- }:
208
- is_subprocess = True
209
-
210
- if not is_subprocess:
211
- return
212
-
213
- # 1. Check for shell=True
214
- shell_true = False
215
- for kw in node.keywords:
216
- if kw.arg == "shell" and isinstance(kw.value, ast.Constant) and kw.value.value is True:
217
- shell_true = True
218
- break
219
-
220
- if shell_true:
221
- if self._should_report("UNSAFE_SUBPROCESS"):
222
- self.issues.append(
223
- {
224
- "file": self.rel_path,
225
- "line": node.lineno,
226
- "type": "UNSAFE_SUBPROCESS",
227
- "severity": self._get_severity("UNSAFE_SUBPROCESS"),
228
- "message": "Subprocess called with 'shell=True'. This is a security risk if input is unsanitized.",
229
- "code": ast.unparse(node),
230
- }
231
- )
232
- return
233
-
234
- # 2. Check for unquoted variable interpolation in the command string (heuristic)
235
- # If the first argument is a string (not a list) and contains % or {} or f-string
236
- if node.args:
237
- cmd_arg = node.args[0]
238
- if isinstance(cmd_arg, (ast.JoinedStr, ast.BinOp)):
239
- if self._should_report("UNSAFE_SUBPROCESS"):
240
- self.issues.append(
241
- {
242
- "file": self.rel_path,
243
- "line": node.lineno,
244
- "type": "UNSAFE_SUBPROCESS",
245
- "severity": self._get_severity("UNSAFE_SUBPROCESS"),
246
- "message": "Possible unquoted variable injection in subprocess command. Use a list of arguments instead.",
247
- "code": ast.unparse(node),
248
- }
249
- )
250
-
251
- def _check_blocking_network(self, node: ast.Call) -> None:
252
- """Detects synchronous network calls in UI-related files.
253
-
254
- Args:
255
- node: The function call node to analyze.
256
- """
257
- is_network = False
258
- # requests.get/post...
259
- if isinstance(node.func, ast.Attribute) and isinstance(node.func.value, ast.Name):
260
- if node.func.value.id == "requests" and node.func.attr in {
261
- "get",
262
- "post",
263
- "put",
264
- "delete",
265
- "patch",
266
- }:
267
- is_network = True
268
-
269
- # urllib.request.urlopen (can be deep)
270
- # Note: urllib.request.urlopen(...) or urlopen(...) if from urllib.request import urlopen
271
- # current AST logic check: urllib.request.urlopen
272
- if not is_network:
273
- attr_chain = []
274
- curr = node.func
275
- while isinstance(curr, ast.Attribute):
276
- attr_chain.append(curr.attr)
277
- curr = curr.value
278
- if isinstance(curr, ast.Name):
279
- attr_chain.append(curr.id)
280
-
281
- # Chain is reversed: ['urlopen', 'request', 'urllib']
282
- if attr_chain == ["urlopen", "request", "urllib"]:
283
- is_network = True
284
- elif attr_chain == ["urlopen"] and isinstance(node.func, ast.Name) and node.func.id == "urlopen":
285
- # This would need tracking imports, but let's stick to full path for now as per plan
286
- # Or check if it's just 'urlopen'
287
- is_network = True
288
-
289
- if not is_network:
290
- return
291
-
292
- # Check if it's a UI/GUI file
293
- is_ui_file = any(kw in self.rel_path.lower() for kw in ["gui", "ui", "dialog", "widget"])
294
-
295
- if is_ui_file:
296
- if self._should_report("BLOCKING_NETWORK_CALL"):
297
- self.issues.append(
298
- {
299
- "file": self.rel_path,
300
- "line": node.lineno,
301
- "type": "BLOCKING_NETWORK_CALL",
302
- "severity": self._get_severity("BLOCKING_NETWORK_CALL"),
303
- "message": "Synchronous network call detected in UI file. This will freeze QGIS. Use QgsTask or QNetworkAccessManager.",
304
- "code": ast.unparse(node),
305
- }
306
- )
307
-
308
- def visit_Call(self, node: ast.Call) -> None:
309
- """Analyzes function call nodes for multiple QGIS-specific rules.
310
-
311
- Args:
312
- node: The call node to analyze.
313
- """
314
- self._check_obsolete_api(node)
315
- self._check_missing_i18n(node)
316
- self._check_missing_slot(node)
317
- self._check_unsafe_subprocess(node)
318
- self._check_blocking_network(node)
319
- self.generic_visit(node)
320
-
321
- def visit_For(self, node: ast.For) -> None:
322
- """Analyzes loop nodes for performance (spatial indexing) and Pythonic patterns.
323
-
324
- Args:
325
- node: The loop node to analyze.
326
- """
327
- # Detect SPATIAL_INDEX (Looping features without filter)
328
- # Check if iterating over .getFeatures()
329
- if isinstance(node.iter, ast.Call) and isinstance(node.iter.func, ast.Attribute):
330
- if node.iter.func.attr == "getFeatures":
331
- # If getFeatures() has no arguments or is passed QgsFeatureRequest() with no filter,
332
- # it's potentially heavy.
333
- warn = False
334
- if not node.iter.args:
335
- warn = True
336
- elif len(node.iter.args) == 1:
337
- arg = node.iter.args[0]
338
- # Check if it's a blank QgsFeatureRequest()
339
- if (
340
- isinstance(arg, ast.Call)
341
- and isinstance(arg.func, ast.Name)
342
- and arg.func.id == "QgsFeatureRequest"
343
- ):
344
- if not arg.args and not arg.keywords:
345
- warn = True
346
-
347
- if warn and self._should_report("SPATIAL_INDEX"):
348
- self.issues.append(
349
- {
350
- "file": self.rel_path,
351
- "line": node.lineno,
352
- "type": "SPATIAL_INDEX",
353
- "severity": self._get_severity("SPATIAL_INDEX"),
354
- "message": "Iteration over features with getFeatures() and no filter. Use a spatial index and QgsFeatureRequest for large layers.",
355
- "code": ast.unparse(node.iter),
356
- }
357
- )
358
-
359
- # Non-Pythonic Loop Detection (check for manual counters like i += 1)
360
- for body_node in ast.walk(node):
361
- if isinstance(body_node, ast.AugAssign) and isinstance(body_node.op, ast.Add):
362
- if isinstance(body_node.target, ast.Name):
363
- if isinstance(body_node.value, ast.Constant) and body_node.value.value == 1:
364
- if self._should_report("NON_PYTHONIC_LOOP"):
365
- self.issues.append(
366
- {
367
- "file": self.rel_path,
368
- "line": body_node.lineno,
369
- "type": "NON_PYTHONIC_LOOP",
370
- "severity": self._get_severity("NON_PYTHONIC_LOOP"),
371
- "message": f"Manual counter '{body_node.target.id} += 1' detected inside loop. Use enumerate() instead.",
372
- "code": ast.unparse(body_node),
373
- }
374
- )
375
-
376
- self.generic_visit(node)
377
-
378
- def visit_ClassDef(self, node: ast.ClassDef) -> None:
379
- """Analyzes class definitions for mandatory methods and documentation.
380
-
381
- Args:
382
- node: The class definition node to analyze.
383
- """
384
- # Track methods defined in the current class context
385
- methods = {
386
- item.name
387
- for item in node.body
388
- if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef))
389
- }
390
- self.class_methods_stack.append(methods)
391
44
 
392
- # 3. Detect MANDATORY_CLEANUP
393
- # Simple check: if a class has initGui, it MUST have unload
394
- has_init_gui = any(
395
- isinstance(m, ast.FunctionDef) and m.name == "initGui" for m in node.body
396
- )
397
- has_unload = any(isinstance(m, ast.FunctionDef) and m.name == "unload" for m in node.body)
398
-
399
- if has_init_gui and not has_unload:
400
- if self._should_report("MANDATORY_CLEANUP"):
401
- self.issues.append(
402
- {
403
- "file": self.rel_path,
404
- "line": node.lineno,
405
- "type": "MANDATORY_CLEANUP",
406
- "severity": self._get_severity("MANDATORY_CLEANUP"),
407
- "message": f"Class '{node.name}' implements 'initGui()' but is missing 'unload()'. Mandatory for cleanup.",
408
- "code": f"class {node.name}...",
409
- }
410
- )
411
-
412
- # Research recommendation: Missing Docstring for Classes
413
- if not node.name.startswith("_"):
414
- doc = ast.get_docstring(node)
415
- self.docstring_stats["total_public_items"] += 1
416
- if doc:
417
- self.docstring_stats["has_docstring"] += 1
418
- self._check_docstring_style(doc)
419
- elif self._should_report("MISSING_DOCSTRING"):
420
- self.issues.append(
421
- {
422
- "file": self.rel_path,
423
- "line": node.lineno,
424
- "type": "MISSING_DOCSTRING",
425
- "severity": self._get_severity("MISSING_DOCSTRING"),
426
- "message": f"Public class '{node.name}' is missing a docstring.",
427
- "code": f"class {node.name}...",
428
- }
429
- )
430
-
431
- self.generic_visit(node)
432
- self.class_methods_stack.pop()
433
-
434
- def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
435
- """Analyzes function definitions for best practices and research-based metrics.
436
-
437
- Args:
438
- node: The function definition node to analyze.
439
- """
440
- # 4. Detect IFACE_AS_ARGUMENT (QGS105)
441
- # Avoid passing QgisInterface as an argument
442
- for arg in node.args.args:
443
- if arg.annotation and isinstance(arg.annotation, ast.Name):
444
- if arg.annotation.id == "QgisInterface":
445
- if self._should_report("IFACE_AS_ARGUMENT"):
446
- self.issues.append(
447
- {
448
- "file": self.rel_path,
449
- "line": node.lineno,
450
- "type": "IFACE_AS_ARGUMENT",
451
- "severity": self._get_severity("IFACE_AS_ARGUMENT"),
452
- "message": f"Function '{node.name}' receives 'QgisInterface' as an argument. Use the global 'iface' or Singleton pattern.",
453
- "code": ast.unparse(node).split("\n")[0],
454
- }
455
- )
456
-
457
- # 5. Detect HIGH_COMPLEXITY
458
- complexity = calculate_complexity(node)
459
- if complexity > 15:
460
- if self._should_report("HIGH_COMPLEXITY"):
461
- self.issues.append(
462
- {
463
- "file": self.rel_path,
464
- "line": node.lineno,
465
- "type": "HIGH_COMPLEXITY",
466
- "severity": self._get_severity("HIGH_COMPLEXITY"),
467
- "message": f"Function '{node.name}' is too complex (CC={complexity} > 15). Consider extracting methods to improve maintainability.",
468
- "code": f"def {node.name}...",
469
- }
470
- )
471
-
472
- # Research recommendation: Missing Docstring and Type Hints
473
- if not node.name.startswith("_") and node.name != "__init__":
474
- doc = ast.get_docstring(node)
475
- self.docstring_stats["total_public_items"] += 1
476
- if doc:
477
- self.docstring_stats["has_docstring"] += 1
478
- self._check_docstring_style(doc)
479
- elif self._should_report("MISSING_DOCSTRING"):
480
- self.issues.append(
481
- {
482
- "file": self.rel_path,
483
- "line": node.lineno,
484
- "type": "MISSING_DOCSTRING",
485
- "severity": self._get_severity("MISSING_DOCSTRING"),
486
- "message": f"Public function '{node.name}' is missing a docstring.",
487
- "code": f"def {node.name}...",
488
- }
489
- )
490
-
491
- # Type Hint Stats (PEP 484)
492
- if node.name != "__init__":
493
- self.type_hint_stats["total_functions"] += 1
494
- params = [a for a in node.args.args if a.arg != "self" and a.arg != "cls"]
495
- self.type_hint_stats["total_parameters"] += len(params)
496
- annotated = [a for a in params if a.annotation]
497
- self.type_hint_stats["annotated_parameters"] += len(annotated)
498
- if node.returns:
499
- self.type_hint_stats["has_return_hint"] += 1
500
-
501
- # Rule: MISSING_TYPE_HINTS (if zero hints in a function with params)
502
- if params and not annotated and not node.returns:
503
- if self._should_report("MISSING_TYPE_HINTS"):
504
- self.issues.append(
505
- {
506
- "file": self.rel_path,
507
- "line": node.lineno,
508
- "type": "MISSING_TYPE_HINTS",
509
- "severity": self._get_severity("MISSING_TYPE_HINTS"),
510
- "message": f"Function '{node.name}' has no type annotations.",
511
- "code": f"def {node.name}...",
512
- }
513
- )
514
-
515
- self.generic_visit(node)
516
-
517
- def visit_Import(self, node: ast.Import) -> None:
518
- """Analyzes import nodes for protected members, legacy PyQt, and GDAL usage.
519
-
520
- Args:
521
- node: The import node to analyze.
522
- """
523
- for alias in node.names:
524
- # 5. Detect QGIS_PROTECTED_MEMBER (QGS101/102)
525
- if alias.name.startswith("qgis._") and not alias.name.startswith("qgis._3d"):
526
- if self._should_report("QGIS_PROTECTED_MEMBER"):
527
- self.issues.append(
528
- {
529
- "file": self.rel_path,
530
- "line": node.lineno,
531
- "type": "QGIS_PROTECTED_MEMBER",
532
- "severity": self._get_severity("QGIS_PROTECTED_MEMBER"),
533
- "message": f"Protected member import detected: '{alias.name}'. Protected members are unstable.",
534
- "code": ast.unparse(node),
535
- }
536
- )
537
- # 6. Detect GDAL_DIRECT_IMPORT (QGS106)
538
- if alias.name == "gdal":
539
- if self._should_report("GDAL_DIRECT_IMPORT"):
540
- self.issues.append(
541
- {
542
- "file": self.rel_path,
543
- "line": node.lineno,
544
- "type": "GDAL_DIRECT_IMPORT",
545
- "severity": self._get_severity("GDAL_DIRECT_IMPORT"),
546
- "message": "Direct 'gdal' import detected. Use 'from osgeo import gdal'.",
547
- "code": ast.unparse(node),
548
- }
549
- )
550
- # QGIS_LEGACY_IMPORT (already existing)
551
- if alias.name.startswith(("PyQt4", "PyQt5")):
552
- if self._should_report("QGIS_LEGACY_IMPORT"):
553
- self.issues.append(
554
- {
555
- "file": self.rel_path,
556
- "line": node.lineno,
557
- "type": "QGIS_LEGACY_IMPORT",
558
- "severity": self._get_severity("QGIS_LEGACY_IMPORT"),
559
- "message": f"Legacy import detected: '{alias.name}'. Use 'qgis.PyQt' for compatibility.",
560
- "code": ast.unparse(node),
561
- }
562
- )
563
- self.generic_visit(node)
564
-
565
- def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
566
- """Analyzes 'from import' nodes for protected members, legacy PyQt, and GDAL.
567
-
568
- Also detects heavy dependencies in UI-related files.
569
-
570
- Args:
571
- node: The import-from node to analyze.
572
- """
573
- if node.module:
574
- # Detect QGIS_PROTECTED_MEMBER
575
- if node.module.startswith("qgis._") and not node.module.startswith("qgis._3d"):
576
- if self._should_report("QGIS_PROTECTED_MEMBER"):
577
- self.issues.append(
578
- {
579
- "file": self.rel_path,
580
- "line": node.lineno,
581
- "type": "QGIS_PROTECTED_MEMBER",
582
- "severity": self._get_severity("QGIS_PROTECTED_MEMBER"),
583
- "message": f"Protected member import detected: 'from {node.module} import ...'. Protected members are unstable.",
584
- "code": ast.unparse(node),
585
- }
586
- )
587
- # Detect GDAL_DIRECT_IMPORT
588
- if node.module == "gdal":
589
- if self._should_report("GDAL_DIRECT_IMPORT"):
590
- self.issues.append(
591
- {
592
- "file": self.rel_path,
593
- "line": node.lineno,
594
- "type": "GDAL_DIRECT_IMPORT",
595
- "severity": self._get_severity("GDAL_DIRECT_IMPORT"),
596
- "message": "Direct 'gdal' import detected. Use 'from osgeo import gdal'.",
597
- "code": ast.unparse(node),
598
- }
599
- )
600
- # QGIS_LEGACY_IMPORT
601
- if node.module.startswith(("PyQt4", "PyQt5")):
602
- if self._should_report("QGIS_LEGACY_IMPORT"):
603
- self.issues.append(
604
- {
605
- "file": self.rel_path,
606
- "line": node.lineno,
607
- "type": "QGIS_LEGACY_IMPORT",
608
- "severity": self._get_severity("QGIS_LEGACY_IMPORT"),
609
- "message": f"Legacy import detected: 'from {node.module} import ...'. Use 'qgis.PyQt' for compatibility.",
610
- "code": ast.unparse(node),
611
- }
612
- )
613
- # 7. Detect HEAVY_LOGIC_UI (QGS107)
614
- heavy_libs = {"pandas", "numpy", "scipy", "sklearn", "matplotlib"}
615
- is_ui_file = "gui" in self.rel_path.lower() or "ui" in self.rel_path.lower()
616
- if is_ui_file and (
617
- node.module in heavy_libs or node.module.split(".")[0] in heavy_libs
618
- ):
619
- if self._should_report("HEAVY_LOGIC_UI"):
620
- self.issues.append(
621
- {
622
- "file": self.rel_path,
623
- "line": node.lineno,
624
- "type": "HEAVY_LOGIC_UI",
625
- "severity": self._get_severity("HEAVY_LOGIC_UI"),
626
- "message": f"Heavy dependency '{node.module}' detected in UI file. Move logic to core.",
627
- "code": ast.unparse(node),
628
- }
629
- )
630
- self.generic_visit(node)
631
-
632
-
633
- # The helper functions previously here have been moved to src/analyzer/utils/ast_utils.py
45
+ class ResearchMetrics(TypedDict):
46
+ """Structured research metrics for a module."""
47
+
48
+ docstring_styles: List[str]
49
+ type_hint_stats: Dict[str, Any]
50
+ docstring_stats: Dict[str, Any]
51
+ security_findings_count: int
52
+
53
+
54
+ class ModuleAnalysisResult(TypedDict, total=False):
55
+ """Formal structure for module analysis results."""
56
+
57
+ path: str
58
+ lines: int
59
+ functions: List[Dict[str, Any]]
60
+ classes: List[str]
61
+ imports: List[str]
62
+ complexity: int
63
+ has_main: bool
64
+ docstrings: Dict[str, bool]
65
+ file_size_kb: float
66
+ syntax_error: bool
67
+ ast_issues: List[Dict[str, Any]]
68
+ security_issues: List[Dict[str, Any]]
69
+ resource_usages: List[str]
70
+ research_metrics: ResearchMetrics
71
+ content: Optional[str]
72
+
73
+
74
+ # --- Constants ---
75
+
76
+ SEVERITY_MAP = {
77
+ "error": "high",
78
+ "warning": "medium",
79
+ "info": "low",
80
+ }
634
81
 
635
82
 
636
83
  def analyze_module_worker(
@@ -638,7 +85,7 @@ def analyze_module_worker(
638
85
  project_path: pathlib.Path,
639
86
  cached_data: Optional[Dict[str, Any]] = None,
640
87
  rules_config: Optional[Dict[str, Any]] = None,
641
- ) -> Optional[Dict[str, Any]]:
88
+ ) -> Optional[ModuleAnalysisResult]:
642
89
  """Worker function for module analysis, intended for parallel execution.
643
90
 
644
91
  Args:
@@ -652,143 +99,204 @@ def analyze_module_worker(
652
99
  could not be processed.
653
100
  """
654
101
  try:
655
- rel_path = str(py_file.relative_to(project_path))
656
-
657
- # Fast read
658
- with open(py_file, encoding="utf-8-sig", errors="replace") as f:
659
- content = f.read()
660
-
102
+ rel_path = _get_relative_path(py_file, project_path)
103
+ content = _read_file_content(py_file)
661
104
  if not content:
662
105
  return None
663
106
 
664
- # Parse AST
665
- try:
666
- tree = ast.parse(content)
667
- except SyntaxError:
668
- return {
669
- "path": rel_path,
670
- "lines": content.count("\n") + 1,
671
- "syntax_error": True,
672
- "file_size_kb": py_file.stat().st_size / 1024,
673
- "complexity": 1,
674
- "functions": [],
675
- "classes": [],
676
- "imports": [],
677
- "has_main": False,
678
- "docstrings": {"module": False},
679
- "ast_issues": [],
680
- "research_metrics": {
681
- "docstring_styles": [],
682
- "type_hint_stats": {
683
- "total_parameters": 0,
684
- "annotated_parameters": 0,
685
- "has_return_hint": 0,
686
- "total_functions": 0,
687
- },
688
- "docstring_stats": {"total_public_items": 0, "has_docstring": 0},
689
- },
690
- }
691
-
692
- # Extract information using helper functions
693
- functions = extract_functions_from_ast(tree)
694
- classes = extract_classes_from_ast(tree)
695
- imports = extract_imports_from_ast(tree)
696
- module_complexity = calculate_module_complexity(tree)
697
- has_main = check_main_guard(tree)
107
+ # Parse AST with error handling
108
+ tree_or_error = _parse_ast(content, rel_path, py_file)
109
+ if isinstance(tree_or_error, dict) and tree_or_error.get("syntax_error"):
110
+ # Ensure it fits the return type
111
+ return tree_or_error # type: ignore
698
112
 
699
- # Custom AST Audit
700
- visitor = QGISASTVisitor(rel_path, rules_config=rules_config)
701
- visitor.visit(tree)
113
+ tree = tree_or_error
702
114
 
703
- return {
115
+ # Extract information using helper functions
116
+ results: ModuleAnalysisResult = {
704
117
  "path": rel_path,
705
118
  "lines": content.count("\n") + 1,
706
- "functions": functions,
707
- "classes": classes,
708
- "imports": imports,
709
- "complexity": module_complexity,
710
- "has_main": has_main,
711
- "docstrings": {
712
- "module": ast.get_docstring(tree) is not None,
713
- },
119
+ "functions": extract_functions_from_ast(tree),
120
+ "classes": extract_classes_from_ast(tree),
121
+ "imports": extract_imports_from_ast(tree),
122
+ "complexity": calculate_module_complexity(tree),
123
+ "has_main": check_main_guard(tree),
124
+ "docstrings": {"module": ast.get_docstring(tree) is not None},
714
125
  "file_size_kb": py_file.stat().st_size / 1024,
715
126
  "syntax_error": False,
716
- "ast_issues": visitor.issues,
717
- "resource_usages": getattr(visitor, "resource_usages", []),
718
- "research_metrics": {
719
- "docstring_styles": list(set(visitor.docstring_styles)),
720
- "type_hint_stats": visitor.type_hint_stats,
721
- "docstring_stats": visitor.docstring_stats,
722
- },
723
127
  "content": content,
724
128
  }
129
+
130
+ # Run Audits
131
+ visitor = QGISASTVisitor(rel_path, rules_config=rules_config)
132
+ visitor.visit(tree)
133
+
134
+ security_issues = _collect_security_issues(tree, content, rel_path)
135
+ results.update(
136
+ {
137
+ "ast_issues": visitor.issues,
138
+ "security_issues": security_issues,
139
+ "resource_usages": getattr(visitor, "resource_usages", []),
140
+ "research_metrics": {
141
+ "docstring_styles": list(set(visitor.docstring_styles)),
142
+ "type_hint_stats": visitor.type_hint_stats,
143
+ "docstring_stats": visitor.docstring_stats,
144
+ "security_findings_count": len(security_issues),
145
+ },
146
+ }
147
+ )
148
+
149
+ return results
725
150
  except Exception:
726
151
  return None
727
152
 
728
153
 
154
def _get_relative_path(py_file: pathlib.Path, project_path: pathlib.Path) -> str:
    """Return the path used to identify ``py_file`` in analysis results.

    Args:
        py_file: File being analyzed.
        project_path: Project root directory, or a single file when the
            analysis targets one file only.

    Returns:
        The bare file name when ``project_path`` is itself a file, otherwise
        ``py_file`` expressed relative to ``project_path``.

    Raises:
        ValueError: If ``project_path`` is not a file and ``py_file`` is not
            located beneath it.
    """
    single_file_target = project_path.is_file()
    if not single_file_target:
        return str(py_file.relative_to(project_path))
    # A lone file has no project root to be relative to; use its name.
    return py_file.name
159
+
160
+
161
def _read_file_content(py_file: pathlib.Path) -> Optional[str]:
    """Read a source file as text without ever raising.

    The file is decoded as UTF-8 with an optional BOM stripped
    (``utf-8-sig``); undecodable bytes are substituted rather than rejected.

    Args:
        py_file: Path of the file to read.

    Returns:
        The decoded text, or ``None`` when opening or reading the file failed
        for any reason.
    """
    try:
        with open(py_file, encoding="utf-8-sig", errors="replace") as source:
            text = source.read()
    except Exception:
        # Best effort by design: an unreadable file is reported as "no content".
        return None
    return text
168
+
169
+
170
def _parse_ast(content: str, rel_path: str, py_file: pathlib.Path) -> Any:
    """Parses AST or returns a structured error dictionary.

    Args:
        content: Source text of the module.
        rel_path: Path identifying the module in the returned error result.
        py_file: Location of the module on disk (used for the file size in
            the error result).

    Returns:
        The parsed ``ast.Module`` on success, otherwise the dictionary built
        by ``_create_empty_analysis_result`` with ``syntax_error=True``.
    """
    try:
        return ast.parse(content)
    except (SyntaxError, ValueError):
        # Before Python 3.12, source containing null bytes makes the compiler
        # raise ValueError instead of SyntaxError. Treat both the same way so
        # such files are reported as unparsable rather than silently dropped
        # by the caller's blanket exception handler.
        return _create_empty_analysis_result(rel_path, py_file, content, syntax_error=True)
176
+
177
+
178
def _create_empty_analysis_result(
    rel_path: str, py_file: pathlib.Path, content: str, syntax_error: bool = False
) -> ModuleAnalysisResult:
    """Creates a basic results structure for errors or empty files.

    The result carries the same list-valued keys as a successful analysis
    (including ``security_issues`` and ``resource_usages``), all empty, so
    consumers can treat both shapes uniformly.

    Args:
        rel_path: Path identifying the module in the result.
        py_file: Location of the module on disk; only its size is read.
        content: Source text, used solely to count lines.
        syntax_error: Value stored under the ``syntax_error`` key.

    Returns:
        A result with zeroed metrics and empty collections.

    Raises:
        OSError: If ``py_file`` cannot be stat'ed.
    """
    return {
        "path": rel_path,
        "lines": content.count("\n") + 1,
        "syntax_error": syntax_error,
        "file_size_kb": py_file.stat().st_size / 1024,
        "complexity": 1,
        "functions": [],
        "classes": [],
        "imports": [],
        "has_main": False,
        "docstrings": {"module": False},
        "ast_issues": [],
        # Present on every successful result; mirrored here as empty lists.
        "security_issues": [],
        "resource_usages": [],
        "research_metrics": {
            "docstring_styles": [],
            "type_hint_stats": {
                "total_parameters": 0,
                "annotated_parameters": 0,
                "has_return_hint": 0,
                "total_functions": 0,
            },
            "docstring_stats": {"total_public_items": 0, "has_docstring": 0},
            "security_findings_count": 0,
        },
    }
206
+
207
+
208
def _collect_security_issues(tree: ast.AST, content: str, rel_path: str) -> List[Dict[str, Any]]:
    """Consolidates issues from AST security visitor and secret scanner.

    Args:
        tree: Parsed AST of the module.
        content: Raw source text of the module, scanned for secrets.
        rel_path: Path recorded in the ``file`` field of every finding.

    Returns:
        The security visitor's findings followed by one entry per secret
        reported by the secret scanner.
    """
    ast_scanner = QGISSecurityVisitor(rel_path)
    ast_scanner.visit(tree)
    # The visitor's own findings list is extended in place and returned.
    collected = ast_scanner.findings

    for secret in SecretScanner().scan_text(content):
        # Only "HIGH" confidence secrets are escalated to high severity.
        severity = "medium"
        if secret.confidence == "HIGH":
            severity = "high"
        collected.append(
            {
                "file": rel_path,
                "line": secret.line,
                "type": secret.type,
                "severity": severity,
                "message": secret.message,
                "confidence": secret.confidence.lower(),
            }
        )
    return collected
227
+
228
+
729
229
def audit_qgis_standards(
    modules_data: List[ModuleAnalysisResult],
    project_path: pathlib.Path,
    rules_config: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Executes a comprehensive QGIS standards audit using regex and AST results.

    Args:
        modules_data: List of already analyzed module data.
        project_path: Root path of the project, used to re-read a module
            whose source text is not cached in its analysis result.
        rules_config: Optional rule configuration overrides.

    Returns:
        A dictionary with the consolidated ``issues`` list and the matching
        ``issues_count``.
    """
    audit_rules = get_qgis_audit_rules()
    found: List[Dict[str, Any]] = []

    for module in modules_data:
        # Issues already detected by the AST pass.
        found.extend(module.get("ast_issues", []))

        # Regex rules need the source text: prefer the cached copy and fall
        # back to reading the file from disk.
        module_path = module.get("path", "")
        source = module.get("content")
        if not source:
            source = _try_read_module_file(module_path, project_path)
        if not source:
            continue

        _run_regex_audit_on_module(source, module_path, audit_rules, rules_config, found)

    return {"issues": found, "issues_count": len(found)}
792
251
 
793
252
 
253
def _run_regex_audit_on_module(
    content: str,
    path: str,
    rules: List[Dict[str, Any]],
    rules_config: Optional[Dict[str, Any]],
    issues_out: List[Dict[str, Any]],
) -> None:
    """Runs all regex rules on a module's content.

    Args:
        content: Source text of the module.
        path: Path recorded in the ``file`` field of every issue.
        rules: Rule dictionaries providing ``id``, ``pattern`` (compiled
            regex), ``message`` and ``severity``.
        rules_config: Optional per-rule severity overrides.
        issues_out: List that receives one issue per regex match; it is
            modified in place.
    """
    for rule in rules:
        severity = _get_rule_severity(rule, rules_config)
        if severity != "ignore":
            for hit in rule["pattern"].finditer(content):
                start, end = hit.span()
                issues_out.append(
                    {
                        "file": path,
                        # 1-based line of the match start.
                        "line": content.count("\n", 0, start) + 1,
                        "type": rule["id"],
                        "severity": severity,
                        "message": rule["message"],
                        # Matched text plus up to 20 trailing characters of context.
                        "code": content[start : end + 20].strip(),
                    }
                )
278
+
279
+
280
def _try_read_module_file(path: Optional[str], project_path: pathlib.Path) -> Optional[str]:
    """Attempts to read a module file from path if content is missing.

    Args:
        path: Module path relative to ``project_path``; may be empty or None.
        project_path: Root path of the project.

    Returns:
        The file's text, or ``None`` when no path was given, the file does
        not exist, or it could not be read.
    """
    if path:
        candidate = project_path / path
        if candidate.exists():
            return _read_file_content(candidate)
    return None
288
+
289
+
290
def _get_rule_severity(rule: Dict[str, Any], config: Optional[Dict[str, Any]]) -> str:
    """Calculates rule severity based on configuration.

    Args:
        rule: Rule dictionary providing at least ``id`` and ``severity``.
        config: Optional mapping of rule id to a configured severity name.

    Returns:
        ``"ignore"`` when the rule is configured to be ignored, otherwise
        the internal severity label.
    """
    rule_id = rule["id"]

    # A rule without an explicit configuration entry is treated as "warning".
    configured = "warning"
    if config:
        configured = config.get(rule_id, "warning")

    if configured == "ignore":
        return "ignore"

    # Unknown configured names fall back to the rule's own severity.
    return str(SEVERITY_MAP.get(configured, rule["severity"]))
300
+
301
+
794
302
  # End of scanner.py