qgis-plugin-analyzer 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
analyzer/engine.py ADDED
@@ -0,0 +1,586 @@
1
+ # /***************************************************************************
2
+ # QGIS Plugin Analyzer
3
+ # A QGIS tool
4
+ # Static code analysis and standards audit for QGIS plugins.
5
+ # -------------------
6
+ # begin : 2025-12-28
7
+ # git sha : $Format:%H$
8
+ # copyright : (C) 2025 by Juan M Bernales
9
+ # email : juanbernales@gmail.com
10
+ # ***************************************************************************/
11
+ #
12
+ # /***************************************************************************
13
+ # * *
14
+ # * This program is free software; you can redistribute it and/or modify *
15
+ # * it under the terms of the GNU General Public License as published by *
16
+ # * the Free Software Foundation; either version 2 of the License, or *
17
+ # * (at your option) any later version. *
18
+ # * *
19
+ # ***************************************************************************/
20
+
21
+ import json
22
+ import math
23
+ import os
24
+ import pathlib
25
+ import subprocess
26
+ from concurrent.futures import ProcessPoolExecutor, as_completed
27
+ from typing import Any, Dict, List, Optional
28
+
29
+ from .reporters import (
30
+ generate_html_report,
31
+ generate_markdown_summary,
32
+ save_json_context,
33
+ )
34
+ from .scanner import (
35
+ analyze_module_worker,
36
+ audit_qgis_standards,
37
+ )
38
+ from .semantic import DependencyGraph, ResourceValidator
39
+ from .utils import (
40
+ IgnoreMatcher,
41
+ ProgressTracker,
42
+ load_ignore_patterns,
43
+ load_profile_config,
44
+ logger,
45
+ safe_path_resolve,
46
+ setup_logger,
47
+ )
48
+ from .validators import (
49
+ calculate_package_size,
50
+ scan_for_binaries,
51
+ validate_metadata,
52
+ validate_metadata_urls,
53
+ validate_plugin_structure,
54
+ )
55
+
56
+
57
+ class ProjectAnalyzer:
58
    def __init__(
        self,
        project_path: str,
        output_dir: Optional[str] = None,
        profile: str = "default",
    ) -> None:
        """Initializes the Project Analyzer.

        Resolves input/output paths, configures logging, loads the requested
        profile configuration, and detects whether the target is a QGIS plugin.

        Args:
            project_path: Root path of the project to analyze.
            output_dir: Directory to save analysis reports. Defaults to "./analysis_results".
            profile: Configuration profile name from pyproject.toml. Defaults to "default".
        """
        self.project_path = pathlib.Path(project_path).resolve()
        self.output_dir = pathlib.Path(output_dir or "./analysis_results").resolve()
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # Initialize logging — must run before the first logger.* call below.
        setup_logger(self.output_dir)

        # Limit workers to 4 or cpu count, whichever is smaller, to prevent OOM
        self.max_workers = min(os.cpu_count() or 4, 4)
        # Files larger than this (in KB) are skipped by get_python_files().
        self.max_file_size_kb = 500

        # Load profile config
        self.config = load_profile_config(self.project_path, profile)

        # Detect project type: an explicit config value wins; otherwise the
        # presence of metadata.txt marks the project as a QGIS plugin.
        self.project_type = self.config.get("project_type", "auto")
        if self.project_type == "auto":
            metadata_file = self.project_path / "metadata.txt"
            self.project_type = "qgis" if metadata_file.exists() else "generic"

        logger.info(f"📁 Project type: {self.project_type.upper()}")

        # Load .analyzerignore patterns used to exclude paths from the scan.
        ignore_file = self.project_path / ".analyzerignore"
        patterns = load_ignore_patterns(ignore_file)
        self.matcher = IgnoreMatcher(self.project_path, patterns)
97
+
98
+ def get_python_files(self) -> List[pathlib.Path]:
99
+ """Scans Python files ignoring common folders and .analyzerignore patterns.
100
+
101
+ Returns:
102
+ A sorted list of pathlib.Path objects for all detected Python files.
103
+ """
104
+ python_files = []
105
+ for root, dirs, files in os.walk(self.project_path):
106
+ root_path = pathlib.Path(root)
107
+
108
+ # Filter directories
109
+ dirs[:] = [d for d in dirs if not self.matcher.is_ignored(root_path / d)]
110
+
111
+ for file in files:
112
+ file_path = root_path / file
113
+ if file.endswith(".py") and not self.matcher.is_ignored(file_path):
114
+ # Skip very large files to avoid OOM
115
+ if file_path.stat().st_size > self.max_file_size_kb * 1024:
116
+ logger.warning(
117
+ f"⚠️ Skipping large file: {file_path.name} (> {self.max_file_size_kb}KB)"
118
+ )
119
+ continue
120
+ python_files.append(file_path)
121
+ return sorted(python_files)
122
+
123
+ def run_ruff_audit(self) -> List[Dict[str, Any]]:
124
+ """Executes Ruff linting via subprocess.
125
+
126
+ Returns:
127
+ A list of dictionaries representing Ruff findings. Returns an empty
128
+ list if Ruff is not available or errors occur.
129
+ """
130
+ try:
131
+ cmd = [
132
+ "ruff",
133
+ "check",
134
+ str(self.project_path),
135
+ "--format",
136
+ "json",
137
+ "--quiet",
138
+ ]
139
+ result = subprocess.run(cmd, capture_output=True, text=True, check=False)
140
+ if result.stdout:
141
+ data: List[Dict[str, Any]] = json.loads(result.stdout)
142
+ return data
143
+ return []
144
+ except Exception as e:
145
+ logger.error(f"Error running Ruff: {e}")
146
+ return []
147
+
148
+ def _run_parallel_analysis(
149
+ self, files: List[pathlib.Path], rules_config: dict
150
+ ) -> List[Dict[str, Any]]:
151
+ """Runs parallel analysis on all Python files.
152
+
153
+ Args:
154
+ files: List of paths to analyze.
155
+ rules_config: Rule-specific configuration overrides.
156
+
157
+ Returns:
158
+ A list of module analysis results.
159
+ """
160
+ tracker = ProgressTracker(len(files))
161
+ modules_data = []
162
+
163
+ with ProcessPoolExecutor(max_workers=self.max_workers) as executor:
164
+ futures = {
165
+ executor.submit(analyze_module_worker, f, self.project_path, None, rules_config): f
166
+ for f in files
167
+ }
168
+ for future in as_completed(futures):
169
+ res = future.result()
170
+ if res:
171
+ modules_data.append(res)
172
+ tracker.update(futures[future], 0)
173
+
174
+ tracker.complete()
175
+ return modules_data
176
+
177
+ def _run_qgis_specific_checks(
178
+ self, modules_data: List[Dict[str, Any]], rules_config: dict
179
+ ) -> tuple:
180
+ """Runs QGIS-specific validation checks.
181
+
182
+ Args:
183
+ modules_data: List of already analyzed module data.
184
+ rules_config: Rule-specific configuration overrides.
185
+
186
+ Returns:
187
+ A tuple of (compliance, structure, metadata, binaries, package_size, url_status).
188
+ """
189
+ compliance = audit_qgis_standards(
190
+ modules_data, self.project_path, rules_config=rules_config
191
+ )
192
+
193
+ # Official repository audit
194
+ metadata_path = safe_path_resolve(self.project_path, "metadata.txt")
195
+ structure = validate_plugin_structure(self.project_path)
196
+ metadata = validate_metadata(metadata_path)
197
+
198
+ # Repository Compliance Checks
199
+ logger.info("Running QGIS repository compliance checks...")
200
+ binaries = scan_for_binaries(self.project_path, self.matcher)
201
+ package_size = calculate_package_size(self.project_path, self.matcher)
202
+ url_status = {}
203
+ if metadata.get("is_valid") and "metadata" in metadata:
204
+ url_status = validate_metadata_urls(metadata["metadata"])
205
+
206
+ return compliance, structure, metadata, binaries, package_size, url_status
207
+
208
+ def _run_semantic_analysis(self, modules_data: List[Dict[str, Any]]) -> tuple:
209
+ """Runs semantic analysis including dependencies and resources.
210
+
211
+ Args:
212
+ modules_data: List of analyzed module entries.
213
+
214
+ Returns:
215
+ A tuple of (cycles, metrics, missing_resources).
216
+ """
217
+ dep_graph = DependencyGraph()
218
+ all_resource_usages = []
219
+ res_validator = None
220
+
221
+ if self.project_type == "qgis":
222
+ res_validator = ResourceValidator(self.project_path)
223
+ res_validator.scan_project_resources(self.matcher)
224
+
225
+ for m in modules_data:
226
+ dep_graph.add_node(m["path"], m)
227
+ if self.project_type == "qgis" and "resource_usages" in m:
228
+ all_resource_usages.extend(m["resource_usages"])
229
+
230
+ dep_graph.build_edges(self.project_path)
231
+ cycles = dep_graph.detect_cycles()
232
+ metrics = dep_graph.get_coupling_metrics()
233
+
234
+ missing_resources = []
235
+ if self.project_type == "qgis" and res_validator:
236
+ missing_resources = res_validator.validate_usage(all_resource_usages)
237
+
238
+ return cycles, metrics, missing_resources
239
+
240
+ def _build_analysis_results(
241
+ self,
242
+ files: List[pathlib.Path],
243
+ modules_data: List[Dict[str, Any]],
244
+ ruff_findings: List[Dict[str, Any]],
245
+ code_score: float,
246
+ maint_score: float,
247
+ qgis_score: float,
248
+ compliance: Dict[str, Any],
249
+ structure: Dict[str, Any],
250
+ metadata: Dict[str, Any],
251
+ cycles: List[List[str]],
252
+ metrics: Dict[str, Any],
253
+ missing_resources: List[str],
254
+ binaries: List[str],
255
+ package_size: float,
256
+ url_status: Dict[str, str],
257
+ ) -> Dict[str, Any]:
258
+ """Consolidates analysis results into a single dictionary.
259
+
260
+ Args:
261
+ files: List of analyzed files.
262
+ modules_data: Detailed analysis for each module.
263
+ ruff_findings: Results from Ruff linting.
264
+ code_score: Calculated module stability score.
265
+ maint_score: Calculated maintainability score.
266
+ qgis_score: Calculated QGIS compliance score.
267
+ compliance: Detailed QGIS compliance findings.
268
+ structure: Plugin structure validation results.
269
+ metadata: Metadata validation results.
270
+ cycles: Detected circular dependency cycles.
271
+ metrics: Coupling and complexity metrics.
272
+ missing_resources: List of missing QRC resources.
273
+ binaries: List of prohibited binary files.
274
+ package_size: Size of the plugin package in MB.
275
+ url_status: Status of URLs in metadata.txt.
276
+
277
+ Returns:
278
+ The final analysis results dictionary.
279
+ """
280
+ metrics_summary = {
281
+ "total_files": len(files),
282
+ "total_lines": sum(m["lines"] for m in modules_data),
283
+ "quality_score": round(code_score, 1),
284
+ "maintainability_score": round(maint_score, 1),
285
+ }
286
+
287
+ if self.project_type == "qgis":
288
+ metrics_summary["overall_score"] = round((code_score * 0.5) + (qgis_score * 0.5), 1)
289
+
290
+ analyses = {
291
+ "project_name": self.project_path.name,
292
+ "project_type": self.project_type,
293
+ "metrics": metrics_summary,
294
+ "ruff_findings": ruff_findings,
295
+ "semantic": {"circular_dependencies": cycles, "coupling_metrics": metrics},
296
+ "modules": modules_data,
297
+ }
298
+
299
+ # Aggregate research metrics for summary
300
+ total_functions = 0
301
+ total_params = 0
302
+ annotated_params = 0
303
+ has_return_hint = 0
304
+ has_docstring_count = 0
305
+ total_public_items = 0
306
+ detected_styles = set()
307
+
308
+ for m in modules_data:
309
+ r_metrics = m.get("research_metrics", {})
310
+ d_stats = r_metrics.get("docstring_stats", {})
311
+ total_public_items += d_stats.get("total_public_items", 0)
312
+ has_docstring_count += d_stats.get("has_docstring", 0)
313
+
314
+ t_stats = r_metrics.get("type_hint_stats", {})
315
+ total_functions += t_stats.get("total_functions", 0)
316
+ total_params += t_stats.get("total_parameters", 0)
317
+ annotated_params += t_stats.get("annotated_parameters", 0)
318
+ has_return_hint += t_stats.get("has_return_hint", 0)
319
+
320
+ detected_styles.update(r_metrics.get("docstring_styles", []))
321
+
322
+ analyses["research_summary"] = {
323
+ "type_hint_coverage": round((annotated_params / max(1, total_params)) * 100, 1)
324
+ if total_params > 0
325
+ else 0.0,
326
+ "return_hint_coverage": (
327
+ round((has_return_hint / total_functions) * 100, 1) if total_functions > 0 else 0.0
328
+ ),
329
+ "docstring_coverage": round((has_docstring_count / max(1, total_public_items)) * 100, 1)
330
+ if total_public_items > 0
331
+ else 0.0,
332
+ "detected_docstring_styles": list(detected_styles),
333
+ }
334
+
335
+ if self.project_type == "qgis":
336
+ analyses["qgis_compliance"] = {
337
+ "compliance_score": round(qgis_score, 1),
338
+ "best_practices": compliance,
339
+ "repository_standards": {"structure": structure, "metadata": metadata},
340
+ }
341
+ analyses["semantic"]["missing_resources"] = missing_resources
342
+ analyses["repository_compliance"] = {
343
+ "binaries": binaries,
344
+ "package_size_mb": round(package_size, 2),
345
+ "url_validation": url_status,
346
+ "is_compliant": len(binaries) == 0 and package_size <= 20,
347
+ }
348
+
349
+ return analyses
350
+
351
+ def _save_reports(self, analyses: Dict[str, Any]) -> None:
352
+ """Saves all generated analysis reports to the output directory.
353
+
354
+ Args:
355
+ analyses: The consolidated analysis results dictionary.
356
+ """
357
+ generate_markdown_summary(analyses, self.output_dir / "PROJECT_SUMMARY.md")
358
+ if self.config.get("generate_html", True):
359
+ generate_html_report(analyses, self.output_dir / "PROJECT_SUMMARY.html")
360
+ save_json_context(analyses, self.output_dir / "project_context.json")
361
+
362
+ def run(self) -> bool:
363
+ """Executes the complete analysis pipeline.
364
+
365
+ Returns:
366
+ True if analysis completed successfully (even if issues were found),
367
+ False if it failed due to critical system errors or strict mode violations.
368
+ """
369
+ logger.info(f"🔍 Analyzing: {self.project_path}")
370
+ files = self.get_python_files()
371
+ rules_config = self.config.get("rules", {})
372
+
373
+ # Parallel analysis
374
+ modules_data = self._run_parallel_analysis(files, rules_config)
375
+
376
+ # Ruff audit
377
+ ruff_findings = self.run_ruff_audit()
378
+
379
+ # Initialize defaults
380
+ compliance: Dict[str, Any] = {"issues": [], "issues_count": 0}
381
+ structure: Dict[str, Any] = {"is_valid": True}
382
+ metadata: Dict[str, Any] = {"is_valid": True}
383
+ binaries: List[str] = []
384
+ package_size = 0
385
+ url_status = {}
386
+
387
+ # QGIS-specific checks
388
+ if self.project_type == "qgis":
389
+ compliance, structure, metadata, binaries, package_size, url_status = (
390
+ self._run_qgis_specific_checks(modules_data, rules_config)
391
+ )
392
+
393
+ # Semantic Analysis
394
+ semantic_res = self._run_semantic_analysis(modules_data)
395
+ cycles = semantic_res[0] if len(semantic_res) > 0 else []
396
+ metrics = semantic_res[1] if len(semantic_res) > 1 else {}
397
+ missing_resources = semantic_res[2] if len(semantic_res) > 2 else []
398
+
399
+ # Calculate scores
400
+ scores = self._calculate_scores(
401
+ modules_data,
402
+ ruff_findings,
403
+ compliance,
404
+ structure,
405
+ metadata,
406
+ cycles,
407
+ missing_resources,
408
+ binaries,
409
+ package_size,
410
+ )
411
+ # Handle potential return length mismatches gracefully (Robustness v1.0.0+)
412
+ code_score = scores[0] if len(scores) > 0 else 0.0
413
+ maint_score = scores[1] if len(scores) > 1 else 0.0
414
+ qgis_score = scores[2] if len(scores) > 2 else 0.0
415
+
416
+ # Build results
417
+ analyses = self._build_analysis_results(
418
+ files,
419
+ modules_data,
420
+ ruff_findings,
421
+ code_score,
422
+ maint_score,
423
+ qgis_score,
424
+ compliance,
425
+ structure,
426
+ metadata,
427
+ cycles,
428
+ metrics,
429
+ missing_resources,
430
+ binaries,
431
+ package_size,
432
+ url_status,
433
+ )
434
+
435
+ # Save reports
436
+ self._save_reports(analyses)
437
+
438
+ logger.info(f"✅ Analysis completed. Reports in: {self.output_dir}")
439
+
440
+ # Fail on error if strict mode is on
441
+ if self.config.get("fail_on_error") and self.project_type == "qgis":
442
+ if (
443
+ int(compliance.get("issues_count", 0)) > 0
444
+ or not structure["is_valid"]
445
+ or not metadata["is_valid"]
446
+ ):
447
+ logger.error(
448
+ "❌ Strict Mode: Critical QGIS compliance issues detected. Failing analysis."
449
+ )
450
+ return False
451
+
452
+ return True
453
+
454
+ def _calculate_scores(
455
+ self,
456
+ modules_data: List[Dict[str, Any]],
457
+ ruff_findings: List[Dict[str, Any]],
458
+ compliance: Dict[str, Any],
459
+ structure: Dict[str, Any],
460
+ metadata: Dict[str, Any],
461
+ cycles: List[List[str]],
462
+ missing_resources: List[str],
463
+ binaries: List[str],
464
+ package_size: float,
465
+ ) -> tuple:
466
+ """Calculates project quality scores based on industry-standard formulas.
467
+
468
+ Args:
469
+ modules_data: Detailed analysis results for each module.
470
+ ruff_findings: List of Ruff linting findings.
471
+ compliance: Findings from QGIS standard audit.
472
+ structure: Results of plugin structure validation.
473
+ metadata: Results of metadata.txt validation.
474
+ cycles: List of circular dependency cycles.
475
+ missing_resources: List of missing QRC resource paths.
476
+ binaries: List of prohibited binary files.
477
+ package_size: Size of the plugin package in MB.
478
+
479
+ Returns:
480
+ A tuple of (module_stability, maintainability, qgis_compliance) scores out of 100.
481
+ """
482
+ if not modules_data:
483
+ return 0.0, 0.0, 0.0
484
+
485
+ # 1. Module stability based on Maintainability Index (MI)
486
+ # Formula: MI = max(0, (171 - 0.23 * CC - 16.2 * ln(SLOC)) * 100 / 171)
487
+ mi_scores = []
488
+ for m in modules_data:
489
+ cc = m.get("complexity", 1)
490
+ sloc = max(1, m.get("lines", 1))
491
+ mi = (171 - 0.23 * cc - 16.2 * math.log(sloc)) * 100 / 171
492
+ mi_scores.append(max(0, mi))
493
+
494
+ module_score = sum(mi_scores) / len(mi_scores) if mi_scores else 0.0
495
+
496
+ # 2. Maintainability based on Function Complexity
497
+ all_func_comp = []
498
+ for m in modules_data:
499
+ for f in m.get("functions", []):
500
+ all_func_comp.append(f["complexity"])
501
+
502
+ avg_func_comp = sum(all_func_comp) / len(all_func_comp) if all_func_comp else 1.0
503
+ # Function complexity score: 100 is perfect, -5 per point over 10
504
+ func_score = max(0, 100 - (max(0, avg_func_comp - 10) * 5))
505
+
506
+ # 3. Lint Scoring (Pylint style)
507
+ # 10 - ((5*E + W + R + C) / statements) * 10
508
+ total_lines = sum(m.get("lines", 0) for m in modules_data)
509
+ errors = 0
510
+ others = 0
511
+ for find in ruff_findings:
512
+ code = find.get("code", "")
513
+ if code.startswith(("E", "F")):
514
+ errors += 1
515
+ else:
516
+ others += 1
517
+
518
+ lint_penalty = ((5 * errors + others) / max(1, total_lines / 10)) * 10
519
+ lint_score = max(0, 100 - lint_penalty)
520
+
521
+ # Composite Maintainability Score
522
+ maintainability_score = (func_score * 0.7) + (lint_score * 0.3)
523
+
524
+ # 4. Research-based Bonuses & Modernization
525
+ total_public_items = 0
526
+ has_docstring_count = 0
527
+ total_functions = 0
528
+ total_params = 0
529
+ annotated_params = 0
530
+ has_return_hint = 0
531
+ detected_styles = set()
532
+
533
+ for m in modules_data:
534
+ metrics = m.get("research_metrics", {})
535
+ d_stats = metrics.get("docstring_stats", {})
536
+ total_public_items += d_stats.get("total_public_items", 0)
537
+ has_docstring_count += d_stats.get("has_docstring", 0)
538
+
539
+ t_stats = metrics.get("type_hint_stats", {})
540
+ total_functions += t_stats.get("total_functions", 0)
541
+ total_params += t_stats.get("total_parameters", 0)
542
+ annotated_params += t_stats.get("annotated_parameters", 0)
543
+ has_return_hint += t_stats.get("has_return_hint", 0)
544
+
545
+ detected_styles.update(metrics.get("docstring_styles", []))
546
+
547
+ # Bonuses
548
+ modernization_bonus = 0.0
549
+ # Type Hint Bonus: > 80% coverage on params and returns
550
+ if total_params > 0 or total_functions > 0:
551
+ param_cov = annotated_params / max(1, total_params)
552
+ ret_cov = has_return_hint / max(1, total_functions)
553
+ if param_cov >= 0.8 and ret_cov >= 0.8:
554
+ modernization_bonus += 5.0
555
+
556
+ # Docstring Style Bonus: Standardized formats (Google/NumPy)
557
+ if detected_styles:
558
+ modernization_bonus += 2.0
559
+
560
+ maintainability_score = min(100.0, maintainability_score + modernization_bonus)
561
+
562
+ # Global penalties
563
+ penalty = len(cycles) * 10
564
+ module_score = max(0, module_score - penalty)
565
+ maintainability_score = max(0, maintainability_score - penalty)
566
+
567
+ if self.project_type == "generic":
568
+ return round(module_score, 1), round(maintainability_score, 1), 0.0
569
+
570
+ # ... (qgis_score logic remains same) ...
571
+ qgis_score = 100.0
572
+ qgis_score -= compliance.get("issues_count", 0) * 2
573
+ if not structure.get("is_valid", True):
574
+ qgis_score -= 20
575
+ if not metadata.get("is_valid", True):
576
+ qgis_score -= 10
577
+ qgis_score -= len(missing_resources) * 5
578
+ qgis_score -= len(binaries) * 50
579
+ if package_size > 20:
580
+ qgis_score -= 10
581
+
582
+ return (
583
+ round(module_score, 1),
584
+ round(maintainability_score, 1),
585
+ round(max(0, qgis_score), 1),
586
+ )