PyPI - diffsense - Versions diffs - 2.2.12__py3-none-any.whl - Mend

diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

adapters/__init__.py +0 -0
adapters/base.py +27 -0
adapters/github_adapter.py +164 -0
adapters/gitlab_adapter.py +207 -0
adapters/local_adapter.py +136 -0
banner.py +71 -0
cli.py +606 -0
config/__init__.py +1 -0
config/rules.yaml +371 -0
core/__init__.py +235 -0
core/ast_detector.py +853 -0
core/change.py +46 -0
core/composer.py +93 -0
core/evaluator.py +15 -0
core/ignore_manager.py +71 -0
core/knowledge.py +77 -0
core/parser.py +181 -0
core/parser_manager.py +104 -0
core/quality_manager.py +117 -0
core/renderer.py +197 -0
core/rule_base.py +98 -0
core/rule_runtime.py +103 -0
core/rules.py +718 -0
core/run_config.py +85 -0
core/semantic_diff.py +359 -0
core/signal_model.py +21 -0
core/signals_registry.py +62 -0
diffsense-2.2.12.dist-info/METADATA +18 -0
diffsense-2.2.12.dist-info/RECORD +58 -0
diffsense-2.2.12.dist-info/WHEEL +5 -0
diffsense-2.2.12.dist-info/entry_points.txt +3 -0
diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
diffsense-2.2.12.dist-info/top_level.txt +11 -0
diffsense_mcp/__init__.py +1 -0
diffsense_mcp/launcher.py +28 -0
diffsense_mcp/server.py +687 -0
governance/lifecycle.py +54 -0
main.py +318 -0
rules/__init__.py +246 -0
rules/api_compatibility.py +372 -0
rules/collection_handling.py +349 -0
rules/concurrency.py +194 -0
rules/concurrency_adapter.py +250 -0
rules/cross_language_adapter.py +444 -0
rules/exception_handling.py +320 -0
rules/go_rules.py +401 -0
rules/null_safety.py +301 -0
rules/resource_management.py +222 -0
rules/yaml_adapter.py +195 -0
run_audit.py +478 -0
sdk/cpp_adapter.py +238 -0
sdk/go_adapter.py +199 -0
sdk/java_adapter.py +199 -0
sdk/javascript_adapter.py +229 -0
sdk/language_adapter.py +313 -0
sdk/python_adapter.py +195 -0
sdk/rule.py +63 -0
sdk/signal.py +14 -0

core/quality_manager.py ADDED Viewed

@@ -0,0 +1,117 @@
+import os
+import json
+import time
+from typing import Dict, Any, Tuple, List
+class RuleQualityManager:
+    def __init__(self, path: str, auto_tune: bool, degrade_threshold: float, disable_threshold: float, min_samples: int):
+        self.path = path
+        self.auto_tune = auto_tune
+        self.degrade_threshold = degrade_threshold
+        self.disable_threshold = disable_threshold
+        self.min_samples = min_samples
+        self.data = self._load()
+    def _load(self) -> Dict[str, Any]:
+        if not os.path.exists(self.path):
+            return {"rules": {}}
+        try:
+            with open(self.path, "r", encoding="utf-8") as f:
+                data = json.load(f)
+            if isinstance(data, dict) and isinstance(data.get("rules"), dict):
+                return data
+        except Exception:
+            return {"rules": {}}
+        return {"rules": {}}
+    def persist(self) -> None:
+        self.data["updated_at"] = int(time.time())
+        try:
+            with open(self.path, "w", encoding="utf-8") as f:
+                json.dump(self.data, f, ensure_ascii=False, indent=2)
+        except Exception:
+            pass
+    def get_metrics(self) -> Dict[str, Any]:
+        return self.data.get("rules", {})
+    def _entry(self, rule_id: str) -> Dict[str, Any]:
+        rules = self.data.setdefault("rules", {})
+        entry = rules.get(rule_id)
+        if not isinstance(entry, dict):
+            entry = {"hits": 0, "confirmed": 0, "false_positive": 0, "precision": 1.0}
+            rules[rule_id] = entry
+        if "precision" not in entry:
+            entry["precision"] = self._precision(entry)
+        return entry
+    def _precision(self, entry: Dict[str, Any]) -> float:
+        hits = int(entry.get("hits", 0))
+        confirmed = int(entry.get("confirmed", 0))
+        return (confirmed / hits) if hits else 1.0
+    def record_hit(self, rule_id: str) -> Dict[str, Any]:
+        entry = self._entry(rule_id)
+        entry["hits"] = int(entry.get("hits", 0)) + 1
+        entry["confirmed"] = int(entry.get("confirmed", 0)) + 1
+        entry["precision"] = self._precision(entry)
+        return entry
+    def record_false_positive(self, rule_id: str) -> Dict[str, Any]:
+        entry = self._entry(rule_id)
+        entry["hits"] = int(entry.get("hits", 0)) + 1
+        entry["false_positive"] = int(entry.get("false_positive", 0)) + 1
+        entry["precision"] = self._precision(entry)
+        return entry
+    def status(self, rule_id: str) -> Tuple[str, float, int]:
+        entry = self._entry(rule_id)
+        hits = int(entry.get("hits", 0))
+        precision = float(entry.get("precision", 1.0))
+        if hits < self.min_samples:
+            return "insufficient", precision, hits
+        if precision < self.disable_threshold:
+            return "disabled", precision, hits
+        if precision < self.degrade_threshold:
+            return "degraded", precision, hits
+        return "normal", precision, hits
+    def should_skip(self, rule_id: str) -> bool:
+        """
+        [Architecture Principle Violation] NEVER automatically skip a rule based on quality.
+        Decision must be human-led.
+        """
+        return False
+    def adjust_severity(self, severity: str, rule_id: str) -> str:
+        """
+        [Architecture Principle Violation] NEVER automatically downgrade severity.
+        Severity defines risk semantics, not frequency.
+        """
+        return severity
+    def warnings(self) -> List[Dict[str, Any]]:
+        rows = []
+        for rule_id, entry in self.get_metrics().items():
+            if not isinstance(entry, dict):
+                continue
+            status, precision, hits = self.status(rule_id)
+            if status in ["degraded", "disabled"]:
+                rows.append({
+                    "rule_id": rule_id,
+                    "precision": precision,
+                    "hits": hits,
+                    "false_positive": entry.get("false_positive", 0),
+                    "confirmed": entry.get("confirmed", 0),
+                    "status": status
+                })
+        return sorted(rows, key=lambda r: (r["status"], r["precision"]))
+    def update_report(self, metrics: Dict[str, Dict[str, Any]], confidences: Dict[str, float]) -> None:
+        for rule_id, m in metrics.items():
+            entry = self._entry(rule_id)
+            calls = int(m.get("calls", 0))
+            time_ns = int(m.get("time_ns", 0))
+            avg_time_ms = (time_ns / 1_000_000 / calls) if calls else 0.0
+            entry["avg_time_ms"] = avg_time_ms
+            entry["confidence"] = float(confidences.get(rule_id, entry.get("confidence", 1.0)))

core/renderer.py ADDED Viewed

@@ -0,0 +1,197 @@
+from typing import Dict, Any
+import html as _html
+class MarkdownRenderer:
+    def render(self, result: Dict[str, Any]) -> str:
+        """
+        Renders the audit result into a Markdown string.
+        """
+        review_level = result.get("review_level", "unknown").capitalize()
+        details = result.get("details", [])
+        lines = []
+        if review_level in ["Elevated", "Critical"]:
+            lines.append(f"# 🚨 DiffSense Risk Signal: {review_level}")
+        else:
+            lines.append(f"# ✅ DiffSense Audit: {review_level}")
+        lines.append("")
+        if not details:
+            lines.append("No warnings detected.")
+            rule_stats = (result.get("_metrics") or {}).get("rule_stats", {})
+            total_rules = rule_stats.get("total_rules", 0)
+            executed_count = rule_stats.get("executed_count", 0)
+            if total_rules or executed_count:
+                lines.append("")
+                lines.append(f"Rules executed: {executed_count} / {total_rules}")
+            return "\n".join(lines)
+        severity_rank = {
+            "critical": 0,
+            "high": 1,
+            "medium": 2,
+            "low": 3,
+            "unknown": 4
+        }
+        grouped = {}
+        for d in details:
+            file_path = d.get("file") or d.get("matched_file") or "unknown"
+            rule_id = d.get("rule_id") or d.get("id") or "unknown"
+            severity = (d.get("severity") or "unknown").lower()
+            impact = d.get("impact") or "unknown"
+            rationale = d.get("rationale") or ""
+            grouped.setdefault(file_path, []).append({
+                "rule_id": rule_id,
+                "severity": severity,
+                "impact": impact,
+                "rationale": rationale
+            })
+        # Print risky files to stderr for CI logs
+        import sys
+        sys.stderr.write("\n" + "="*40 + "\n")
+        sys.stderr.write("🔍 DiffSense Risk Files\n")
+        sys.stderr.write("="*40 + "\n")
+        for file_path in sorted(grouped.keys()):
+            if file_path != "unknown":
+                issues_count = len(grouped[file_path])
+                max_severity = min(grouped[file_path], key=lambda x: severity_rank.get(x["severity"], 4))["severity"]
+                sys.stderr.write(f"  📁 {file_path} ({issues_count} issue(s), severity: {max_severity.upper()})\n")
+        sys.stderr.write("="*40 + "\n\n")
+        lines.append("## ⚠️ Warnings by File")
+        for file_path in sorted(grouped.keys()):
+            lines.append("")
+            lines.append(f"### `{file_path}`")
+            for item in sorted(grouped[file_path], key=lambda x: severity_rank.get(x["severity"], 4)):
+                sev_label = item["severity"].upper() if item["severity"] else "UNKNOWN"
+                lines.append(f"- **{sev_label}** `{item['rule_id']}` ({item['impact']})")
+                if item["rationale"]:
+                    lines.append(f"  - {item['rationale']}")
+        if review_level in ["Elevated", "Critical"]:
+            lines.append("")
+            lines.append("---")
+            lines.append("**Required action:**")
+            lines.append("This is a risk signal, not a block.")
+            lines.append("")
+            lines.append("👉 **Approve this PR** OR **React with 👍** to this comment, then **Re-run this job** to pass.")
+        rule_stats = (result.get("_metrics") or {}).get("rule_stats", {})
+        total_rules = rule_stats.get("total_rules", 0)
+        executed_count = rule_stats.get("executed_count", 0)
+        if total_rules or executed_count:
+            lines.append("")
+            lines.append(f"Rules executed: {executed_count} / {total_rules}")
+        return "\n".join(lines)
+class HtmlRenderer:
+    def render(self, result: Dict[str, Any]) -> str:
+        review_level = result.get("review_level", "unknown")
+        details = result.get("details", [])
+        metrics = result.get("_metrics", {})
+        rule_metrics = result.get("_metrics", {})
+        rule_quality = result.get("_rule_quality", {})
+        def esc(value: Any) -> str:
+            return _html.escape(str(value))
+        rows = []
+        for d in details:
+            rows.append(
+                "<tr>"
+                f"<td>{esc(d.get('rule_id', ''))}</td>"
+                f"<td>{esc(d.get('severity', ''))}</td>"
+                f"<td>{esc(d.get('file', ''))}</td>"
+                f"<td>{esc(d.get('impact', ''))}</td>"
+                f"<td>{esc(d.get('rationale', ''))}</td>"
+                f"<td>{esc(d.get('precision', ''))}</td>"
+                f"<td>{esc(d.get('quality_status', ''))}</td>"
+                "</tr>"
+            )
+        detail_table = "\n".join(rows) if rows else "<tr><td colspan='7'>No warnings detected.</td></tr>"
+        cache = metrics.get("cache", {})
+        diff_cache = cache.get("diff", {})
+        ast_cache = cache.get("ast", {})
+        d_total = diff_cache.get("hits", 0) + diff_cache.get("misses", 0)
+        a_total = ast_cache.get("hits", 0) + ast_cache.get("misses", 0)
+        d_rate = (diff_cache.get("hits", 0) / d_total * 100) if d_total else 0
+        a_rate = (ast_cache.get("hits", 0) / a_total * 100) if a_total else 0
+        rule_stats = metrics.get("rule_stats", {})
+        total_rules = rule_stats.get("total_rules", 0)
+        executed_count = rule_stats.get("executed_count", 0)
+        exec_pct = (executed_count / total_rules * 100) if total_rules else 0
+        rules_executed_line = f"<div>Rules executed: {executed_count} / {total_rules} ({exec_pct:.0f}%)</div>"
+        rule_rows = []
+        for rule_id, m in rule_metrics.items():
+            if rule_id in ("cache", "rule_stats"):
+                continue
+            time_ms = (m.get("time_ns", 0) / 1_000_000) if isinstance(m, dict) else 0
+            hits = m.get("hits", 0) if isinstance(m, dict) else 0
+            ignores = m.get("ignores", 0) if isinstance(m, dict) else 0
+            errors = m.get("errors", 0) if isinstance(m, dict) else 0
+            precision = ""
+            q = rule_quality.get(rule_id)
+            if isinstance(q, dict):
+                precision = q.get("precision", "")
+            rule_rows.append(
+                "<tr>"
+                f"<td>{esc(rule_id)}</td>"
+                f"<td>{esc(hits)}</td>"
+                f"<td>{esc(ignores)}</td>"
+                f"<td>{esc(errors)}</td>"
+                f"<td>{esc(time_ms)}</td>"
+                f"<td>{esc(precision)}</td>"
+                "</tr>"
+            )
+        rule_table = "\n".join(rule_rows) if rule_rows else "<tr><td colspan='6'>No rule metrics.</td></tr>"
+        return f"""<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>DiffSense Report</title>
+<style>
+body{{font-family:Arial,sans-serif;margin:20px}}
+table{{border-collapse:collapse;width:100%}}
+th,td{{border:1px solid #ddd;padding:8px;text-align:left}}
+th{{background:#f4f4f4}}
+.summary{{margin-bottom:16px}}
+</style>
+</head>
+<body>
+<h1>DiffSense Report</h1>
+<div class="summary">
+<div>Review Level: {esc(review_level)}</div>
+<div>Diff Cache Hit: {d_rate:.1f}% ({diff_cache.get("hits", 0)}/{d_total})</div>
+<div>AST Cache Hit: {a_rate:.1f}% ({ast_cache.get("hits", 0)}/{a_total})</div>
+{rules_executed_line}
+</div>
+<h2>Findings</h2>
+<table>
+<thead>
+<tr>
+<th>Rule</th><th>Severity</th><th>File</th><th>Impact</th><th>Rationale</th><th>Precision</th><th>Quality</th>
+</tr>
+</thead>
+<tbody>
+{detail_table}
+</tbody>
+</table>
+<h2>Rule Metrics</h2>
+<table>
+<thead>
+<tr>
+<th>Rule</th><th>Hits</th><th>Ignores</th><th>Errors</th><th>Time(ms)</th><th>Precision</th>
+</tr>
+</thead>
+<tbody>
+{rule_table}
+</tbody>
+</table>
+</body>
+</html>"""

core/rule_base.py ADDED Viewed

@@ -0,0 +1,98 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional, List
+class Rule(ABC):
+    """
+    Abstract Base Class for all DiffSense Rules.
+    This defines the Plugin Interface (SDK).
+    """
+    @property
+    @abstractmethod
+    def id(self) -> str:
+        """Unique Rule ID (e.g., 'runtime.concurrency.lock_removed')"""
+        pass
+    @property
+    @abstractmethod
+    def severity(self) -> str:
+        """Severity level: critical, high, medium, low"""
+        pass
+    @property
+    @abstractmethod
+    def impact(self) -> str:
+        """Impact dimension: security, runtime, data, maintenance"""
+        pass
+    @property
+    @abstractmethod
+    def rationale(self) -> str:
+        """Explanation of why this rule exists and what risk it prevents"""
+        pass
+    @property
+    def title(self) -> str:
+        """Human-readable title for the rule (optional, defaults to rule id)"""
+        return self.id
+    # Optional metadata (defaults for built-in rules; YamlRule overrides from YAML)
+    @property
+    def category(self) -> str:
+        """Rule category: concurrency, performance, reliability, security, general"""
+        return "general"
+    @property
+    def confidence(self) -> float:
+        """Confidence score 0.0-1.0"""
+        return 1.0
+    @property
+    def tags(self) -> List[str]:
+        """Optional tags for filtering"""
+        return []
+    @property
+    def enabled(self) -> bool:
+        """Whether this rule is enabled (engine skips if False)"""
+        return True
+    @property
+    def language(self) -> str:
+        """Language scope: * for all, or java, go, js, etc."""
+        return "*"
+    @property
+    def scope(self) -> str:
+        """File scope pattern (e.g. ** or **/core/**)"""
+        return "**"
+    @property
+    def rule_type(self) -> str:
+        """Rule type: regression (depends on diff history) or absolute (context-independent)"""
+        return "absolute"
+    @property
+    def is_blocking(self) -> bool:
+        """If True, any hit will force a 'critical' review level and suggested 'block_pr' action"""
+        return False
+    @property
+    def status(self) -> str:
+        """Lifecycle status: experimental, beta, stable, deprecated, disabled. Engine skips disabled."""
+        return "stable"
+    @abstractmethod
+    def evaluate(self, diff_data: Dict[str, Any], ast_signals: List[Any]) -> Optional[Dict[str, Any]]:
+        """
+        Execute the rule logic against the diff and signals.
+        Args:
+            diff_data: The raw diff parsing result
+            ast_signals: List of AST signals detected by ASTDetector
+        Returns:
+            Dict with match details (must contain 'file' key) if matched,
+            None if not matched.
+        """
+        pass

core/rule_runtime.py ADDED Viewed

@@ -0,0 +1,103 @@
+"""
+Legacy rule runtime. Prefer core.rules.RuleEngine as the single entry point;
+RuleEngine now integrates LifecycleManager, profile, directory/entry_point loading.
+"""
+import yaml
+import time
+from typing import Dict, List, Any, Optional
+from sdk.rule import BaseRule
+from sdk.signal import Signal
+from governance.lifecycle import LifecycleManager
+from rules.concurrency import (
+    ThreadPoolSemanticChangeRule,
+    ConcurrencyRegressionRule,
+    ThreadSafetyRemovalRule,
+    LatchMisuseRule
+)
+from rules.yaml_adapter import YamlRule
+class RuleRuntime:
+    """
+    The orchestrator for executing rules.
+    Handles Lifecycle, Metrics, Suppression, and Feedback.
+    """
+    def __init__(self, rules_path: str, config: Dict[str, Any] = None):
+        self.rules: List[BaseRule] = []
+        self.metrics: Dict[str, Dict[str, Any]] = {}
+        self.lifecycle = LifecycleManager(config)
+        # 1. Register Built-in Rules (Plugins)
+        self._register_builtins()
+        # 2. Load YAML Rules (Plugins)
+        self._load_yaml_rules(rules_path)
+    def _register_builtins(self):
+        """
+        Registers core rules. In a real OS, this would be a dynamic plugin loader.
+        """
+        self.rules.append(ThreadPoolSemanticChangeRule())
+        self.rules.append(ConcurrencyRegressionRule())
+        self.rules.append(ThreadSafetyRemovalRule())
+        self.rules.append(LatchMisuseRule())
+    def _load_yaml_rules(self, path: str):
+        try:
+            with open(path, 'r', encoding='utf-8') as f:
+                data = yaml.safe_load(f) or {}
+                raw_rules = data.get('rules', [])
+                for r in raw_rules:
+                    self.rules.append(YamlRule(r))
+        except FileNotFoundError:
+            pass
+    def execute(self, diff_data: Dict[str, Any], signals: List[Signal]) -> List[Dict[str, Any]]:
+        """
+        Main execution pipeline.
+        """
+        findings = []
+        for rule in self.rules:
+            # 1. Lifecycle Check
+            if not self.lifecycle.should_run(rule):
+                continue
+            rule_id = rule.id
+            if rule_id not in self.metrics:
+                self.metrics[rule_id] = {"calls": 0, "hits": 0, "time_ns": 0, "errors": 0}
+            self.metrics[rule_id]["calls"] += 1
+            start_time = time.time_ns()
+            match_details = None
+            try:
+                # 2. Execute Rule
+                match_details = rule.evaluate(diff_data, signals)
+            except Exception:
+                self.metrics[rule_id]["errors"] += 1
+            finally:
+                duration = time.time_ns() - start_time
+                self.metrics[rule_id]["time_ns"] += duration
+            if match_details:
+                # 3. Suppress Check (TODO)
+                # if self.suppress.is_suppressed(rule_id, match_details): continue
+                self.metrics[rule_id]["hits"] += 1
+                # 4. Severity Adjustment
+                severity = self.lifecycle.adjust_severity(rule, rule.severity)
+                findings.append({
+                    "id": rule.id,
+                    "severity": severity,
+                    "impact": rule.impact,
+                    "rationale": rule.rationale,
+                    "matched_file": match_details.get('file', 'unknown')
+                })
+        return findings
+    def get_metrics(self) -> Dict[str, Any]:
+        return self.metrics