PyPI - diffsense - Versions diffs - 2.2.12__py3-none-any.whl - Mend

diffsense 2.2.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

adapters/__init__.py +0 -0
adapters/base.py +27 -0
adapters/github_adapter.py +164 -0
adapters/gitlab_adapter.py +207 -0
adapters/local_adapter.py +136 -0
banner.py +71 -0
cli.py +606 -0
config/__init__.py +1 -0
config/rules.yaml +371 -0
core/__init__.py +235 -0
core/ast_detector.py +853 -0
core/change.py +46 -0
core/composer.py +93 -0
core/evaluator.py +15 -0
core/ignore_manager.py +71 -0
core/knowledge.py +77 -0
core/parser.py +181 -0
core/parser_manager.py +104 -0
core/quality_manager.py +117 -0
core/renderer.py +197 -0
core/rule_base.py +98 -0
core/rule_runtime.py +103 -0
core/rules.py +718 -0
core/run_config.py +85 -0
core/semantic_diff.py +359 -0
core/signal_model.py +21 -0
core/signals_registry.py +62 -0
diffsense-2.2.12.dist-info/METADATA +18 -0
diffsense-2.2.12.dist-info/RECORD +58 -0
diffsense-2.2.12.dist-info/WHEEL +5 -0
diffsense-2.2.12.dist-info/entry_points.txt +3 -0
diffsense-2.2.12.dist-info/licenses/LICENSE +176 -0
diffsense-2.2.12.dist-info/top_level.txt +11 -0
diffsense_mcp/__init__.py +1 -0
diffsense_mcp/launcher.py +28 -0
diffsense_mcp/server.py +687 -0
governance/lifecycle.py +54 -0
main.py +318 -0
rules/__init__.py +246 -0
rules/api_compatibility.py +372 -0
rules/collection_handling.py +349 -0
rules/concurrency.py +194 -0
rules/concurrency_adapter.py +250 -0
rules/cross_language_adapter.py +444 -0
rules/exception_handling.py +320 -0
rules/go_rules.py +401 -0
rules/null_safety.py +301 -0
rules/resource_management.py +222 -0
rules/yaml_adapter.py +195 -0
run_audit.py +478 -0
sdk/cpp_adapter.py +238 -0
sdk/go_adapter.py +199 -0
sdk/java_adapter.py +199 -0
sdk/javascript_adapter.py +229 -0
sdk/language_adapter.py +313 -0
sdk/python_adapter.py +195 -0
sdk/rule.py +63 -0
sdk/signal.py +14 -0

adapters/__init__.py ADDED Viewed

File without changes

adapters/base.py ADDED Viewed

@@ -0,0 +1,27 @@
+from abc import ABC, abstractmethod
+class PlatformAdapter(ABC):
+    @abstractmethod
+    def fetch_diff(self) -> str:
+        """
+        Fetch unified diff content from the platform.
+        """
+        pass
+    @abstractmethod
+    def post_comment(self, content: str):
+        """
+        Post a comment to the MR/PR.
+        Should handle update logic if applicable (e.g. edit existing comment).
+        """
+        pass
+    def is_approved(self) -> bool:
+        """
+        Check if the MR/PR is approved by a reviewer.
+        Default implementation returns False.
+        """
+        return False
+    def post_inline_comments(self, comments):
+        return None

adapters/github_adapter.py ADDED Viewed

@@ -0,0 +1,164 @@
+import os
+from github import Github, GithubException
+import requests
+from .base import PlatformAdapter
+class GitHubAdapter(PlatformAdapter):
+    def __init__(self, token: str, repo_name: str, pr_number: int):
+        self.gh = Github(token)
+        self.repo = self.gh.get_repo(repo_name)
+        self.pr = self.repo.get_pull(pr_number)
+        self.comment_tag = "<!-- diffsense_audit_report -->"
+    def fetch_diff(self) -> str:
+        # PyGithub's get_files() doesn't give raw unified diff easily for the whole PR.
+        # It's better to fetch the diff url directly.
+        # But wait, self.pr.diff_url gives the url, we need to download it.
+        # However, accessing the URL requires auth if the repo is private.
+        # We can use the token in headers.
+        headers = {
+            'Authorization': f'token {self.gh.get_user().login if False else os.environ.get("GITHUB_TOKEN")}',
+            'Accept': 'application/vnd.github.v3.diff'
+        }
+        # Actually PyGithub handles auth, but for raw request we need to handle it.
+        # Let's use requests.
+        # Note: os.environ.get("GITHUB_TOKEN") is usually passed via constructor,
+        # but here we rely on the passed token.
+        # Re-construct headers properly
+        # We need to use the token passed in init.
+        # But wait, Github object doesn't expose raw token easily?
+        # Actually it does, but let's just use the one passed to init.
+        # Wait, self.gh is authenticated.
+        # self.pr.diff_url is public accessible? No, for private repos it needs auth.
+        # Let's use requests with the token.
+        # BUT, there's a simpler way:
+        # response = requests.get(self.pr.diff_url, headers={'Authorization': f'token {token}'})
+        # I need to store the token.
+        pass
+        # Let's refactor init to store token or handle this better.
+        # Actually, self.pr has not 'diff' attribute directly?
+        # PyGithub requests:
+        # content = self.repo._requester.requestJsonAndCheck("GET", self.pr.url, headers={"Accept": "application/vnd.github.v3.diff"})
+        # This is internal API usage.
+        # Safer way: requests.
+        pass
+    def fetch_diff_safe(self, token: str) -> str:
+        headers = {
+            'Authorization': f'token {token}',
+            'Accept': 'application/vnd.github.v3.diff'
+        }
+        response = requests.get(self.pr.url, headers=headers)
+        response.raise_for_status()
+        return response.text
+    def post_comment(self, content: str):
+        # Check for existing comment
+        comments = self.pr.get_issue_comments()
+        existing_comment = None
+        for comment in comments:
+            if self.comment_tag in comment.body:
+                existing_comment = comment
+                break
+        body = f"{self.comment_tag}\n{content}"
+        if existing_comment:
+            existing_comment.edit(body)
+            print(f"Updated existing comment {existing_comment.id}")
+        else:
+            self.pr.create_issue_comment(body)
+            print("Created new comment")
+# Redefine class to include token storage and proper fetch
+class GitHubAdapter(PlatformAdapter):
+    def __init__(self, token: str, repo_name: str, pr_number: int):
+        self.token = token
+        self.gh = Github(token)
+        self.repo = self.gh.get_repo(repo_name)
+        self.pr = self.repo.get_pull(pr_number)
+        self.comment_tag = "<!-- diffsense_audit_report -->"
+    def fetch_diff(self) -> str:
+        headers = {
+            'Authorization': f'token {self.token}',
+            'Accept': 'application/vnd.github.v3.diff'
+        }
+        # self.pr.url gives the API url (e.g. https://api.github.com/repos/...)
+        # Requesting it with diff header gives the diff.
+        response = requests.get(self.pr.url, headers=headers)
+        response.raise_for_status()
+        return response.text
+    def post_comment(self, content: str):
+        comments = self.pr.get_issue_comments()
+        existing_comment = None
+        for comment in comments:
+            if self.comment_tag in comment.body:
+                existing_comment = comment
+                break
+        final_body = f"{content}\n\n{self.comment_tag}"
+        if existing_comment:
+            existing_comment.edit(final_body)
+            print(f"Updated GitHub comment {existing_comment.id}")
+        else:
+            self.pr.create_issue_comment(final_body)
+            print("Created GitHub comment")
+    def post_inline_comments(self, comments):
+        if not comments:
+            return
+        commit = self.pr.head.sha
+        for c in comments:
+            path = c.get("path")
+            position = c.get("position")
+            body = c.get("body")
+            if not path or not position or not body:
+                continue
+            try:
+                self.pr.create_review_comment(body, commit, path, position)
+            except Exception as e:
+                print(f"Inline comment failed: {e}")
+    def is_approved(self) -> bool:
+        reviews = self.pr.get_reviews()
+        reviewer_states = {}
+        for review in reviews:
+            # Dismissed reviews are not active, but get_reviews might return them?
+            # State can be APPROVED, CHANGES_REQUESTED, COMMENTED, DISMISSED, PENDING.
+            # We only care about the latest state per user.
+            reviewer_states[review.user.login] = review.state
+        has_approval = False
+        has_changes_requested = False
+        for state in reviewer_states.values():
+            if state == 'APPROVED':
+                has_approval = True
+            elif state == 'CHANGES_REQUESTED':
+                has_changes_requested = True
+        # If any changes requested, it's not approved.
+        # If approved by at least one and no changes requested, it's approved.
+        return has_approval and not has_changes_requested
+    def has_ack_reaction(self) -> bool:
+        """
+        Check if the bot's comment has a Thumbs Up (👍) reaction.
+        """
+        comments = self.pr.get_issue_comments()
+        for comment in comments:
+            if self.comment_tag in comment.body:
+                # Check reactions
+                reactions = comment.get_reactions()
+                for reaction in reactions:
+                    if reaction.content == "+1": # +1 corresponds to 👍
+                        return True
+        return False

adapters/gitlab_adapter.py ADDED Viewed

@@ -0,0 +1,207 @@
+import gitlab
+import requests
+from .base import PlatformAdapter
+class GitLabAdapter(PlatformAdapter):
+    def __init__(self, url: str, token: str, project_id: str, mr_iid: int):
+        self.gl = gitlab.Gitlab(url, private_token=token)
+        try:
+            self.project = self.gl.projects.get(project_id)
+            self.mr = self.project.mergerequests.get(mr_iid)
+        except gitlab.exceptions.GitlabGetError as e:
+            if e.response_code == 404:
+                print(f"❌ Error: Could not find Project {project_id} or MR {mr_iid} on {url}.")
+                print("   - Check if DIFFSENSE_TOKEN has 'api' scope.")
+                print("   - Ensure the token user is a member of the project.")
+                print("   - Verify the GitLab URL is correct (defaults to gitlab.com if not specified).")
+            raise e
+        self.comment_tag = "<!-- diffsense_audit_report -->"
+        self.inline_comment_tag = "<!-- diffsense_inline_report -->"
+        self.token = token # store for manual request if needed
+    def fetch_diff(self) -> str:
+        # GitLab API returns diffs in list of dicts via /changes
+        # or we can get unified diff via .diff endpoint.
+        # However, for large MRs, the .diff endpoint might be paginated or truncated?
+        # Let's try to use the project.mergerequests.changes() method which gives structured diffs
+        # and reconstruct unified diff if needed, OR just use the raw diff endpoint.
+        # Issue: If the raw diff endpoint returns something unexpected or empty.
+        # Let's try to use the changes API as a fallback or primary source if raw fails.
+        base_url = self.gl.url.rstrip('/')
+        diff_url = f"{base_url}/api/v4/projects/{self.project.id}/merge_requests/{self.mr.iid}.diff"
+        headers = {
+            'PRIVATE-TOKEN': self.token
+        }
+        try:
+            response = requests.get(diff_url, headers=headers)
+            response.raise_for_status()
+            content = response.text
+            # Validation: Check if it's JSON (API error or wrong endpoint behavior)
+            if content.strip().startswith('{') and '"id":' in content:
+                 print("Warning: .diff endpoint returned JSON. Falling back to changes API.")
+                 return self._fetch_diff_fallback()
+            if not content.strip():
+                 print("Warning: Raw diff is empty. Trying fallback to changes API.")
+                 return self._fetch_diff_fallback()
+            return content
+        except Exception as e:
+            print(f"Warning: Failed to fetch raw diff: {e}. Trying fallback.")
+            return self._fetch_diff_fallback()
+    def _fetch_diff_fallback(self) -> str:
+        # Fallback: Use python-gitlab changes() API and reconstruct unified-like diff
+        # This is robust because it uses the official API structure
+        mr_changes = self.mr.changes()
+        diffs = mr_changes.get('changes', [])
+        unified_diff = []
+        for d in diffs:
+            old_path = d.get('old_path')
+            new_path = d.get('new_path')
+            diff_text = d.get('diff', '')
+            unified_diff.append(f"diff --git a/{old_path} b/{new_path}")
+            if d.get('new_file'):
+                unified_diff.append(f"--- /dev/null")
+                unified_diff.append(f"+++ b/{new_path}")
+            elif d.get('deleted_file'):
+                unified_diff.append(f"--- a/{old_path}")
+                unified_diff.append(f"+++ /dev/null")
+            elif d.get('renamed_file'):
+                unified_diff.append(f"--- a/{old_path}")
+                unified_diff.append(f"+++ b/{new_path}")
+            else:
+                unified_diff.append(f"--- a/{old_path}")
+                unified_diff.append(f"+++ b/{new_path}")
+            unified_diff.append(diff_text)
+        return "\n".join(unified_diff)
+    def post_comment(self, content: str):
+        # Check for existing comment
+        notes = self.mr.notes.list(all=True)
+        existing_note = None
+        for note in notes:
+            if self.comment_tag in note.body:
+                existing_note = note
+                break
+        # Ensure content is properly formatted with the tag
+        final_body = f"{content}\n\n{self.comment_tag}"
+        if existing_note:
+            # Update existing comment
+            existing_note.body = final_body
+            existing_note.save()
+            print(f"Updated GitLab note {existing_note.id}")
+        else:
+            # Create new comment
+            self.mr.notes.create({'body': final_body})
+            print("Created GitLab note")
+    def post_inline_comments(self, comments):
+        if not comments:
+            return
+        # IMPORTANT:
+        # Do not call post_comment() here. That would overwrite the main
+        # markdown audit report (regression bug), causing plain text fallback.
+        lines = ["## Inline Findings", ""]
+        for c in comments:
+            path = c.get("path", "")
+            line = c.get("line", "")
+            body = c.get("body", "")
+            lines.append(f"- `{path}:{line}` {body}")
+        content = "\n".join(lines)
+        final_body = f"{content}\n\n{self.inline_comment_tag}"
+        notes = self.mr.notes.list(all=True)
+        existing_note = None
+        for note in notes:
+            if self.inline_comment_tag in note.body:
+                existing_note = note
+                break
+        if existing_note:
+            existing_note.body = final_body
+            existing_note.save()
+            print(f"Updated GitLab inline note {existing_note.id}")
+        else:
+            self.mr.notes.create({"body": final_body})
+            print("Created GitLab inline note")
+    def is_approved(self) -> bool:
+        """
+        Check if MR is approved using GitLab's Approvals API.
+        """
+        try:
+            # Need to fetch approvals explicitly
+            approvals = self.mr.approvals.get()
+            # Logic: If approved_by list is not empty, consider it approved?
+            # Or check approvals_left <= 0?
+            # Dubbo/OpenSource usually relies on 'approved' state.
+            # Strategy 1: Check if any approval exists
+            # Note: approvals.approved_by is a list of users
+            if hasattr(approvals, 'approved_by') and approvals.approved_by and len(approvals.approved_by) > 0:
+                return True
+            # Strategy 2: Check approvals_left (if configured)
+            # Note: approvals_left might not exist if no rules are set
+            if hasattr(approvals, 'approvals_left') and approvals.approvals_left == 0:
+                return True
+            # Strategy 3: Check 'approved' attribute directly (some GitLab versions)
+            if hasattr(approvals, 'approved') and approvals.approved:
+                return True
+            return False
+        except Exception as e:
+            print(f"Warning: Failed to fetch GitLab approvals: {e}")
+            return False
+    def has_ack_reaction(self) -> bool:
+        """
+        Check if the bot's report comment has a 'thumbsup' or 'rocket' reaction.
+        This allows 'Click-to-Ack' flow without formal approval.
+        """
+        try:
+            notes = self.mr.notes.list(all=True)
+            target_note = None
+            for note in notes:
+                if self.comment_tag in note.body:
+                    target_note = note
+                    break
+            if not target_note:
+                return False
+            # Fetch award emojis for this note
+            # python-gitlab note object usually has 'awardemojis' manager?
+            # Or we need to fetch specifically.
+            # Try efficient way first
+            # The list() might not include award_emoji info directly.
+            # Using specific API call for the note
+            # endpoint: GET /projects/:id/merge_requests/:mr_iid/notes/:note_id/award_emoji
+            # Note: python-gitlab objects are lazy. accessing .awardemojis might work if supported.
+            # Let's try standard way
+            awards = target_note.awardemojis.list()
+            for award in awards:
+                if award.name in ['thumbsup', 'rocket', '+1']:
+                    return True
+            return False
+        except Exception as e:
+            print(f"Warning: Failed to check reaction: {e}")
+            return False

adapters/local_adapter.py ADDED Viewed

@@ -0,0 +1,136 @@
+import os
+import json
+from typing import Optional, List, Dict, Any
+from .base import PlatformAdapter
+class LocalFileAdapter(PlatformAdapter):
+    """
+    本地文件适配器，用于 AI Agent 和本地调试场景。
+    从本地文件读取 diff，将输出写入本地文件。
+    """
+    def __init__(
+        self,
+        diff_file_path: Optional[str] = None,
+        output_dir: str = ".",
+        report_filename: str = "diffsense-report.json",
+        comments_filename: str = "diffsense-comments.json",
+        html_filename: str = "diffsense-report.html"
+    ):
+        """
+        Args:
+            diff_file_path: diff 文件路径。如果为 None，则调用者需要在 fetch_diff 中提供内容
+            output_dir: 输出目录
+            report_filename: 审计报告文件名
+            comments_filename: 内联评论文件名
+            html_filename: HTML 报告文件名
+        """
+        self.diff_file_path = diff_file_path
+        self.output_dir = output_dir
+        self.report_filename = report_filename
+        self.comments_filename = comments_filename
+        self.html_filename = html_filename
+        self._last_diff_content: Optional[str] = None
+        self._last_comments: Optional[List[Dict[str, Any]]] = None
+    def set_diff_content(self, content: str):
+        """直接设置 diff 内容（用于流式场景）"""
+        self._last_diff_content = content
+    def fetch_diff(self) -> str:
+        """从文件读取 diff 内容"""
+        if self._last_diff_content is not None:
+            return self._last_diff_content
+        if self.diff_file_path is None:
+            raise ValueError("diff_file_path not set and no diff content provided")
+        with open(self.diff_file_path, 'r', encoding='utf-8') as f:
+            return f.read()
+    def post_comment(self, content: str):
+        """
+        将报告内容写入本地文件。
+        支持 JSON 和 Markdown 格式。
+        """
+        output_path = os.path.join(self.output_dir, self.report_filename)
+        # Try to parse as JSON first
+        try:
+            data = json.loads(content)
+            with open(output_path.replace('.json', '-comment.json'), 'w', encoding='utf-8') as f:
+                json.dump(data, f, ensure_ascii=False, indent=2)
+        except (json.JSONDecodeError, TypeError):
+            # Fall back to markdown
+            with open(output_path.replace('.json', '-comment.md'), 'w', encoding='utf-8') as f:
+                f.write(content)
+    def save_report(self, report_data: Dict[str, Any]):
+        """
+        直接保存结构化报告数据（推荐使用）
+        """
+        output_path = os.path.join(self.output_dir, self.report_filename)
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(report_data, f, ensure_ascii=False, indent=2)
+    def save_html_report(self, html_content: str):
+        """保存 HTML 报告"""
+        output_path = os.path.join(self.output_dir, self.html_filename)
+        with open(output_path, 'w', encoding='utf-8') as f:
+            f.write(html_content)
+    def post_inline_comments(self, comments: List[Dict[str, Any]]):
+        """保存内联评论到文件"""
+        output_path = os.path.join(self.output_dir, self.comments_filename)
+        self._last_comments = comments
+        with open(output_path, 'w', encoding='utf-8') as f:
+            json.dump(comments, f, ensure_ascii=False, indent=2)
+    def is_approved(self) -> bool:
+        """本地模式默认返回 False（无审批流程）"""
+        return False
+    def get_output_paths(self) -> Dict[str, str]:
+        """返回所有输出文件路径"""
+        return {
+            "report": os.path.join(self.output_dir, self.report_filename),
+            "comments": os.path.join(self.output_dir, self.comments_filename),
+            "html": os.path.join(self.output_dir, self.html_filename),
+        }
+class StreamingLocalAdapter(LocalFileAdapter):
+    """
+    流式本地适配器，支持处理未保存的 diff 内容。
+    适用于 AI Agent 场景，用户无需先保存 diff 文件。
+    """
+    def __init__(self, output_dir: str = ".", **kwargs):
+        super().__init__(diff_file_path=None, output_dir=output_dir, **kwargs)
+    def analyze_and_save(
+        self,
+        diff_content: str,
+        report_data: Dict[str, Any],
+        inline_comments: Optional[List[Dict[str, Any]]] = None,
+        html_report: Optional[str] = None
+    ):
+        """
+        一站式分析结果保存。
+        Args:
+            diff_content: 原始 diff 内容
+            report_data: 结构化审计报告
+            inline_comments: 内联评论列表
+            html_report: HTML 报告内容
+        """
+        self._last_diff_content = diff_content
+        self.save_report(report_data)
+        if inline_comments:
+            self.post_inline_comments(inline_comments)
+        if html_report:
+            self.save_html_report(html_report)

banner.py ADDED Viewed

@@ -0,0 +1,71 @@
+"""
+DiffSense startup banner (similar to Spring Boot's banner.txt).
+Prints the logo at the top of the log when an audit runs in CI, so the pipeline is easy to identify.
+"""
+def _get_version() -> str:
+    try:
+        from importlib.metadata import version
+        return version("diffsense")
+    except Exception:
+        return "2.2.6"
+def _get_build_info() -> dict:
+    """获取构建信息，用于详细的版本输出。"""
+    import os
+    import subprocess
+    build_info = {
+        "version": _get_version(),
+        "commit": "unknown",
+        "build_date": "unknown"
+    }
+    # Try to get git commit hash
+    try:
+        result = subprocess.run(
+            ["git", "rev-parse", "--short", "HEAD"],
+            capture_output=True,
+            text=True,
+            timeout=5
+        )
+        if result.returncode == 0:
+            build_info["commit"] = result.stdout.strip()
+    except Exception:
+        pass
+    # Try to get build date
+    try:
+        from datetime import datetime
+        build_info["build_date"] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    except Exception:
+        pass
+    return build_info
+# ASCII art: DiffSense (monospaced, sized to fit CI logs)
+BANNER = r"""
  ____  _     _ _____ _____ ____ _____
 |  _ \(_) __| |  ___|  ___/ ___| ____|
 | | | | |/ _` | |_  | |_  \___ \  _|
 | |_| | | (_| |  _| |  _|  ___) | |___
 |____/|_|\__,_|_|   |_|   |____/|_____|
 :: DiffSense - MR/PR Risk Audit for CI/CD ::
+"""
+def print_banner() -> None:
+    """在 stdout 打印 DiffSense Logo 与版本，供 CI 流水线识别。"""
+    version = _get_version()
+    build_info = _get_build_info()
+    # 只去掉首尾换行，保留每行前导空格以保持 ASCII 对齐
+    print(BANNER.strip("\n"))
+    print(f" :: Version: v{version}")
+    print(f" :: Commit:  {build_info['commit']}")
+    print(f" :: Built:   {build_info['build_date']}")
+    print()