cicada-mcp 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cicada-mcp might be problematic. Click here for more details.

Files changed (48)
  1. cicada/__init__.py +30 -0
  2. cicada/clean.py +297 -0
  3. cicada/command_logger.py +293 -0
  4. cicada/dead_code_analyzer.py +282 -0
  5. cicada/extractors/__init__.py +36 -0
  6. cicada/extractors/base.py +66 -0
  7. cicada/extractors/call.py +176 -0
  8. cicada/extractors/dependency.py +361 -0
  9. cicada/extractors/doc.py +179 -0
  10. cicada/extractors/function.py +246 -0
  11. cicada/extractors/module.py +123 -0
  12. cicada/extractors/spec.py +151 -0
  13. cicada/find_dead_code.py +270 -0
  14. cicada/formatter.py +918 -0
  15. cicada/git_helper.py +646 -0
  16. cicada/indexer.py +629 -0
  17. cicada/install.py +724 -0
  18. cicada/keyword_extractor.py +364 -0
  19. cicada/keyword_search.py +553 -0
  20. cicada/lightweight_keyword_extractor.py +298 -0
  21. cicada/mcp_server.py +1559 -0
  22. cicada/mcp_tools.py +291 -0
  23. cicada/parser.py +124 -0
  24. cicada/pr_finder.py +435 -0
  25. cicada/pr_indexer/__init__.py +20 -0
  26. cicada/pr_indexer/cli.py +62 -0
  27. cicada/pr_indexer/github_api_client.py +431 -0
  28. cicada/pr_indexer/indexer.py +297 -0
  29. cicada/pr_indexer/line_mapper.py +209 -0
  30. cicada/pr_indexer/pr_index_builder.py +253 -0
  31. cicada/setup.py +339 -0
  32. cicada/utils/__init__.py +52 -0
  33. cicada/utils/call_site_formatter.py +95 -0
  34. cicada/utils/function_grouper.py +57 -0
  35. cicada/utils/hash_utils.py +173 -0
  36. cicada/utils/index_utils.py +290 -0
  37. cicada/utils/path_utils.py +240 -0
  38. cicada/utils/signature_builder.py +106 -0
  39. cicada/utils/storage.py +111 -0
  40. cicada/utils/subprocess_runner.py +182 -0
  41. cicada/utils/text_utils.py +90 -0
  42. cicada/version_check.py +116 -0
  43. cicada_mcp-0.1.4.dist-info/METADATA +619 -0
  44. cicada_mcp-0.1.4.dist-info/RECORD +48 -0
  45. cicada_mcp-0.1.4.dist-info/WHEEL +5 -0
  46. cicada_mcp-0.1.4.dist-info/entry_points.txt +8 -0
  47. cicada_mcp-0.1.4.dist-info/licenses/LICENSE +21 -0
  48. cicada_mcp-0.1.4.dist-info/top_level.txt +1 -0
@@ -0,0 +1,297 @@
1
+ """
2
+ PR Indexer - Indexes pull requests and their commits for fast offline lookup.
3
+
4
+ Fetches all PRs from a GitHub repository and builds an index mapping commits to PRs.
5
+ """
6
+
7
+ from pathlib import Path
8
+ from typing import Optional, Dict, List, Any
9
+
10
+ from .github_api_client import GitHubAPIClient
11
+ from .pr_index_builder import PRIndexBuilder
12
+ from .line_mapper import LineMapper
13
+
14
+
15
class PRIndexer:
    """
    Indexes GitHub pull requests for fast offline lookup.

    This class orchestrates the indexing process, delegating to specialized
    components for API interaction, index building, and line mapping.
    """

    # Number of PRs requested per GraphQL batch call.
    BATCH_SIZE = 10

    # Listing limit high enough that no PR is missed even in large repos.
    LIST_LIMIT = 100000

    def __init__(self, repo_path: str = "."):
        """
        Initialize the PR indexer.

        Args:
            repo_path: Path to the git repository (defaults to current directory)

        Raises:
            ValueError: If ``repo_path`` is not a git repository.
        """
        self.repo_path = Path(repo_path).resolve()
        self._validate_git_repo()

        # A throwaway client (owner/name not known yet) is used to verify the
        # `gh` CLI is available and to discover the repository coordinates.
        temp_client = GitHubAPIClient(self.repo_path, "", "")
        temp_client.validate_gh_cli()
        self.repo_owner, self.repo_name = temp_client.get_repo_info()

        # Initialize the real components with the discovered owner/name.
        self.api_client = GitHubAPIClient(
            self.repo_path, self.repo_owner, self.repo_name
        )
        self.index_builder = PRIndexBuilder(self.repo_owner, self.repo_name)
        self.line_mapper = LineMapper(self.repo_path)

    def _validate_git_repo(self):
        """Validate that the path is a git repository."""
        git_dir = self.repo_path / ".git"
        if not git_dir.exists():
            raise ValueError(f"Not a git repository: {self.repo_path}")

    def _fetch_detail_batches(
        self,
        pr_numbers: List[int],
        collected: List[Dict[str, Any]],
        label: str,
    ) -> None:
        """
        Fetch detailed PR data in batches, appending results to ``collected``.

        Progress is printed as ``<label> i/total``. On KeyboardInterrupt the
        exception propagates to the caller with everything fetched so far
        already present in ``collected``.

        Args:
            pr_numbers: PR numbers to fetch, in the order they should be fetched
            collected: Output list that detailed PR dicts are appended to
            label: Progress-message prefix (e.g. "Fetching batch" or "Batch")
        """
        total_batches = (len(pr_numbers) + self.BATCH_SIZE - 1) // self.BATCH_SIZE
        for i in range(0, len(pr_numbers), self.BATCH_SIZE):
            batch = pr_numbers[i : i + self.BATCH_SIZE]
            print(
                f" {label} {i // self.BATCH_SIZE + 1}/{total_batches} "
                f"({len(batch)} PRs)..."
            )
            collected.extend(self.api_client.fetch_prs_batch_graphql(batch))

    def fetch_all_prs(self, state: str = "all") -> List[Dict[str, Any]]:
        """
        Fetch all pull requests from GitHub using GraphQL for efficiency.

        Args:
            state: PR state filter ('all', 'open', 'closed', 'merged')

        Returns:
            List of PR dictionaries with full details. May be partial if the
            user interrupts with Ctrl-C.

        Raises:
            RuntimeError: If the PR listing or batch fetches fail.
        """
        print(f"Fetching PRs from {self.repo_owner}/{self.repo_name}...")

        try:
            # Get list of PR numbers with a very high limit so no PRs are
            # missed in large repositories.
            pr_numbers = self.api_client.fetch_pr_list(state=state, limit=self.LIST_LIMIT)
            print(f"Found {len(pr_numbers)} pull requests")

            detailed_prs: List[Dict[str, Any]] = []
            try:
                self._fetch_detail_batches(pr_numbers, detailed_prs, "Fetching batch")
            except KeyboardInterrupt:
                # Return what we have; the caller saves a partial index.
                print(
                    f"\n\n⚠️ Interrupted by user. Fetched {len(detailed_prs)}/"
                    f"{len(pr_numbers)} PRs."
                )
                print("Saving partial index...")
            return detailed_prs

        except RuntimeError as e:
            # Chain the original error so the root cause stays visible.
            raise RuntimeError(f"Failed to fetch PRs: {e}") from e

    def incremental_update(
        self, existing_index: Dict[str, Any]
    ) -> List[Dict[str, Any]]:
        """
        Fetch PRs bidirectionally: newer (above max) and older (below min).

        Args:
            existing_index: The existing index dictionary

        Returns:
            List of new PRs (empty if the index is already complete)
        """
        # Get min and max PR numbers currently in the index.
        existing_pr_numbers = [int(num) for num in existing_index.get("prs", {}).keys()]

        if not existing_pr_numbers:
            print("Empty index, performing full fetch...")
            return self.fetch_all_prs()

        min_pr = min(existing_pr_numbers)
        max_pr = max(existing_pr_numbers)
        total_prs_in_repo = self.api_client.get_total_pr_count()

        print(
            f"Performing incremental update (index range: #{min_pr}-#{max_pr}, "
            f"repo has {total_prs_in_repo} PRs)..."
        )

        # Fetch newer PRs (> max_pr) and older PRs (< min_pr).
        newer_pr_numbers = self._fetch_newer_prs(max_pr)
        older_pr_numbers = self._fetch_older_prs(min_pr)

        if not newer_pr_numbers and not older_pr_numbers:
            print("Index is complete (no newer or older PRs to fetch)")
            return []

        print(f"Found {len(newer_pr_numbers)} newer PRs", end="")
        if older_pr_numbers:
            print(
                f" and {len(older_pr_numbers)} older PRs to fetch "
                f"(going downward from #{min_pr})"
            )
        else:
            print()

        # Fetch detailed info for all PRs.
        return self._fetch_prs_in_batches(newer_pr_numbers, older_pr_numbers, min_pr)

    def _fetch_newer_prs(self, max_pr: int) -> List[int]:
        """Return PR numbers strictly greater than ``max_pr``, newest first."""
        # Use the same high limit as the full fetch; a lower cap (the previous
        # 1000) could silently miss newer PRs in very active repositories.
        pr_numbers = self.api_client.fetch_pr_list(state="all", limit=self.LIST_LIMIT)

        # PRs are returned newest-first, so stop at the first one we already have.
        newer = []
        for pr_num in pr_numbers:
            if pr_num <= max_pr:
                break
            newer.append(pr_num)

        return newer

    def _fetch_older_prs(self, min_pr: int) -> List[int]:
        """Return PR numbers strictly less than ``min_pr``, descending."""
        if min_pr <= 1:
            # Nothing can be older than PR #1.
            return []

        try:
            # Fetch all PR numbers and filter for ones < min_pr.
            # Use a very high limit so no older PRs are missed in large repos.
            all_prs = self.api_client.fetch_pr_list(state="all", limit=self.LIST_LIMIT)
            return sorted(
                [pr_num for pr_num in all_prs if pr_num < min_pr],
                reverse=True,  # Descending order
            )

        except RuntimeError as e:
            # Best-effort: older history is optional for an incremental update.
            print(f"Warning: Could not fetch older PRs: {e}")
            return []

    def _fetch_prs_in_batches(
        self, newer_pr_numbers: List[int], older_pr_numbers: List[int], min_pr: int
    ) -> List[Dict[str, Any]]:
        """
        Fetch newer PRs first, then older ones, in batches with progress output.

        Returns whatever was fetched so far if interrupted with Ctrl-C.
        """
        detailed_prs: List[Dict[str, Any]] = []

        try:
            if newer_pr_numbers:
                print(f"\n⬆️ Fetching {len(newer_pr_numbers)} newer PRs...")
                self._fetch_detail_batches(newer_pr_numbers, detailed_prs, "Batch")

            if older_pr_numbers:
                print(
                    f"\n⬇️ Fetching {len(older_pr_numbers)} older PRs "
                    f"(going downward from #{min_pr})..."
                )
                self._fetch_detail_batches(older_pr_numbers, detailed_prs, "Batch")

        except KeyboardInterrupt:
            print(
                f"\n\n⚠️ Interrupted by user. Fetched {len(detailed_prs)}/"
                f"{len(newer_pr_numbers) + len(older_pr_numbers)} PRs."
            )
            print("Saving partial index...")

        return detailed_prs

    def index_repository(
        self, output_path: str = ".cicada/pr_index.json", incremental: bool = False
    ):
        """
        Index the repository's PRs and save to file.

        Args:
            output_path: Path where the index will be saved
            incremental: If True, only fetch new PRs since last index

        Returns:
            The index dictionary that was saved (or the unchanged existing
            index when there is nothing new).
        """
        # Load existing index to preserve last_pr_number if clean build is interrupted.
        existing_index = self.index_builder.load_existing_index(output_path)

        if incremental:
            if existing_index:
                # Fetch only new PRs.
                new_prs = self.incremental_update(existing_index)

                if not new_prs:
                    print("No new PRs found. Index is up to date.")
                    return existing_index

                # Map comment lines, then merge new PRs into the existing index.
                self.line_mapper.map_all_comment_lines(new_prs)
                index = self.index_builder.merge_indexes(existing_index, new_prs)
            else:
                print("No existing index found. Performing full index...")
                index = self._perform_full_index(output_path, existing_index)
        else:
            # Full index (--clean).
            index = self._perform_full_index(output_path, existing_index)

        # Save index.
        self.index_builder.save_index(index, output_path)
        return index

    def _perform_full_index(
        self, _output_path: str, existing_index: Optional[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """Perform a full index build, tolerating an interrupted (partial) fetch."""
        total_prs_in_repo = self.api_client.get_total_pr_count()
        print(f"Starting clean rebuild ({total_prs_in_repo} PRs in repository)...")

        prs = self.fetch_all_prs()

        # Map comment lines.
        self.line_mapper.map_all_comment_lines(prs)

        # Check if this is a partial/interrupted fetch.
        is_partial = len(prs) < total_prs_in_repo

        if is_partial:
            print(f"⚠️ Partial fetch: got {len(prs)}/{total_prs_in_repo} PRs.")
            print("   Setting last_pr_number=0 to allow incremental resume...")

            if existing_index:
                # Merge with existing index to preserve PR data.
                print("   Merging with existing index to preserve PR data...")
                new_index = self.index_builder.build_index(prs, preserve_last_pr=0)
                return self.index_builder.merge_partial_clean(existing_index, new_index)
            else:
                # No existing index - build new one with last_pr_number=0.
                return self.index_builder.build_index(prs, preserve_last_pr=0)
        else:
            # Complete fetch.
            return self.index_builder.build_index(prs)
@@ -0,0 +1,209 @@
1
+ """
2
+ Line Mapper for PR Comments.
3
+
4
+ This module handles mapping comment line numbers from PR commits to current HEAD,
5
+ allowing comments to track code changes over time.
6
+ """
7
+
8
+ import subprocess
9
+ from pathlib import Path
10
+ from typing import Dict, List, Any, Optional
11
+
12
+ from cicada.utils import SubprocessRunner
13
+
14
+
15
class LineMapper:
    """
    Maps comment line numbers from PR commits to current file state.

    A review comment is anchored to a line in a specific commit; as the
    codebase evolves that line can move or disappear. This class makes a
    best-effort attempt to locate the same line in the current HEAD.
    """

    def __init__(self, repo_path: Path):
        """
        Initialize the line mapper.

        Args:
            repo_path: Path to the git repository
        """
        self.repo_path = repo_path
        self.runner = SubprocessRunner(cwd=repo_path)

    def map_all_comment_lines(self, prs: List[Dict[str, Any]]) -> None:
        """
        Map all comment lines in PRs to current line numbers.

        Updates the PRs in-place, setting the 'line' field on each comment.

        Args:
            prs: List of PR dictionaries to update in-place
        """
        print("Mapping comment lines to current file state...")

        total_comments = sum(len(pr.get("comments", [])) for pr in prs)
        if total_comments == 0:
            return

        resolved = 0
        lost = 0

        try:
            for pr in prs:
                for comment in pr.get("comments", []):
                    src_line = comment.get("original_line")
                    sha = comment.get("commit_sha")
                    path = comment.get("path")

                    # All three anchors are required to track a comment.
                    if not (src_line and sha and path):
                        comment["line"] = None
                        lost += 1
                        continue

                    target = self.map_line_to_current(path, src_line, sha)
                    comment["line"] = target
                    if target is None:
                        lost += 1
                    else:
                        resolved += 1

        except KeyboardInterrupt:
            print(
                f"\n\n⚠️ Line mapping interrupted. "
                f"Mapped {resolved}/{total_comments} comments."
            )
            print("Saving index with partial line mappings...")
            # Re-raise so the outer handler can still save the index.
            raise

        print(f" Mapped {resolved} comments, {lost} unmappable/outdated")

    def map_line_to_current(
        self, file_path: str, original_line: int, commit_sha: str
    ) -> Optional[int]:
        """
        Map a line number from a PR commit to the current HEAD.

        Best-effort strategy: confirm the file still exists, read it at both
        the original commit and HEAD, then search near the original position
        for a line with identical (stripped) content.

        Args:
            file_path: Path to the file
            original_line: Line number in the PR commit
            commit_sha: The commit SHA where the comment was made

        Returns:
            Current line number, or None if line no longer exists or can't be tracked
        """
        if not (original_line and commit_sha and file_path):
            return None

        try:
            if not self._file_exists(file_path):
                return None

            head_lines = self._get_file_lines("HEAD", file_path)
            if not head_lines:
                return None

            then_lines = self._get_file_lines(commit_sha, file_path)
            if not then_lines or original_line > len(then_lines):
                return None

            # The content of the commented line in the original commit.
            anchor = then_lines[original_line - 1].strip()
            if not anchor:
                # Blank lines have no content to match against.
                return None

            # Look for identical content near the original line number.
            return self._find_matching_line(head_lines, anchor, original_line)

        except (subprocess.CalledProcessError, IndexError, ValueError):
            return None

    def _file_exists(self, file_path: str) -> bool:
        """
        Check if a file exists in the current HEAD.

        Args:
            file_path: Path to the file

        Returns:
            True if file exists
        """
        try:
            listed = self.runner.run_git_command(["ls-files", file_path], check=False)
        except subprocess.CalledProcessError:
            return False
        # `git ls-files` prints the path only when it is tracked.
        return bool(listed.stdout.strip())

    def _get_file_lines(self, ref: str, file_path: str) -> Optional[List[str]]:
        """
        Get file lines at a specific git ref.

        Args:
            ref: Git reference (commit SHA, branch name, HEAD, etc.)
            file_path: Path to the file

        Returns:
            List of file lines, or None if file doesn't exist at that ref
        """
        try:
            shown = self.runner.run_git_command(
                ["show", f"{ref}:{file_path}"], check=False
            )
        except subprocess.CalledProcessError:
            return None

        if shown.returncode != 0:
            return None

        # split("\n") keeps a trailing empty entry when the file ends in a
        # newline; callers index 1-based so this is harmless.
        return shown.stdout.split("\n")

    def _find_matching_line(
        self,
        current_lines: List[str],
        original_content: str,
        original_line: int,
        search_range: int = 20,
    ) -> Optional[int]:
        """
        Find a matching line in the current file.

        Scans a window of ``search_range`` lines above and below the original
        position for an exact (stripped) content match.

        Args:
            current_lines: Lines from the current file
            original_content: Original line content (stripped)
            original_line: Original line number
            search_range: How many lines to search above/below (default: 20)

        Returns:
            Matching line number (1-indexed), or None if not found
        """
        lo = max(0, original_line - search_range - 1)  # 0-based window start
        hi = min(len(current_lines), original_line + search_range)

        for lineno, text in enumerate(current_lines[lo:hi], start=lo + 1):
            if text.strip() == original_content:
                return lineno

        # No match nearby: the line was likely deleted or rewritten.
        return None