PyPI - cicada-mcp - Versions diffs - 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

cicada-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

cicada/_version_hash.py +4 -0
cicada/cli.py +6 -748
cicada/commands.py +1255 -0
cicada/dead_code/__init__.py +1 -0
cicada/{find_dead_code.py → dead_code/finder.py} +2 -1
cicada/dependency_analyzer.py +147 -0
cicada/entry_utils.py +92 -0
cicada/extractors/base.py +9 -9
cicada/extractors/call.py +17 -20
cicada/extractors/common.py +64 -0
cicada/extractors/dependency.py +117 -235
cicada/extractors/doc.py +2 -49
cicada/extractors/function.py +10 -14
cicada/extractors/keybert.py +228 -0
cicada/extractors/keyword.py +191 -0
cicada/extractors/module.py +6 -10
cicada/extractors/spec.py +8 -56
cicada/format/__init__.py +20 -0
cicada/{ascii_art.py → format/ascii_art.py} +1 -1
cicada/format/formatter.py +1145 -0
cicada/git_helper.py +134 -7
cicada/indexer.py +322 -89
cicada/interactive_setup.py +251 -323
cicada/interactive_setup_helpers.py +302 -0
cicada/keyword_expander.py +437 -0
cicada/keyword_search.py +208 -422
cicada/keyword_test.py +383 -16
cicada/mcp/__init__.py +10 -0
cicada/mcp/entry.py +17 -0
cicada/mcp/filter_utils.py +107 -0
cicada/mcp/pattern_utils.py +118 -0
cicada/{mcp_server.py → mcp/server.py} +819 -73
cicada/mcp/tools.py +473 -0
cicada/pr_finder.py +2 -3
cicada/pr_indexer/indexer.py +3 -2
cicada/setup.py +167 -35
cicada/tier.py +225 -0
cicada/utils/__init__.py +9 -2
cicada/utils/fuzzy_match.py +54 -0
cicada/utils/index_utils.py +9 -0
cicada/utils/path_utils.py +18 -0
cicada/utils/text_utils.py +52 -1
cicada/utils/tree_utils.py +47 -0
cicada/version_check.py +99 -0
cicada/watch_manager.py +320 -0
cicada/watcher.py +431 -0
cicada_mcp-0.3.0.dist-info/METADATA +541 -0
cicada_mcp-0.3.0.dist-info/RECORD +70 -0
cicada_mcp-0.3.0.dist-info/entry_points.txt +4 -0
cicada/formatter.py +0 -864
cicada/keybert_extractor.py +0 -286
cicada/lightweight_keyword_extractor.py +0 -290
cicada/mcp_entry.py +0 -683
cicada/mcp_tools.py +0 -291
cicada_mcp-0.2.0.dist-info/METADATA +0 -735
cicada_mcp-0.2.0.dist-info/RECORD +0 -53
cicada_mcp-0.2.0.dist-info/entry_points.txt +0 -4
cicada/{dead_code_analyzer.py → dead_code/analyzer.py} +0 -0
cicada/{colors.py → format/colors.py} +0 -0
{cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/WHEEL +0 -0
{cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
{cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/top_level.txt +0 -0

cicada/{mcp_server.py → mcp/server.py} RENAMED

@@ -7,10 +7,11 @@ Provides an MCP tool to search for Elixir modules and their functions.
 Author: Cursor(Auto)
 """
-import contextlib
 import os
+import subprocess
 import sys
 import time
+from datetime import datetime, timedelta, timezone
 from pathlib import Path
 from typing import Any, cast
@@ -20,11 +21,18 @@ from mcp.server.stdio import stdio_server
 from mcp.types import TextContent, Tool
 from cicada.command_logger import get_logger
-from cicada.formatter import ModuleFormatter
+from cicada.format import ModuleFormatter
 from cicada.git_helper import GitHelper
-from cicada.mcp_tools import get_tool_definitions
+from cicada.mcp.pattern_utils import (
+    FunctionPattern,
+    has_wildcards,
+    match_any_pattern,
+    parse_function_patterns,
+    split_or_patterns,
+)
+from cicada.mcp.tools import get_tool_definitions
 from cicada.pr_finder import PRFinder
-from cicada.utils import get_config_path, get_pr_index_path, load_index
+from cicada.utils import find_similar_names, get_config_path, get_pr_index_path, load_index
 class CicadaServer:
@@ -73,32 +81,31 @@ class CicadaServer:
         Returns:
             Path to the config file
         """
-        # Check if CICADA_CONFIG_DIR is set (new temp directory approach)
+        # Check if CICADA_CONFIG_DIR is set (direct path to storage directory)
         config_dir = os.environ.get("CICADA_CONFIG_DIR")
         if config_dir:
             return str(Path(config_dir) / "config.yaml")
         # Determine repository path from environment or current directory
-        repo_path = os.environ.get("CICADA_REPO_PATH")
+        repo_path = None
         # Check if WORKSPACE_FOLDER_PATHS is available (Cursor-specific)
-        if not repo_path:
-            workspace_paths = os.environ.get("WORKSPACE_FOLDER_PATHS")
-            if workspace_paths:
-                # WORKSPACE_FOLDER_PATHS might be a single path or multiple paths
-                # Take the first one if multiple
-                # Use os.pathsep for platform-aware splitting (';' on Windows, ':' on Unix)
-                repo_path = (
-                    workspace_paths.split(os.pathsep)[0]
-                    if os.pathsep in workspace_paths
-                    else workspace_paths
-                )
+        workspace_paths = os.environ.get("WORKSPACE_FOLDER_PATHS")
+        if workspace_paths:
+            # WORKSPACE_FOLDER_PATHS might be a single path or multiple paths
+            # Take the first one if multiple
+            # Use os.pathsep for platform-aware splitting (';' on Windows, ':' on Unix)
+            repo_path = (
+                workspace_paths.split(os.pathsep)[0]
+                if os.pathsep in workspace_paths
+                else workspace_paths
+            )
         # Fall back to current working directory
         if not repo_path:
             repo_path = str(Path.cwd().resolve())
-        # Use new storage structure only
+        # Calculate config path from repository path
         config_path = get_config_path(repo_path)
         return str(config_path)
@@ -193,6 +200,77 @@ class CicadaServer:
                     return True
         return False
+    def _check_index_staleness(self) -> dict[str, Any] | None:
+        """
+        Check if the index is stale by comparing file modification times.
+        Returns:
+            Dictionary with staleness info (is_stale, index_age, newest_file_age) or None
+        """
+        try:
+            import os
+            import random
+            from datetime import datetime
+            # Get index file path and modification time
+            index_path = Path(self.config["storage"]["index_path"])
+            if not index_path.exists():
+                return None
+            index_mtime = os.path.getmtime(index_path)
+            index_age = datetime.now().timestamp() - index_mtime
+            # Get repo path
+            repo_path = Path(self.config.get("repository", {}).get("path", "."))
+            # Check a sample of indexed files to see if any are newer than the index
+            # Use random sampling for better coverage
+            max_files_to_check = 50
+            all_modules = list(self.index.get("modules", {}).values())
+            if len(all_modules) > max_files_to_check:
+                modules_to_check = random.sample(all_modules, max_files_to_check)
+            else:
+                modules_to_check = all_modules
+            newest_file_mtime = 0
+            for module_data in modules_to_check:
+                file_path = repo_path / module_data["file"]
+                if file_path.exists():
+                    file_mtime = os.path.getmtime(file_path)
+                    newest_file_mtime = max(newest_file_mtime, file_mtime)
+            # Check if any files are newer than the index
+            is_stale = newest_file_mtime > index_mtime
+            if is_stale:
+                # Calculate how old the index is in human-readable format
+                hours_old = index_age / 3600
+                if hours_old < 1:
+                    age_str = f"{int(index_age / 60)} minutes"
+                elif hours_old < 24:
+                    age_str = f"{int(hours_old)} hours"
+                else:
+                    age_str = f"{int(hours_old / 24)} days"
+                return {
+                    "is_stale": True,
+                    "age_str": age_str,
+                }
+            return None
+        except (OSError, KeyError):
+            # Expected errors - file permissions, disk issues, config issues
+            # Silently ignore these as staleness check is non-critical
+            return None
+        except Exception as e:
+            # Unexpected error - log for debugging but don't break functionality
+            import sys
+            print(f"Warning: Unexpected error checking index staleness: {e}", file=sys.stderr)
+            return None
     async def list_tools(self) -> list[Tool]:
         """List available MCP tools."""
         return get_tool_definitions()
@@ -259,6 +337,8 @@ class CicadaServer:
             include_usage_examples = arguments.get("include_usage_examples", False)
             max_examples = arguments.get("max_examples", 5)
             test_files_only = arguments.get("test_files_only", False)
+            changed_since = arguments.get("changed_since")
+            show_relationships = arguments.get("show_relationships", True)
             if not function_name:
                 error_msg = "'function_name' is required"
@@ -270,16 +350,23 @@ class CicadaServer:
                 include_usage_examples,
                 max_examples,
                 test_files_only,
+                changed_since,
+                show_relationships,
             )
         elif name == "search_module_usage":
             module_name = arguments.get("module_name")
             output_format = arguments.get("format", "markdown")
+            usage_type = arguments.get("usage_type", "all")
             if not module_name:
                 error_msg = "'module_name' is required"
                 return [TextContent(type="text", text=error_msg)]
-            return await self._search_module_usage(module_name, output_format)
+            if usage_type not in ("all", "test_only", "production_only"):
+                error_msg = "'usage_type' must be one of: 'all', 'test_only', 'production_only'"
+                return [TextContent(type="text", text=error_msg)]
+            return await self._search_module_usage(module_name, output_format, usage_type)
         elif name == "find_pr_for_line":
             file_path = arguments.get("file_path")
             line_number = arguments.get("line_number")
@@ -302,6 +389,10 @@ class CicadaServer:
             precise_tracking = arguments.get("precise_tracking", False)
             show_evolution = arguments.get("show_evolution", False)
             max_commits = arguments.get("max_commits", 10)
+            since_date = arguments.get("since_date")
+            until_date = arguments.get("until_date")
+            author = arguments.get("author")
+            min_changes = arguments.get("min_changes", 0)
             if not file_path:
                 error_msg = "'file_path' is required"
@@ -320,6 +411,10 @@ class CicadaServer:
                 precise_tracking,
                 show_evolution,
                 max_commits,
+                since_date,
+                until_date,
+                author,
+                min_changes,
             )
         elif name == "get_blame":
             file_path = arguments.get("file_path")
@@ -343,8 +438,12 @@ class CicadaServer:
                 return [TextContent(type="text", text=error_msg)]
             return await self._get_file_pr_history(file_path)
-        elif name == "search_by_keywords":
+        elif name == "search_by_features" or name == "search_by_keywords":
+            # Support both names for backward compatibility
+            # search_by_keywords is deprecated but still functional
             keywords = arguments.get("keywords")
+            filter_type = arguments.get("filter_type", "all")
+            min_score = arguments.get("min_score", 0.0)
             if not keywords:
                 error_msg = "'keywords' is required"
@@ -354,15 +453,76 @@ class CicadaServer:
                 error_msg = "'keywords' must be a list of strings"
                 return [TextContent(type="text", text=error_msg)]
-            return await self._search_by_keywords(keywords)
+            if filter_type not in ("all", "modules", "functions"):
+                error_msg = "'filter_type' must be one of: 'all', 'modules', 'functions'"
+                return [TextContent(type="text", text=error_msg)]
+            if not isinstance(min_score, (int, float)) or min_score < 0.0 or min_score > 1.0:
+                error_msg = "'min_score' must be a number between 0.0 and 1.0"
+                return [TextContent(type="text", text=error_msg)]
+            return await self._search_by_keywords(keywords, filter_type, min_score)
         elif name == "find_dead_code":
             min_confidence = arguments.get("min_confidence", "high")
             output_format = arguments.get("format", "markdown")
             return await self._find_dead_code(min_confidence, output_format)
+        elif name == "get_module_dependencies":
+            module_name = arguments.get("module_name")
+            if not module_name:
+                raise ValueError("module_name is required")
+            output_format = arguments.get("format", "markdown")
+            depth = arguments.get("depth", 1)
+            granular = arguments.get("granular", False)
+            return await self._get_module_dependencies(module_name, output_format, depth, granular)
+        elif name == "get_function_dependencies":
+            module_name = arguments.get("module_name")
+            function_name = arguments.get("function_name")
+            arity = arguments.get("arity")
+            if not module_name:
+                raise ValueError("module_name is required")
+            if not function_name:
+                raise ValueError("function_name is required")
+            if arity is None:
+                raise ValueError("arity is required")
+            output_format = arguments.get("format", "markdown")
+            include_context = arguments.get("include_context", False)
+            return await self._get_function_dependencies(
+                module_name, function_name, arity, output_format, include_context
+            )
         else:
             raise ValueError(f"Unknown tool: {name}")
+    def _lookup_module_with_error(
+        self, module_name: str, include_suggestions: bool = True
+    ) -> tuple[dict | None, str | None]:
+        """
+        Look up a module in the index with error handling.
+        Args:
+            module_name: Module name to look up
+            include_suggestions: Whether to include similar module suggestions in error
+        Returns:
+            Tuple of (module_data, error_message). If found, returns (data, None).
+            If not found, returns (None, error_message).
+        """
+        module_data = self.index["modules"].get(module_name)
+        if module_data:
+            return module_data, None
+        # Module not found - create error message
+        error_msg = f"Module not found: {module_name}"
+        if include_suggestions:
+            similar = find_similar_names(module_name, list(self.index["modules"].keys()))
+            if similar:
+                error_msg += "\n\nDid you mean one of these?\n" + "\n".join(
+                    f"  - {name}" for name in similar[:5]
+                )
+        return None, error_msg
     def _resolve_file_to_module(self, file_path: str) -> str | None:
         """Resolve a file path to a module name by searching the index."""
         # Normalize the file path (remove leading ./ and trailing whitespace)
@@ -394,27 +554,99 @@ class CicadaServer:
         output_format: str = "markdown",
         private_functions: str = "exclude",
     ) -> list[TextContent]:
-        """Search for a module and return its information."""
-        # Exact match lookup
+        """
+        Search for a module and return its information.
+        Supports wildcards (*) and OR patterns (|) for both module names and file paths.
+        Examples:
+            - "MyApp.*" - matches all modules starting with MyApp.
+            - "*User*" - matches all modules containing User
+            - "lib/my_app/*.ex" - matches all modules in that directory
+            - "MyApp.User|MyApp.Post" - matches either module
+            - "*User*|*Post*" - matches modules containing User OR Post
+        """
+        # Check for wildcard or OR patterns
+        if has_wildcards(module_name):
+            # Split by OR patterns
+            patterns = split_or_patterns(module_name)
+            # Find all matching modules
+            matching_modules = []
+            for mod_name, mod_data in self.index["modules"].items():
+                # Check if module name or file path matches any pattern
+                if match_any_pattern(patterns, mod_name) or match_any_pattern(
+                    patterns, mod_data["file"]
+                ):
+                    matching_modules.append((mod_name, mod_data))
+            # If no matches found, return error
+            if not matching_modules:
+                total_modules = self.index["metadata"]["total_modules"]
+                if output_format == "json":
+                    error_result = ModuleFormatter.format_error_json(module_name, total_modules)
+                else:
+                    error_result = ModuleFormatter.format_error_markdown(module_name, total_modules)
+                return [TextContent(type="text", text=error_result)]
+            # Format all matching modules
+            results: list[str] = []
+            for mod_name, mod_data in matching_modules:
+                if output_format == "json":
+                    result = ModuleFormatter.format_module_json(
+                        mod_name, mod_data, private_functions
+                    )
+                else:
+                    result = ModuleFormatter.format_module_markdown(
+                        mod_name, mod_data, private_functions
+                    )
+                results.append(result)
+            # Combine results with separator for markdown, or as array for JSON
+            if output_format == "json":
+                # For JSON, wrap in array notation
+                combined = "[\n" + ",\n".join(results) + "\n]"
+            else:
+                # For markdown, separate with horizontal rules
+                header = (
+                    f"Found {len(matching_modules)} module(s) matching pattern '{module_name}':\n\n"
+                )
+                combined = header + "\n\n---\n\n".join(results)
+            return [TextContent(type="text", text=combined)]
+        # Exact match lookup (no wildcards)
         if module_name in self.index["modules"]:
             data = self.index["modules"][module_name]
+            # Get PR context for the file
+            pr_info = self._get_recent_pr_info(data["file"])
+            # Check index staleness
+            staleness_info = self._check_index_staleness()
             if output_format == "json":
                 result = ModuleFormatter.format_module_json(module_name, data, private_functions)
             else:
                 result = ModuleFormatter.format_module_markdown(
-                    module_name, data, private_functions
+                    module_name, data, private_functions, pr_info, staleness_info
                 )
             return [TextContent(type="text", text=result)]
-        # Module not found
+        # Module not found - compute suggestions and provide helpful error message
         total_modules = self.index["metadata"]["total_modules"]
         if output_format == "json":
             error_result = ModuleFormatter.format_error_json(module_name, total_modules)
         else:
-            error_result = ModuleFormatter.format_error_markdown(module_name, total_modules)
+            # Compute fuzzy match suggestions
+            available_modules = list(self.index["modules"].keys())
+            similar_matches = find_similar_names(module_name, available_modules, max_suggestions=3)
+            suggestions = [name for name, _score in similar_matches]
+            error_result = ModuleFormatter.format_error_markdown(
+                module_name, total_modules, suggestions
+            )
         return [TextContent(type="text", text=error_result)]
@@ -425,46 +657,60 @@ class CicadaServer:
         include_usage_examples: bool = False,
         max_examples: int = 5,
         test_files_only: bool = False,
+        changed_since: str | None = None,
+        show_relationships: bool = True,
     ) -> list[TextContent]:
-        """Search for a function across all modules and return matches with call sites."""
-        # Parse the function name - supports multiple formats:
-        # - "func_name" or "func_name/arity" (search all modules)
-        # - "Module.func_name" or "Module.func_name/arity" (search specific module)
-        target_module = None
-        target_name = function_name
-        target_arity = None
-        # Check for Module.function format
-        if "." in function_name:
-            # Split on last dot to separate module from function
-            parts = function_name.rsplit(".", 1)
-            if len(parts) == 2:
-                target_module = parts[0]
-                target_name = parts[1]
-        # Check for arity
-        if "/" in target_name:
-            parts = target_name.split("/")
-            target_name = parts[0]
-            with contextlib.suppress(ValueError, IndexError):
-                target_arity = int(parts[1])
+        """
+        Search for a function across all modules and return matches with call sites.
+        Supports wildcards (*) and OR patterns (|) for function names, module names, and file paths.
+        Examples:
+            - "create*" - matches all functions starting with create
+            - "*user*" - matches all functions containing user
+            - "MyApp.User.create*" - matches create* functions in MyApp.User module
+            - "create*|update*" - matches functions starting with create OR update
+            - "MyApp.*.create/1" - matches create/1 in any module under MyApp
+            - "lib/*/user.ex:create*" - matches create* functions in files matching path pattern
+        """
+        # Support OR syntax by splitting first, then parsing each component individually
+        parsed_patterns: list[FunctionPattern] = parse_function_patterns(function_name)
         # Search across all modules for function definitions
         results = []
-        for module_name, module_data in self.index["modules"].items():
-            # If target_module is specified, only search in that module
-            if target_module and module_name != target_module:
-                continue
+        seen_functions: set[tuple[str, str, int]] = set()
+        # Parse changed_since filter if provided
+        cutoff_date = None
+        if changed_since:
+            cutoff_date = self._parse_changed_since(changed_since)
+        for module_name, module_data in self.index["modules"].items():
             for func in module_data["functions"]:
-                # Match by name and optionally arity
-                if func["name"] == target_name and (
-                    target_arity is None or func["arity"] == target_arity
+                if any(
+                    pattern.matches(module_name, module_data["file"], func)
+                    for pattern in parsed_patterns
                 ):
+                    # Filter by changed_since if provided
+                    if cutoff_date:
+                        func_modified = func.get("last_modified_at")
+                        if not func_modified:
+                            continue  # Skip functions without timestamp
+                        func_modified_dt = datetime.fromisoformat(func_modified)
+                        # Ensure timezone-aware for comparison
+                        if func_modified_dt.tzinfo is None:
+                            func_modified_dt = func_modified_dt.replace(tzinfo=timezone.utc)
+                        if func_modified_dt < cutoff_date:
+                            continue  # Function too old, skip
+                    key = (module_name, func["name"], func["arity"])
+                    if key in seen_functions:
+                        continue
+                    seen_functions.add(key)
                     # Find call sites for this function
                     call_sites = self._find_call_sites(
                         target_module=module_name,
-                        target_function=target_name,
+                        target_function=func["name"],
                         target_arity=func["arity"],
                     )
@@ -482,6 +728,14 @@ class CicadaServer:
                         # Extract code lines for each call site
                         self._add_code_examples(call_sites_with_examples)
+                    # Get PR context for this function
+                    pr_info = self._get_recent_pr_info(module_data["file"])
+                    # Get function dependencies if show_relationships is enabled
+                    dependencies = []
+                    if show_relationships:
+                        dependencies = func.get("dependencies", [])
                     results.append(
                         {
                             "module": module_name,
@@ -490,19 +744,26 @@ class CicadaServer:
                             "file": module_data["file"],
                             "call_sites": call_sites,
                             "call_sites_with_examples": call_sites_with_examples,
+                            "pr_info": pr_info,
+                            "dependencies": dependencies,
                         }
                     )
+        # Check index staleness
+        staleness_info = self._check_index_staleness()
         # Format results
         if output_format == "json":
             result = ModuleFormatter.format_function_results_json(function_name, results)
         else:
-            result = ModuleFormatter.format_function_results_markdown(function_name, results)
+            result = ModuleFormatter.format_function_results_markdown(
+                function_name, results, staleness_info, show_relationships
+            )
         return [TextContent(type="text", text=result)]
     async def _search_module_usage(
-        self, module_name: str, output_format: str = "markdown"
+        self, module_name: str, output_format: str = "markdown", usage_type: str = "all"
     ) -> list[TextContent]:
         """
         Search for all locations where a module is used (aliased/imported and called).
@@ -510,6 +771,7 @@ class CicadaServer:
         Args:
             module_name: The module to search for (e.g., "MyApp.User")
             output_format: Output format ('markdown' or 'json')
+            usage_type: Filter by file type ('all', 'test_only', 'production_only')
         Returns:
             TextContent with usage information
@@ -624,6 +886,21 @@ class CicadaServer:
                     }
                 )
+        # Apply usage type filter if not 'all'
+        if usage_type != "all":
+            from cicada.mcp.filter_utils import filter_by_file_type
+            # Filter each category that has file information
+            for category in [
+                "aliases",
+                "imports",
+                "requires",
+                "uses",
+                "value_mentions",
+                "function_calls",
+            ]:
+                usage_results[category] = filter_by_file_type(usage_results[category], usage_type)
         # Format results
         if output_format == "json":
             result = ModuleFormatter.format_module_usage_json(module_name, usage_results)
@@ -801,6 +1078,131 @@ class CicadaServer:
         return call_sites
+    def _parse_changed_since(self, changed_since: str) -> datetime:
+        """
+        Parse changed_since parameter into datetime.
+        Supports:
+        - ISO dates: '2024-01-15'
+        - Relative: '7d', '2w', '3m', '1y'
+        - Git refs: 'HEAD~10', 'v1.0.0' (if git_helper available)
+        Returns:
+            datetime object (timezone-aware) representing the cutoff date
+        Raises:
+            ValueError: If format is invalid or amount is negative/zero
+        """
+        # ISO date format (YYYY-MM-DD)
+        if "-" in changed_since and len(changed_since) >= 10:
+            try:
+                dt = datetime.fromisoformat(changed_since)
+                # Ensure timezone-aware - if naive, assume UTC
+                if dt.tzinfo is None:
+                    dt = dt.replace(tzinfo=timezone.utc)
+                return dt
+            except ValueError:
+                pass
+        # Relative format (7d, 2w, 3m, 1y)
+        if len(changed_since) >= 2 and changed_since[-1] in "dwmy":
+            try:
+                amount = int(changed_since[:-1])
+                unit = changed_since[-1]
+                # Validate positive amount
+                if amount <= 0:
+                    raise ValueError(f"Time amount must be positive, got: {amount}{unit}")
+                now = datetime.now(timezone.utc)
+                if unit == "d":
+                    return now - timedelta(days=amount)
+                elif unit == "w":
+                    return now - timedelta(weeks=amount)
+                elif unit == "m":
+                    return now - timedelta(days=amount * 30)
+                elif unit == "y":
+                    return now - timedelta(days=amount * 365)
+            except ValueError as e:
+                # Re-raise if it's our validation error
+                if "Time amount must be positive" in str(e):
+                    raise
+                # Otherwise, try next format (likely invalid int parsing)
+        # Git ref format (requires git_helper)
+        if self.git_helper:
+            try:
+                # Validate git ref format to prevent command injection
+                # Refs should not start with - or -- (could be flags)
+                if changed_since.startswith("-"):
+                    raise ValueError(f"Invalid git ref format (starts with '-'): {changed_since}")
+                # Get timestamp of the ref using git show
+                repo_path = self.git_helper.repo_path
+                result = subprocess.run(
+                    ["git", "show", "-s", "--format=%ai", changed_since],
+                    cwd=repo_path,
+                    capture_output=True,
+                    text=True,
+                    check=True,
+                )
+                dt = datetime.fromisoformat(result.stdout.strip())
+                # Git returns timezone-aware datetime, ensure it has tzinfo
+                if dt.tzinfo is None:
+                    dt = dt.replace(tzinfo=timezone.utc)
+                return dt
+            except subprocess.CalledProcessError:
+                # Git command failed - invalid ref or other git error
+                pass
+            except ValueError:
+                # Re-raise validation errors
+                raise
+            except Exception:
+                # Other errors (e.g., datetime parsing) - try next format
+                pass
+        raise ValueError(f"Invalid changed_since format: {changed_since}")
+    def _get_recent_pr_info(self, file_path: str) -> dict | None:
+        """
+        Get the most recent PR that modified a file.
+        Args:
+            file_path: Relative path to the file
+        Returns:
+            Dictionary with PR info (number, title, date, comment_count) or None
+        """
+        if not self.pr_index:
+            return None
+        # Look up PRs for this file
+        file_to_prs = self.pr_index.get("file_to_prs", {})
+        pr_numbers = file_to_prs.get(file_path, [])
+        if not pr_numbers:
+            return None
+        # Get the most recent PR (last in list)
+        prs_data = self.pr_index.get("prs", {})
+        most_recent_pr_num = pr_numbers[-1]
+        pr = prs_data.get(str(most_recent_pr_num))
+        if not pr:
+            return None
+        # Count comments for this file
+        comments = pr.get("comments", [])
+        file_comments = [c for c in comments if c.get("path") == file_path]
+        return {
+            "number": pr["number"],
+            "title": pr["title"],
+            "author": pr.get("author", "unknown"),
+            "comment_count": len(file_comments),
+            "url": pr.get("url", ""),
+        }
     def _find_function_at_line(self, module_name: str, line: int) -> dict | None:
         """
         Find the function that contains a specific line number.
@@ -951,6 +1353,10 @@ class CicadaServer:
         _precise_tracking: bool = False,
         show_evolution: bool = False,
         max_commits: int = 10,
+        since_date: str | None = None,
+        until_date: str | None = None,
+        author: str | None = None,
+        min_changes: int = 0,
     ) -> list[TextContent]:
         """
         Get git commit history for a file or function.
@@ -963,6 +1369,10 @@ class CicadaServer:
             precise_tracking: Deprecated (function tracking is always used when function_name provided)
             show_evolution: Include function evolution metadata
             max_commits: Maximum number of commits to return
+            since_date: Only include commits after this date (ISO format or relative like '7d', '2w')
+            until_date: Only include commits before this date (ISO format or relative)
+            author: Filter by author name (substring match)
+            min_changes: Minimum number of lines changed
         Returns:
             TextContent with formatted commit history
@@ -972,11 +1382,27 @@ class CicadaServer:
             - Function tracking works even as the function moves in the file
             - Line numbers are used as fallback if function tracking fails
             - Requires .gitattributes with "*.ex diff=elixir" for function tracking
+            - Date filters only work with file-level history (not function/line tracking)
         """
         if not self.git_helper:
             error_msg = "Git history is not available (repository may not be a git repo)"
             return [TextContent(type="text", text=error_msg)]
+        # Parse date filters if provided
+        since_datetime = None
+        until_datetime = None
+        if since_date:
+            since_datetime = self._parse_changed_since(since_date)
+        if until_date:
+            until_datetime = self._parse_changed_since(until_date)
+        # Check if any filters are being used (only supported for file-level history)
+        has_filters = since_date or until_date or author or min_changes > 0
+        if has_filters and (function_name or (start_line and end_line)):
+            warning_msg = "⚠️  Date/author/min_changes filters only work with file-level history (without function_name or line range)\n\n"
+        else:
+            warning_msg = ""
         try:
             evolution = None
             tracking_method = "file"
@@ -1021,7 +1447,17 @@ class CicadaServer:
                     )
             else:
                 # File-level history
-                commits = self.git_helper.get_file_history(file_path, max_commits)
+                if has_filters:
+                    commits = self.git_helper.get_file_history_filtered(
+                        file_path,
+                        max_commits=max_commits,
+                        since_date=since_datetime,
+                        until_date=until_datetime,
+                        author=author,
+                        min_changes=min_changes,
+                    )
+                else:
+                    commits = self.git_helper.get_file_history(file_path, max_commits)
                 title = f"Git History for {file_path}"
             if not commits:
@@ -1031,6 +1467,23 @@ class CicadaServer:
             # Format the results as markdown
             lines = [f"# {title}\n"]
+            # Add warning if filters were specified but not used
+            if warning_msg:
+                lines.append(warning_msg)
+            # Add filter information if filters were used
+            if has_filters and not (function_name or (start_line and end_line)):
+                filter_parts = []
+                if since_date:
+                    filter_parts.append(f"since {since_date}")
+                if until_date:
+                    filter_parts.append(f"until {until_date}")
+                if author:
+                    filter_parts.append(f"author: {author}")
+                if min_changes > 0:
+                    filter_parts.append(f"min changes: {min_changes}")
+                lines.append(f"*Filters: {', '.join(filter_parts)}*\n")
             # Add tracking method info
             if tracking_method == "function":
                 lines.append(
@@ -1256,40 +1709,56 @@ class CicadaServer:
         result = "\n".join(lines)
         return [TextContent(type="text", text=result)]
-    async def _search_by_keywords(self, keywords: list[str]) -> list[TextContent]:
+    async def _search_by_keywords(
+        self, keywords: list[str], filter_type: str = "all", min_score: float = 0.0
+    ) -> list[TextContent]:
         """
         Search for modules and functions by keywords.
         Args:
             keywords: List of keywords to search for
+            filter_type: Filter results by type ('all', 'modules', 'functions'). Defaults to 'all'.
+            min_score: Minimum relevance score threshold (0.0 to 1.0). Defaults to 0.0.
+                A value of 0.0 (or below) skips the score filter entirely.
         Returns:
             TextContent with formatted search results
+            (a plain-text notice instead when the index has no keywords or nothing matches)
         """
         from cicada.keyword_search import KeywordSearcher
+        from cicada.mcp.filter_utils import filter_by_score_threshold
         # Check if keywords are available (cached at initialization)
         if not self._has_keywords:
             error_msg = (
                 "No keywords found in index. Please rebuild the index with keyword extraction:\n\n"
-                "  cicada index --nlp   # NLP-based extraction (lemminflect)\n"
-                "  cicada index --rag   # BERT-based extraction\n\n"
+                "  cicada index           # Default: reuse configured tier\n"
+                "  cicada index --force --regular   # BERT + GloVe (regular tier)\n"
+                "  cicada index --force --fast      # Fast: Token-based + lemminflect\n"
+                "  cicada index --force --max       # Max: BERT + FastText\n\n"
                 "This will extract keywords from documentation for semantic search."
             )
             return [TextContent(type="text", text=error_msg)]
         # Perform the search
         searcher = KeywordSearcher(self.index)
-        results = searcher.search(keywords, top_n=5)
+        # Ask the searcher for at most 20 candidates, restricted to filter_type
+        results = searcher.search(keywords, top_n=20, filter_type=filter_type)
+        # Apply score threshold filter
+        if min_score > 0.0:
+            results = filter_by_score_threshold(results, min_score)
         if not results:
-            result = f"No results found for keywords: {', '.join(keywords)}"
+            # Include the threshold in the message when a score filter was applied
+            if min_score > 0.0:
+                result = f"No results found for keywords: {', '.join(keywords)} with min_score >= {min_score}"
+            else:
+                result = f"No results found for keywords: {', '.join(keywords)}"
             return [TextContent(type="text", text=result)]
         # Format results
-        from cicada.formatter import ModuleFormatter
+        from cicada.format import ModuleFormatter
-        formatted_result = ModuleFormatter.format_keyword_search_results_markdown(keywords, results)
+        formatted_result = ModuleFormatter.format_keyword_search_results_markdown(
+            keywords, results, show_scores=True
+        )
         return [TextContent(type="text", text=formatted_result)]
@@ -1304,8 +1773,8 @@ class CicadaServer:
         Returns:
             TextContent with formatted dead code analysis
         """
-        from cicada.dead_code_analyzer import DeadCodeAnalyzer
-        from cicada.find_dead_code import (
+        from cicada.dead_code.analyzer import DeadCodeAnalyzer
+        from cicada.dead_code.finder import (
             filter_by_confidence,
             format_json,
             format_markdown,
@@ -1323,6 +1792,276 @@ class CicadaServer:
         return [TextContent(type="text", text=output)]
+    async def _get_module_dependencies(
+        self, module_name: str, output_format: str, depth: int, granular: bool = False
+    ) -> list[TextContent]:
+        """
+        Report the modules a given module depends on.
+        Direct dependencies are read from the module's index entry; with depth > 1
+        the dependency lists of those modules are followed level by level.
+        Args:
+            module_name: Module name to analyze
+            output_format: 'json' for a JSON document; any other value yields markdown
+            depth: Number of levels to follow (1 = direct only, 2 = also dependencies of dependencies)
+            granular: Also report which functions of the module call into each direct dependency
+        Returns:
+            TextContent with the formatted dependency report, or the lookup
+            error message when the module cannot be resolved
+        """
+        import json
+        module_data, error_msg = self._lookup_module_with_error(module_name)
+        if error_msg:
+            return [TextContent(type="text", text=error_msg)]
+        # A missing error message means the lookup produced a module entry
+        assert module_data is not None
+        direct_modules = module_data.get("dependencies", {}).get("modules", [])
+        # dependency module -> call sites inside this module's functions
+        granular_info: dict[str, list[dict[str, Any]]] = {}
+        if granular:
+            for func in module_data.get("functions", []):
+                for call in func.get("dependencies", []):
+                    callee_module = call.get("module", "")
+                    if callee_module not in direct_modules:
+                        continue
+                    granular_info.setdefault(callee_module, []).append(
+                        {
+                            "function": func.get("name"),
+                            "arity": func.get("arity"),
+                            "line": func.get("line"),
+                            "calls": f"{call.get('function')}/{call.get('arity')}",
+                            "call_line": call.get("line"),
+                        }
+                    )
+        # Level-by-level walk over the index for transitive dependencies
+        all_modules = set(direct_modules)
+        if depth > 1:
+            seen = {module_name}  # never expand the starting module again (cycles)
+            frontier = list(direct_modules)
+            for _ in range(depth - 1):
+                upcoming = []
+                for candidate in frontier:
+                    if candidate in seen:
+                        continue
+                    seen.add(candidate)
+                    candidate_data = self.index["modules"].get(candidate)
+                    if not candidate_data:
+                        continue
+                    nested = candidate_data.get("dependencies", {}).get("modules", [])
+                    all_modules.update(nested)
+                    upcoming.extend(nested)
+                frontier = upcoming
+        if output_format == "json":
+            payload: dict[str, Any] = {
+                "module": module_name,
+                "dependencies": {
+                    "direct": sorted(direct_modules),
+                    "all": sorted(all_modules) if depth > 1 else sorted(direct_modules),
+                    "depth": depth,
+                },
+            }
+            if granular:
+                payload["granular"] = granular_info
+            return [TextContent(type="text", text=json.dumps(payload, indent=2))]
+        # Markdown report
+        report = [f"# Dependencies for {module_name}\n"]
+        if direct_modules:
+            report.append(f"## Direct Dependencies ({len(direct_modules)})\n")
+            for name in sorted(direct_modules):
+                report.append(f"- {name}")
+                if not (granular and name in granular_info):
+                    continue
+                usages = granular_info[name]
+                report.append(f"  Used by {len(usages)} function(s):")
+                # Show at most three example call sites per dependency
+                report.extend(
+                    f"    • {use['function']}/{use['arity']} (line {use['line']}) → calls {use['calls']} (line {use['call_line']})"
+                    for use in usages[:3]
+                )
+                if len(usages) > 3:
+                    report.append(f"    ... and {len(usages) - 3} more")
+            report.append("")
+        if depth > 1 and len(all_modules) > len(direct_modules):
+            transitive = sorted(all_modules - set(direct_modules))
+            report.append(f"## Transitive Dependencies ({len(transitive)})\n")
+            report.extend(f"- {name}" for name in transitive)
+            report.append("")
+        if not direct_modules:
+            report.append("*No dependencies found*")
+        return [TextContent(type="text", text="\n".join(report))]
+    def _format_dependency_with_context(
+        self,
+        dep: dict,
+        context_lines: dict,
+        include_context: bool,
+        include_module: bool = False,
+    ) -> list[str]:
+        """
+        Render one dependency as a markdown bullet, optionally followed by a code fence.
+        Args:
+            dep: Dependency dict with module, function, arity, line
+            context_lines: Dict mapping line numbers to code context
+            include_context: Append the code context for dep's line when one is available
+            include_module: Prefix the function with its module name
+        Returns:
+            List of formatted lines (the bullet, plus three fence lines when context is shown)
+        """
+        call_line = dep["line"]
+        target = (
+            f"{dep['module']}.{dep['function']}/{dep['arity']}"
+            if include_module
+            else f"{dep['function']}/{dep['arity']}"
+        )
+        rendered = [f"- {target} (line {call_line})"]
+        # The fence is only emitted when context was requested AND captured for this line
+        if include_context and call_line in context_lines:
+            rendered += ["  ```elixir", f"  {context_lines[call_line]}", "  ```"]
+        return rendered
+    async def _get_function_dependencies(
+        self,
+        module_name: str,
+        function_name: str,
+        arity: int,
+        output_format: str,
+        include_context: bool,
+    ) -> list[TextContent]:
+        """
+        Get all functions that a given function calls.
+        Args:
+            module_name: Module name containing the function
+            function_name: Function name to analyze
+            arity: Function arity
+            output_format: 'json' for a JSON document; any other value yields markdown
+            include_context: Whether to show the source lines around each call
+                (markdown output only; the JSON output never carries context)
+        Returns:
+            TextContent with formatted dependency information, or an error
+            message when the module or the function cannot be found
+        """
+        import json
+        # Look up the module in the index (no suggestions for function lookup)
+        module_data, error_msg = self._lookup_module_with_error(
+            module_name, include_suggestions=False
+        )
+        if error_msg:
+            return [TextContent(type="text", text=error_msg)]
+        # A missing error message means the lookup produced a module entry
+        assert module_data is not None
+        # Find the function by exact name and arity
+        functions = module_data.get("functions", [])
+        target_func = next(
+            (f for f in functions if f["name"] == function_name and f["arity"] == arity),
+            None,
+        )
+        if not target_func:
+            error_msg = (
+                f"Function not found: {module_name}.{function_name}/{arity}\n\n"
+                f"Available functions in {module_name}:\n"
+            )
+            # Only the first ten functions are listed to keep the message short
+            available = [f"  - {f['name']}/{f['arity']}" for f in functions[:10]]
+            error_msg += "\n".join(available)
+            return [TextContent(type="text", text=error_msg)]
+        dependencies = target_func.get("dependencies", [])
+        # call line -> surrounding source text; stays empty when context is unavailable
+        context_lines = {}
+        if include_context and dependencies:
+            repo_path = self.config.get("repository", {}).get("path", ".")
+            file_path = Path(repo_path) / module_data["file"]
+            try:
+                # Decode explicitly as UTF-8 instead of the platform locale so the
+                # result does not vary between machines (Elixir sources are
+                # expected to be UTF-8)
+                with open(file_path, encoding="utf-8") as f:
+                    source_lines = f.readlines()
+            except (OSError, UnicodeDecodeError):
+                # Context is best-effort: an unreadable or undecodable file
+                # simply produces a report without code snippets
+                source_lines = []
+            for dep in dependencies:
+                line_num = dep["line"]
+                if 1 <= line_num <= len(source_lines):
+                    # Up to 3 lines of context: the line before, the call line, the line after
+                    start = max(0, line_num - 2)
+                    end = min(len(source_lines), line_num + 1)
+                    context_lines[line_num] = "".join(source_lines[start:end]).rstrip()
+        # Format output
+        if output_format == "json":
+            result = {
+                "module": module_name,
+                "function": f"{function_name}/{arity}",
+                "dependencies": dependencies,
+            }
+            output = json.dumps(result, indent=2)
+        else:
+            # Markdown format
+            lines = [f"# Dependencies for {module_name}.{function_name}/{arity}\n"]
+            if dependencies:
+                # Calls into the same module are "internal", everything else "external"
+                internal = [d for d in dependencies if d["module"] == module_name]
+                external = [d for d in dependencies if d["module"] != module_name]
+                if internal:
+                    lines.append(f"## Internal Calls ({len(internal)})\n")
+                    for dep in internal:
+                        lines.extend(
+                            self._format_dependency_with_context(
+                                dep, context_lines, include_context, include_module=False
+                            )
+                        )
+                    lines.append("")
+                if external:
+                    lines.append(f"## External Calls ({len(external)})\n")
+                    for dep in external:
+                        lines.extend(
+                            self._format_dependency_with_context(
+                                dep, context_lines, include_context, include_module=True
+                            )
+                        )
+                    lines.append("")
+            else:
+                lines.append("*No dependencies found*")
+            output = "\n".join(lines)
+        return [TextContent(type="text", text=output)]
     async def run(self):
         """Run the MCP server."""
         async with stdio_server() as (read_stream, write_stream):
@@ -1365,9 +2104,12 @@ def _auto_setup_if_needed():
     )
     # Determine repository path from environment or current directory
-    repo_path_str = os.environ.get("CICADA_REPO_PATH")
+    repo_path_str = None
+    # First check if repo path was provided via positional argument (internal env var)
+    repo_path_str = os.environ.get("_CICADA_REPO_PATH_ARG")
-    # Check if WORKSPACE_FOLDER_PATHS is available (Cursor-specific)
+    # Fall back to WORKSPACE_FOLDER_PATHS (Cursor-specific)
     if not repo_path_str:
         workspace_paths = os.environ.get("WORKSPACE_FOLDER_PATHS")
         if workspace_paths:
@@ -1420,15 +2162,19 @@ def main():
     import sys
     # Accept optional positional argument for repo path
-    # Usage: cicada-server [repo_path]
     if len(sys.argv) > 1:
         repo_path = sys.argv[1]
         # Convert to absolute path
         from pathlib import Path
+        from cicada.utils.storage import get_storage_dir
         abs_path = Path(repo_path).resolve()
-        # Set environment variable to override default
-        os.environ["CICADA_REPO_PATH"] = str(abs_path)
+        # Set environment variables for both storage directory and repo path
+        # The repo path is needed by _auto_setup_if_needed() for first-time setup
+        storage_dir = get_storage_dir(abs_path)
+        os.environ["CICADA_CONFIG_DIR"] = str(storage_dir)
+        os.environ["_CICADA_REPO_PATH_ARG"] = str(abs_path)
     asyncio.run(async_main())

cicada-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

cicada-mcp 0.2.0py3-none-any.whl → 0.3.0py3-none-any.whl