cicada-mcp 0.1.4__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of cicada-mcp might be problematic.

cicada/indexer.py CHANGED
@@ -102,7 +102,8 @@ class ElixirIndexer:
         if not repo_path_obj.exists():
             raise ValueError(f"Repository path does not exist: {repo_path_obj}")
 
-        print(f"Indexing repository: {repo_path_obj}")
+        if self.verbose:
+            print(f"Indexing repository: {repo_path_obj}")
 
         # Set up signal handlers for graceful interruption
         signal.signal(signal.SIGINT, self._handle_interrupt)
@@ -118,20 +119,22 @@ class ElixirIndexer:
                 )
 
                 keyword_extractor = LightweightKeywordExtractor(
-                    verbose=True, model_size=spacy_model
+                    verbose=self.verbose, model_size=spacy_model
                 )
             except Exception as e:
-                print(f"Warning: Could not initialize keyword extractor: {e}")
-                print("Continuing without keyword extraction...")
+                if self.verbose:
+                    print(f"Warning: Could not initialize keyword extractor: {e}")
+                    print("Continuing without keyword extraction...")
                 extract_keywords = False
 
         # Find all Elixir files
         elixir_files = self._find_elixir_files(repo_path_obj)
         total_files = len(elixir_files)
 
-        print(f"Found {total_files} Elixir files")
-        if extract_keywords:
-            print("Keyword extraction enabled")
+        if self.verbose:
+            print(f"Found {total_files} Elixir files")
+            if extract_keywords:
+                print("Keyword extraction enabled")
 
         # Parse all files
         all_modules = {}
@@ -222,7 +225,10 @@ class ElixirIndexer:
                 files_processed += 1
 
                 # Progress reporting
-                if files_processed % self.PROGRESS_REPORT_INTERVAL == 0:
+                if (
+                    self.verbose
+                    and files_processed % self.PROGRESS_REPORT_INTERVAL == 0
+                ):
                     print(f" Processed {files_processed}/{total_files} files...")
 
                 # Check for interruption after each file
@@ -230,7 +236,8 @@ class ElixirIndexer:
                     break
 
             except Exception as e:
-                print(f" Skipping {file_path}: {e}")
+                if self.verbose:
+                    print(f" Skipping {file_path}: {e}")
                 # Check for interruption even after error
                 if self._check_and_report_interruption(files_processed, total_files):
                     break
@@ -258,12 +265,14 @@ class ElixirIndexer:
         from cicada.utils.path_utils import ensure_gitignore_has_cicada
 
         if ensure_gitignore_has_cicada(repo_path_obj):
-            print("✓ Added .cicada/ to .gitignore")
+            if self.verbose:
+                print("✓ Added .cicada/ to .gitignore")
 
         save_index(index, output_path_obj, create_dirs=True)
 
         # Compute and save hashes for all PROCESSED files for future incremental updates
-        print("Computing file hashes for incremental updates...")
+        if self.verbose:
+            print("Computing file hashes for incremental updates...")
         # Only hash files that were actually processed
         processed_files = [
             str(f.relative_to(repo_path_obj)) for f in elixir_files[:files_processed]
@@ -272,30 +281,31 @@ class ElixirIndexer:
         save_file_hashes(str(output_path_obj.parent), file_hashes)
 
         # Report completion status
-        if self._interrupted:
-            print(f"\n✓ Partial index saved!")
-            print(
-                f" Processed: {files_processed}/{total_files} files ({files_processed/total_files*100:.1f}%)"
-            )
-            print(f" Modules: {len(all_modules)}")
-            print(f" Functions: {total_functions}")
-            print(
-                f"\n💡 Run the command again to continue indexing remaining {total_files - files_processed} file(s)"
-            )
-        else:
-            print(f"\nIndexing complete!")
-            print(f" Modules: {len(all_modules)}")
-            print(f" Functions: {total_functions}")
+        if self.verbose:
+            if self._interrupted:
+                print(f"\n✓ Partial index saved!")
+                print(
+                    f" Processed: {files_processed}/{total_files} files ({files_processed/total_files*100:.1f}%)"
+                )
+                print(f" Modules: {len(all_modules)}")
+                print(f" Functions: {total_functions}")
+                print(
+                    f"\n💡 Run the command again to continue indexing remaining {total_files - files_processed} file(s)"
+                )
+            else:
+                print(f"\nIndexing complete!")
+                print(f" Modules: {len(all_modules)}")
+                print(f" Functions: {total_functions}")
 
-        # Report keyword extraction failures if any
-        if extract_keywords and keyword_extraction_failures > 0:
-            print(
-                f"\n⚠️ Warning: Keyword extraction failed for {keyword_extraction_failures} module(s) or function(s)"
-            )
-            print(" Some documentation may not be indexed for keyword search.")
+            # Report keyword extraction failures if any
+            if extract_keywords and keyword_extraction_failures > 0:
+                print(
+                    f"\n⚠️ Warning: Keyword extraction failed for {keyword_extraction_failures} module(s) or function(s)"
+                )
+                print(" Some documentation may not be indexed for keyword search.")
 
-        print(f"\nIndex saved to: {output_path_obj}")
-        print(f"Hashes saved to: {output_path_obj.parent}/hashes.json")
+            print(f"\nIndex saved to: {output_path_obj}")
+            print(f"Hashes saved to: {output_path_obj.parent}/hashes.json")
 
         return index
 
@@ -351,7 +361,8 @@ class ElixirIndexer:
                 str(repo_path_obj), str(output_path_obj), extract_keywords, spacy_model
             )
 
-        print(f"Performing incremental index of: {repo_path_obj}")
+        if self.verbose:
+            print(f"Performing incremental index of: {repo_path_obj}")
 
         # Set up signal handlers for graceful interruption
         signal.signal(signal.SIGINT, self._handle_interrupt)
@@ -364,7 +375,8 @@ class ElixirIndexer:
         relative_files = [str(f.relative_to(repo_path_obj)) for f in elixir_files]
 
         # Detect file changes
-        print("Detecting file changes...")
+        if self.verbose:
+            print("Detecting file changes...")
         new_files, modified_files, deleted_files = detect_file_changes(
             relative_files, existing_hashes, str(repo_path_obj)
         )
@@ -377,10 +389,14 @@ class ElixirIndexer:
             print("No changes detected. Index is up to date.")
             return existing_index
 
-        print(f"Changes detected:")
-        print(f" New files: {len(new_files)}")
-        print(f" Modified files: {len(modified_files)}")
-        print(f" Deleted files: {len(deleted_files)}")
+        if self.verbose:
+            print(f"Changes detected:")
+        if self.verbose:
+            print(f" New files: {len(new_files)}")
+        if self.verbose:
+            print(f" Modified files: {len(modified_files)}")
+        if self.verbose:
+            print(f" Deleted files: {len(deleted_files)}")
 
         if files_to_process:
             print(f"\nProcessing {len(files_to_process)} changed file(s)...")
@@ -394,7 +410,7 @@ class ElixirIndexer:
                 )
 
                 keyword_extractor = LightweightKeywordExtractor(
-                    verbose=True, model_size=spacy_model
+                    verbose=self.verbose, model_size=spacy_model
                 )
             except Exception as e:
                 print(f"Warning: Could not initialize keyword extractor: {e}")
@@ -502,13 +518,15 @@ class ElixirIndexer:
         }
 
         # Merge with existing index
-        print("\nMerging with existing index...")
+        if self.verbose:
+            print("\nMerging with existing index...")
         merged_index = merge_indexes_incremental(
             existing_index, new_index, deleted_files
         )
 
         # Update hashes for all current files
-        print("Updating file hashes...")
+        if self.verbose:
+            print("Updating file hashes...")
         updated_hashes = dict(existing_hashes)
 
         # Compute hashes only for files that were actually processed
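The common thread in the indexer changes above: every progress `print` is now gated behind `self.verbose`, so an embedding caller (such as the MCP server doing first-run setup) can index without writing anything to the output stream. A minimal sketch of the pattern, using a hypothetical `ProgressReporter` class that is not part of cicada's API:

```python
# Minimal sketch of the verbose-gating pattern; ProgressReporter is a
# hypothetical name, not part of cicada's API.
class ProgressReporter:
    def __init__(self, verbose: bool = False):
        self.verbose = verbose

    def log(self, message: str) -> None:
        # Emit progress only when the caller opted in; silent by default.
        if self.verbose:
            print(message)


ProgressReporter(verbose=True).log("Indexing repository: /path/to/repo")  # printed
ProgressReporter().log("Found 42 Elixir files")  # suppressed
```

Keeping stdout quiet matters when the indexer runs inside an MCP server over stdio transport, where stray progress lines would interleave with protocol traffic.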
cicada/install.py CHANGED
@@ -251,7 +251,15 @@ def detect_installation_method():
         ".local/share/uv/tools" in script_path_str
         or ".local/bin/cicada-" in script_path_str
     ):
-        # Installed via uv tool install
+        # Installed via uv tool install - check for cicada-mcp first
+        if shutil.which("cicada-mcp"):
+            return (
+                "cicada-mcp",
+                [],
+                None,
+                "uv tool install (ensure ~/.local/bin is in PATH)",
+            )
+        # Fall back to cicada-server for backwards compatibility
         return (
             "cicada-server",
             [],
@@ -259,7 +267,11 @@ def detect_installation_method():
             "uv tool install (ensure ~/.local/bin is in PATH)",
         )
 
-    # Check if cicada-server is in PATH (from uv tool install)
+    # Check if cicada-mcp is in PATH first (from uv tool install)
+    if shutil.which("cicada-mcp"):
+        return ("cicada-mcp", [], None, "uv tool install (permanent, fast)")
+
+    # Fall back to cicada-server for backwards compatibility
     if shutil.which("cicada-server"):
        return ("cicada-server", [], None, "uv tool install (permanent, fast)")
 
@@ -279,8 +291,13 @@ def check_tools_in_path():
     """Check if cicada tools are in PATH."""
     import shutil
 
-    tools = ["cicada-server", "cicada-index"]
+    # Check for cicada-mcp (new) or cicada-server (backwards compat)
+    has_mcp_server = shutil.which("cicada-mcp") or shutil.which("cicada-server")
+    tools = ["cicada-index"]
     visible_tools = [tool for tool in tools if shutil.which(tool)]
+    if has_mcp_server:
+        visible_tools.insert(0, "cicada-mcp/cicada-server")
+        tools.insert(0, "cicada-mcp/cicada-server")
 
     if len(visible_tools) == len(tools):
         return "all_visible"
@@ -351,8 +368,8 @@ def create_mcp_config(repo_path, _cicada_dir, _python_bin):
     print(f"✓ MCP configuration updated at {mcp_config_path}")
 
     # Show what was configured
-    if command == "cicada-server":
-        print("✅ Using 'cicada-server' command (fast, no paths needed)")
+    if command in ("cicada-mcp", "cicada-server"):
+        print(f"✅ Using '{command}' command (fast, no paths needed)")
     else:
         print(f"ℹ️ Using Python: {command}")
 
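All of the install.py changes follow a single lookup order: prefer the new `cicada-mcp` executable and fall back to the legacy `cicada-server` name kept for backwards compatibility. A condensed sketch of that resolution order (`pick_server_command` is an illustrative name, not cicada's API):

```python
# Condensed sketch of the binary lookup order introduced in this release;
# pick_server_command is illustrative, not part of cicada.
import shutil


def pick_server_command() -> str | None:
    # Prefer the new entry point, then the legacy one kept for
    # backwards compatibility with existing installs.
    for candidate in ("cicada-mcp", "cicada-server"):
        if shutil.which(candidate):
            return candidate
    return None


print(pick_server_command() or "not installed")
```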
cicada/mcp_server.py CHANGED
@@ -1496,43 +1496,27 @@ def _auto_setup_if_needed():
         # Already set up, nothing to do
         return
 
-    # Setup needed - create storage and index
-    print("=" * 60, file=sys.stderr)
-    print("Cicada: First-time setup detected", file=sys.stderr)
-    print("=" * 60, file=sys.stderr)
-    print(file=sys.stderr)
-
+    # Setup needed - create storage and index (silent mode)
     # Validate it's an Elixir project
     if not (repo_path / "mix.exs").exists():
         print(
-            f"Error: {repo_path} does not appear to be an Elixir project",
+            f"Error: {repo_path} does not appear to be an Elixir project (mix.exs not found)",
             file=sys.stderr,
         )
-        print("(mix.exs not found)", file=sys.stderr)
         sys.exit(1)
 
     try:
         # Create storage directory
         storage_dir = create_storage_dir(repo_path)
-        print(f"Repository: {repo_path}", file=sys.stderr)
-        print(f"Storage: {storage_dir}", file=sys.stderr)
-        print(file=sys.stderr)
-
-        # Index repository
-        index_repository(repo_path)
-        print(file=sys.stderr)
 
-        # Create config.yaml
-        create_config_yaml(repo_path, storage_dir)
-        print(file=sys.stderr)
+        # Index repository (silent mode)
+        index_repository(repo_path, verbose=False)
 
-        print("=" * 60, file=sys.stderr)
-        print("✓ Setup Complete! Starting server...", file=sys.stderr)
-        print("=" * 60, file=sys.stderr)
-        print(file=sys.stderr)
+        # Create config.yaml (silent mode)
+        create_config_yaml(repo_path, storage_dir, verbose=False)
 
     except Exception as e:
-        print(f"Error during auto-setup: {e}", file=sys.stderr)
+        print(f"Cicada auto-setup error: {e}", file=sys.stderr)
         sys.exit(1)
 
 
cicada/setup.py CHANGED
@@ -102,9 +102,15 @@ def get_mcp_config_for_editor(
     # Detect installation method
     import shutil
 
+    # Check for cicada-mcp first (new name), fall back to cicada-server (backwards compat)
+    has_cicada_mcp = shutil.which("cicada-mcp") is not None
     has_cicada_server = shutil.which("cicada-server") is not None
 
-    if has_cicada_server:
+    if has_cicada_mcp:
+        command = "cicada-mcp"
+        args = []
+        cwd = None
+    elif has_cicada_server:
         command = "cicada-server"
         args = []
         cwd = None
@@ -157,13 +163,16 @@
     return config_path, config
 
 
-def create_config_yaml(repo_path: Path, storage_dir: Path) -> None:
+def create_config_yaml(
+    repo_path: Path, storage_dir: Path, verbose: bool = True
+) -> None:
     """
     Create config.yaml in storage directory.
 
     Args:
         repo_path: Path to the repository
         storage_dir: Path to the storage directory
+        verbose: Whether to print progress messages (default: True)
     """
     config_path = get_config_path(repo_path)
     index_path = get_index_path(repo_path)
@@ -178,22 +187,24 @@ storage:
     with open(config_path, "w") as f:
         f.write(config_content)
 
-    print(f"✓ Config file created at {config_path}")
+    if verbose:
+        print(f"✓ Config file created at {config_path}")
 
 
-def index_repository(repo_path: Path) -> None:
+def index_repository(repo_path: Path, verbose: bool = True) -> None:
     """
     Index the repository with keyword extraction enabled.
 
     Args:
         repo_path: Path to the repository
+        verbose: Whether to print progress messages (default: True)
 
     Raises:
         Exception: If indexing fails
     """
     try:
         index_path = get_index_path(repo_path)
-        indexer = ElixirIndexer(verbose=True)
+        indexer = ElixirIndexer(verbose=verbose)
 
         # Index with keyword extraction enabled by default
         # Note: Using 'small' model for compatibility with uvx
@@ -205,10 +216,12 @@ def index_repository(repo_path: Path) -> None:
             spacy_model="small",
         )
 
-        print(f"✓ Repository indexed at {index_path}")
+        if verbose:
+            print(f"✓ Repository indexed at {index_path}")
     except Exception as e:
-        print(f"Error: Failed to index repository: {e}")
-        print("Please check that the repository contains valid Elixir files.")
+        if verbose:
+            print(f"Error: Failed to index repository: {e}")
+            print("Please check that the repository contains valid Elixir files.")
         raise
 
 
@@ -275,7 +288,8 @@ def setup(editor: EditorType, repo_path: Path | None = None) -> None:
     import shutil
     from cicada import __version__
 
-    if not shutil.which("cicada-server"):
+    # Check for either cicada-mcp or cicada-server (backwards compat)
+    if not (shutil.which("cicada-mcp") or shutil.which("cicada-server")):
         print("💡 Tip: For best experience, install Cicada permanently:")
         print(
             f" uv tool install git+https://github.com/wende/cicada.git@v{__version__}"
{cicada_mcp-0.1.4.dist-info → cicada_mcp-0.1.7.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cicada-mcp
-Version: 0.1.4
+Version: 0.1.7
 Summary: An Elixir module search MCP server
 Author-email: wende <wende@hey.com>
 Maintainer-email: wende <wende@hey.com>
@@ -138,7 +138,7 @@ cicada claude # or: cicada cursor, cicada vs
 
 **Available commands after installation:**
 - `cicada [claude|cursor|vs]` - One-command setup per project
-- `cicada-server` - MCP server (auto-started by editor)
+- `cicada-mcp` - MCP server (auto-started by editor)
 - `cicada-index` - Re-index code with custom options (medium/large spaCy models)
 - `cicada-index-pr` - Index pull requests for PR attribution
 - `cicada-install` - Legacy setup (creates `.cicada/` in repo)
@@ -169,6 +169,33 @@ uvx --from git+https://github.com/wende/cicada.git@latest cicada vs
 
 Once you're convinced, install permanently with `uv tool install` above!
 
+### Quick Setup for Cursor and Claude Code
+
+**For Cursor:**
+
+Click the install button at the top of this README or visit:
+[![Install MCP Server](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/en-US/install-mcp?name=cicada&config=eyJjb21tYW5kIjoidXZ4IC0tZnJvbSBnaXQraHR0cHM6Ly9naXRodWIuY29tL3dlbmRlL2NpY2FkYS5naXRAbGF0ZXN0IGNpY2FkYS1zZXJ2ZXIgLiJ9)
+
+**For Claude Code:**
+
+```bash
+# Option 1: Using claude mcp add command
+claude mcp add cicada -- uvx --from git+https://github.com/wende/cicada.git@latest cicada-mcp ./path/to/your/codebase
+
+# Option 2: Using setup script
+uvx --from git+https://github.com/wende/cicada.git@latest cicada claude
+```
+
+**Then for both editors,** run these commands in your codebase to generate keyword lookup and GitHub PR lookup databases:
+
+```bash
+# Generate keyword lookup database
+uvx --from git+https://github.com/wende/cicada.git@latest cicada-index .
+
+# Generate GitHub PR lookup database
+uvx --from git+https://github.com/wende/cicada.git@latest cicada-index-pr .
+```
+
 ---
 
 ## Quick Start
@@ -221,7 +248,7 @@ your-project/
 {
   "mcpServers": {
     "cicada": {
-      "command": "cicada-server",
+      "command": "cicada-mcp",
       "env": {
         "CICADA_REPO_PATH": "/path/to/project",
         "CICADA_CONFIG_DIR": "/home/user/.cicada/projects/<hash>"
{cicada_mcp-0.1.4.dist-info → cicada_mcp-0.1.7.dist-info}/RECORD RENAMED
@@ -5,16 +5,15 @@ cicada/dead_code_analyzer.py,sha256=hk3kmuFTj3K2HQpLDwrA_7GHrPc4rP9Ecg3OnrFmdh4,
 cicada/find_dead_code.py,sha256=xCheicrNbYhLvrPGgqVJJBbf_rAm_gXwnfONDWPnNI0,8288
 cicada/formatter.py,sha256=wwxD1nt1ub7HDeDRGc61JhpmgleNVlp0SfQG9QBgGns,36194
 cicada/git_helper.py,sha256=zhyqSfk90tCwndWYxhh-LxFmqqXB1Wki91uDkZRr7Js,24303
-cicada/indexer.py,sha256=gVj6Jwc-sZgcGZnueqpRqcn4Wu451qo6RVfGuQahaZ4,25249
-cicada/install.py,sha256=mM8hj1_45CkXUFbJd8ve8dqYyIzNY1HhNbVKbseiJ4s,23214
-cicada/keyword_extractor.py,sha256=9oEEU3cwv5prsWYn1P-nNFayArQeXgCFNzx4iaq1qhg,13425
+cicada/indexer.py,sha256=eK8OrxI0wHl-mm61aoP7kpj1qBQGIKqwKhx-_ydt1gQ,25897
+cicada/install.py,sha256=VU7OI031cM0S-Y7udXVRFs2hluQRI9S6tIm3XBLJL2w,23980
 cicada/keyword_search.py,sha256=pj5zSsYKX-pOeWyGI53ZRAZm91BnrEMHofGNoenoIqQ,21746
 cicada/lightweight_keyword_extractor.py,sha256=KtxcOjLPuoY6EjcWNvHvoZswcg9IoryMfG4EM3_LDMg,9172
-cicada/mcp_server.py,sha256=k_JnwQExgQ-dTAA-MfPTl8G02B9MEmVZJb8fAc_UnPY,60299
+cicada/mcp_server.py,sha256=Fq_2BiCzASBBjgQrPheJJXPxM6z7IEiDDrzsWhc68Xg,59774
 cicada/mcp_tools.py,sha256=LHNyrpztmY0yk1Ysu3_I-ZE7KmngJJ0ukKd-1OJpenA,13805
 cicada/parser.py,sha256=uQlzYnQQicUWU-yF9LgvqDK-83xImzGlZOkjPoov8_I,4022
 cicada/pr_finder.py,sha256=FPSaGe5W4RwPi93VmyoIWcUZIaHLZdHsT7s_WCIvHBM,14214
-cicada/setup.py,sha256=n9hFlK4LmPG7ivCvnburXvD-7sWwZjCvz6sdWRD_d_0,9166
+cicada/setup.py,sha256=Ru52J_ZRx09GUXrTJRuOWZI85k6wSaqOuRpmSWSJvE0,9773
 cicada/version_check.py,sha256=c8BFl--ohKfLZYe_3tX40rKXydTR6FVGWiseGuIvcBk,3181
 cicada/extractors/__init__.py,sha256=Dnm_jjWMGPvaGmt1aZqcgpS964tak4hys5BFOjbCcg8,890
 cicada/extractors/base.py,sha256=reenF-Cngpg1LgueWsddYzGcmtHElSuNv1F5OlZRFpI,2487
@@ -40,9 +39,9 @@ cicada/utils/signature_builder.py,sha256=O76JfypSESNncQ_OppCAR7aUDz4ocBNPXEmI9uh
 cicada/utils/storage.py,sha256=wbw_Ma77v4uevDGTQP06Eu4m5V8IU6GkKUARYWXgj1A,2578
 cicada/utils/subprocess_runner.py,sha256=fibqu_YCCmQPvtTwaDkGkVyhGVSQ6oX235pBYavQW5M,5168
 cicada/utils/text_utils.py,sha256=_lt_65BcAVZa36QrTY84GR8v5m5oxvfPY3tr6PoNaxw,2923
-cicada_mcp-0.1.4.dist-info/licenses/LICENSE,sha256=ijMI5EAN1o3jl676-BOu0ELzlsBr2FqTRzmha9e1lug,1062
-cicada_mcp-0.1.4.dist-info/METADATA,sha256=pj5-4L2Bz3xn6w6r7HJTRwgPCTa-KiLNOUrTPREDLF0,18931
-cicada_mcp-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cicada_mcp-0.1.4.dist-info/entry_points.txt,sha256=DFW2H5na_prQFHRcgcDOkziQCpzykOZuHO5cztoMA2Y,281
-cicada_mcp-0.1.4.dist-info/top_level.txt,sha256=xZCtaMDbCi2CKA5PExum99ZU54IJg5iognV-k44a1W0,7
-cicada_mcp-0.1.4.dist-info/RECORD,,
+cicada_mcp-0.1.7.dist-info/licenses/LICENSE,sha256=ijMI5EAN1o3jl676-BOu0ELzlsBr2FqTRzmha9e1lug,1062
+cicada_mcp-0.1.7.dist-info/METADATA,sha256=ydnq27gKEEAabFUHZ4yXnkKZNyNYfTG3oUlmC_Jy7Pg,19952
+cicada_mcp-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cicada_mcp-0.1.7.dist-info/entry_points.txt,sha256=cwG5-3TwDGwFPiKiOC5gjlfvi9mBFc01EVtX2ZcXpcQ,317
+cicada_mcp-0.1.7.dist-info/top_level.txt,sha256=xZCtaMDbCi2CKA5PExum99ZU54IJg5iognV-k44a1W0,7
+cicada_mcp-0.1.7.dist-info/RECORD,,
{cicada_mcp-0.1.4.dist-info → cicada_mcp-0.1.7.dist-info}/entry_points.txt RENAMED
@@ -5,4 +5,5 @@ cicada-find-dead-code = cicada.find_dead_code:main
 cicada-index = cicada.indexer:main
 cicada-index-pr = cicada.pr_indexer:main
 cicada-install = cicada.install:main
+cicada-mcp = cicada.mcp_server:main
 cicada-server = cicada.mcp_server:main
cicada/keyword_extractor.py DELETED
@@ -1,364 +0,0 @@
-"""
-Keyword Extraction using spaCy
-Advanced NLP-based keyword extraction for programming documentation
-
-DEPRECATED: This module is being replaced by lightweight_keyword_extractor.py
-which provides faster performance using lemminflect instead of spaCy.
-
-The spaCy-based extractor has been kept for backward compatibility and for
-cases where advanced NLP features are needed. For most use cases, prefer
-LightweightKeywordExtractor from cicada.lightweight_keyword_extractor.
-
-Performance comparison:
-- LightweightKeywordExtractor: ~0.1s startup time
-- KeywordExtractor (spaCy): ~2s startup time
-
-See: cicada.lightweight_keyword_extractor.LightweightKeywordExtractor
-"""
-
-from collections import Counter
-import re
-import sys
-import subprocess
-
-from cicada.utils import split_camel_snake_case
-
-# Lazy import spacy only when needed
-spacy = None
-
-
-def _ensure_spacy_imported():
-    """Import spacy only when needed."""
-    global spacy
-    if spacy is None:
-        import spacy as spacy_module
-
-        spacy = spacy_module
-
-
-class KeywordExtractor:
-    """Extract keywords from text using spaCy NLP."""
-
-    # spaCy model names for different sizes
-    SPACY_MODELS = {
-        "small": "en_core_web_sm",
-        "medium": "en_core_web_md",
-        "large": "en_core_web_lg",
-    }
-
-    def __init__(self, verbose: bool = False, model_size: str = "small"):
-        """
-        Initialize keyword extractor with lazy model loading.
-
-        Args:
-            verbose: If True, print status messages during initialization
-            model_size: Size of spaCy model to use ('small', 'medium', or 'large')
-                Default is 'small'. Medium and large models provide better
-                accuracy but are slower and require more memory.
-        """
-        self.verbose = verbose
-
-        # Validate model size
-        if model_size not in self.SPACY_MODELS:
-            raise ValueError(
-                f"Invalid model size '{model_size}'. "
-                f"Must be one of: {', '.join(self.SPACY_MODELS.keys())}"
-            )
-
-        self.model_size = model_size
-        self.model_name = self.SPACY_MODELS[model_size]
-        self.nlp = None  # Lazy-loaded on first use
-
-    def _ensure_model_loaded(self):
-        """
-        Ensure the spaCy model is loaded, downloading if necessary.
-        Only called when model is actually needed (lazy loading).
-        """
-        if self.nlp is not None:
-            return  # Already loaded
-
-        # Ensure spacy is imported
-        _ensure_spacy_imported()
-
-        if self.verbose:
-            print(f"Loading spaCy model ({self.model_size})...", file=sys.stderr)
-
-        try:
-            # Import the model directly as a Python package (fast failure if not installed)
-            import importlib
-
-            model_module = importlib.import_module(self.model_name)
-            self.nlp = model_module.load()
-            if self.verbose:
-                print("✓ Model loaded successfully", file=sys.stderr)
-        except (ImportError, AttributeError):
-            # Model not installed, download it
-            if self.verbose:
-                print(
-                    f"Model '{self.model_name}' not found. Downloading...",
-                    file=sys.stderr,
-                )
-
-            if not self._download_model():
-                raise RuntimeError(
-                    f"Failed to download spaCy model '{self.model_name}'. "
-                    f"Please install it manually with: python -m spacy download {self.model_name}"
-                )
-
-            # Try importing again after download
-            try:
-                import importlib
-
-                model_module = importlib.import_module(self.model_name)
-                self.nlp = model_module.load()
-                if self.verbose:
-                    print("✓ Model loaded successfully", file=sys.stderr)
-            except (ImportError, AttributeError) as e:
-                raise RuntimeError(
-                    f"Failed to load spaCy model '{self.model_name}' after download. "
-                    f"Please try installing it manually: python -m spacy download {self.model_name}"
-                ) from e
-
-    def _download_model(self) -> bool:
-        """
-        Download the spaCy model using uv pip install.
-
-        Returns:
-            True if download succeeded, False otherwise
-        """
-        # Model URLs for direct installation
-        model_urls = {
-            "en_core_web_sm": "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
-            "en_core_web_md": "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl",
-            "en_core_web_lg": "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl",
-        }
-
-        if self.model_name not in model_urls:
-            if self.verbose:
-                print(f"Unknown model: {self.model_name}", file=sys.stderr)
-            return False
-
-        model_url = model_urls[self.model_name]
-
-        # Use uv pip install (works in uv-managed environments)
-        try:
-            if self.verbose:
-                print(f"Running: uv pip install {model_url}", file=sys.stderr)
-
-            result = subprocess.run(
-                ["uv", "pip", "install", model_url],
-                capture_output=True,
-                text=True,
-                check=True,
-            )
-
-            if self.verbose and result.stdout:
-                print(result.stdout, file=sys.stderr)
-
-            return True
-        except FileNotFoundError:
-            if self.verbose:
-                print(
-                    "uv not found. Please install uv or manually install the model:",
-                    file=sys.stderr,
-                )
-                print(f" uv pip install {model_url}", file=sys.stderr)
-            return False
-        except subprocess.CalledProcessError as e:
-            if self.verbose:
-                print(f"uv pip install failed: {e.stderr}", file=sys.stderr)
-            return False
-        except Exception as e:
-            if self.verbose:
-                print(f"Unexpected error during download: {e}", file=sys.stderr)
-            return False
-
-    def extract_code_identifiers(self, text):
-        """
-        Extract code-specific identifiers and their split words.
-
-        Returns a tuple of (identifiers, split_words) where:
-        - identifiers: original camelCase/PascalCase/snake_case identifiers
-        - split_words: individual words extracted from those identifiers
-        """
-        # Match camelCase, snake_case, PascalCase, and mixed patterns
-        patterns = [
-            r"\b[a-z]+[A-Z][a-zA-Z]*\b",  # camelCase (e.g., getUserData)
-            r"\b[A-Z]{2,}[a-z]+[a-zA-Z]*\b",  # Uppercase prefix + PascalCase (e.g., HTTPServer, XMLParser)
-            r"\b[A-Z][a-z]+[A-Z][a-zA-Z]*\b",  # PascalCase (e.g., UserController, PostgreSQL)
-            r"\b[a-z]+_[a-z_]+\b",  # snake_case (e.g., get_user_data)
-            r"\b[A-Z]{2,}\b",  # All UPPERCASE (e.g., HTTP, API, SQL)
-        ]
-
-        identifiers = []
-        for pattern in patterns:
-            matches = re.findall(pattern, text)
-            identifiers.extend(matches)
-
-        identifiers = list(set(identifiers))
-
-        # Split identifiers into individual words
-        split_words = []
-        for identifier in identifiers:
-            split_text = split_camel_snake_case(identifier)
-            # Extract individual words (lowercase, length > 1)
-            words = [
-                word.lower()
-                for word in split_text.split()
-                if len(word) > 1 and word.isalpha()
-            ]
-            split_words.extend(words)
-
-        return identifiers, list(set(split_words))
-
-    def extract_keywords_simple(self, text: str, top_n: int = 10) -> list[str]:
-        """
-        Extract keywords and return a simple list of keyword strings.
-
-        Args:
-            text: Input text to analyze
-            top_n: Number of top keywords to return
-
-        Returns:
-            List of keyword strings (e.g., ['authentication', 'user', 'validate'])
-        """
-        if not text or not text.strip():
-            return []
-
-        try:
-            self._ensure_model_loaded()
-            results = self.extract_keywords(text, top_n=top_n)
-            # Extract just the keyword strings from top_keywords tuples
-            return [keyword for keyword, _ in results["top_keywords"]]
-        except Exception as e:
-            if self.verbose:
-                print(f"Warning: Keyword extraction failed: {e}", file=sys.stderr)
-            return []
-
-    def extract_keywords(self, text, top_n=15):
-        """
-        Extract keywords using multiple strategies with emphasis on code identifiers.
-
-        Weighting strategy:
-        - Full code identifiers (e.g., getUserData, snake_case): 10x weight (exact match priority)
-        - Code split words (e.g., get, user, data): 3x weight (fuzzy match support)
-        - Regular words (nouns, verbs): 1x weight
-
-        Args:
-            text: Input text to analyze
-            top_n: Number of top keywords to return
-
-        Returns:
-            Dictionary with extracted keywords and analysis:
-            - top_keywords: List of (keyword, count) tuples, sorted by frequency
-            - code_identifiers: Original identifiers (weighted 10x)
-            - code_split_words: Words extracted from identifiers (weighted 3x)
-            - nouns, verbs, adjectives: Linguistic categories
-            - entities: Named entities found
-            - tf_scores: Term frequency scores
-            - stats: Text statistics
-        """
-        if not text or not text.strip():
-            return {
-                "top_keywords": [],
-                "nouns": [],
-                "verbs": [],
-                "adjectives": [],
-                "proper_nouns": [],
-                "noun_chunks": [],
-                "entities": [],
-                "code_identifiers": [],
-                "tf_scores": {},
-                "stats": {
-                    "total_tokens": 0,
-                    "total_words": 0,
-                    "unique_words": 0,
-                    "sentences": 0,
-                },
-            }
-        # Ensure model is loaded (lazy loading on first use)
-        self._ensure_model_loaded()
-
-        # Process with spaCy
-        doc = self.nlp(text)
-
-        # 1. Extract nouns (concepts)
-        nouns = [
-            token.lemma_.lower()
-            for token in doc
-            if token.pos_ == "NOUN" and not token.is_stop and len(token.text) > 2
-        ]
-
-        # 2. Extract verbs (actions)
-        verbs = [
-            token.lemma_.lower()
-            for token in doc
-            if token.pos_ == "VERB" and not token.is_stop and len(token.text) > 2
-        ]
-
-        # 3. Extract adjectives (descriptors)
-        adjectives = [
-            token.lemma_.lower()
-            for token in doc
-            if token.pos_ == "ADJ" and not token.is_stop
-        ]
-
-        # 4. Extract proper nouns (named entities, technologies)
-        proper_nouns = [token.text for token in doc if token.pos_ == "PROPN"]
-
-        # 5. Extract noun chunks (multi-word concepts)
-        noun_chunks = [
-            chunk.text.lower()
-            for chunk in doc.noun_chunks
-            if len(chunk.text.split()) > 1
-        ]
-
-        # 6. Extract named entities
-        entities = [(ent.text, ent.label_) for ent in doc.ents]
-
-        # 7. Extract code identifiers and their split words
-        code_identifiers, code_split_words = self.extract_code_identifiers(text)
-
-        # 8. Calculate keyword frequency (combining nouns, verbs, proper nouns, identifiers, and split code words)
-        # Give full code identifiers 10x weight for exact matching
-        # Give code split words 3x weight for fuzzy matching
-        code_identifiers_lower = [ident.lower() for ident in code_identifiers]
-        all_keywords = (
-            nouns
-            + verbs
-            + proper_nouns
-            + (code_identifiers_lower * 10)
-            + (code_split_words * 3)
-        )
-        keyword_freq = Counter(all_keywords)
-        top_keywords = keyword_freq.most_common(top_n)
-
-        # 9. Calculate TF scores (simple version)
-        total_words = len(
-            [token for token in doc if not token.is_stop and not token.is_punct]
-        )
-        tf_scores = {word: (freq / total_words) for word, freq in keyword_freq.items()}
-
-        # Statistics
-        stats = {
-            "total_tokens": len(doc),
-            "total_words": total_words,
-            "unique_words": len(set([t.text.lower() for t in doc if not t.is_punct])),
-            "sentences": len(list(doc.sents)),
-        }
-
-        return {
-            "top_keywords": top_keywords,
-            "nouns": list(set(nouns))[:20],
-            "verbs": list(set(verbs))[:20],
-            "adjectives": list(set(adjectives))[:15],
-            "proper_nouns": list(set(proper_nouns)),
-            "noun_chunks": list(set(noun_chunks))[:15],
-            "entities": entities,
-            "code_identifiers": code_identifiers,
-            "code_split_words": code_split_words,
-            "tf_scores": dict(
-                sorted(tf_scores.items(), key=lambda x: x[1], reverse=True)[:10]
-            ),
-            "stats": stats,
-        }