cicada-mcp 0.1.4__py3-none-any.whl → 0.1.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of cicada-mcp might be problematic.

cicada/indexer.py CHANGED
@@ -102,7 +102,8 @@ class ElixirIndexer:
         if not repo_path_obj.exists():
             raise ValueError(f"Repository path does not exist: {repo_path_obj}")
 
-        print(f"Indexing repository: {repo_path_obj}")
+        if self.verbose:
+            print(f"Indexing repository: {repo_path_obj}")
 
         # Set up signal handlers for graceful interruption
         signal.signal(signal.SIGINT, self._handle_interrupt)
@@ -118,20 +119,22 @@ class ElixirIndexer:
                 )
 
                 keyword_extractor = LightweightKeywordExtractor(
-                    verbose=True, model_size=spacy_model
+                    verbose=self.verbose, model_size=spacy_model
                 )
             except Exception as e:
-                print(f"Warning: Could not initialize keyword extractor: {e}")
-                print("Continuing without keyword extraction...")
+                if self.verbose:
+                    print(f"Warning: Could not initialize keyword extractor: {e}")
+                    print("Continuing without keyword extraction...")
                 extract_keywords = False
 
         # Find all Elixir files
         elixir_files = self._find_elixir_files(repo_path_obj)
         total_files = len(elixir_files)
 
-        print(f"Found {total_files} Elixir files")
-        if extract_keywords:
-            print("Keyword extraction enabled")
+        if self.verbose:
+            print(f"Found {total_files} Elixir files")
+            if extract_keywords:
+                print("Keyword extraction enabled")
 
         # Parse all files
         all_modules = {}
@@ -222,7 +225,10 @@ class ElixirIndexer:
                 files_processed += 1
 
                 # Progress reporting
-                if files_processed % self.PROGRESS_REPORT_INTERVAL == 0:
+                if (
+                    self.verbose
+                    and files_processed % self.PROGRESS_REPORT_INTERVAL == 0
+                ):
                     print(f" Processed {files_processed}/{total_files} files...")
 
                 # Check for interruption after each file
@@ -230,7 +236,8 @@ class ElixirIndexer:
                     break
 
             except Exception as e:
-                print(f" Skipping {file_path}: {e}")
+                if self.verbose:
+                    print(f" Skipping {file_path}: {e}")
                 # Check for interruption even after error
                 if self._check_and_report_interruption(files_processed, total_files):
                     break
@@ -258,12 +265,14 @@ class ElixirIndexer:
         from cicada.utils.path_utils import ensure_gitignore_has_cicada
 
         if ensure_gitignore_has_cicada(repo_path_obj):
-            print("✓ Added .cicada/ to .gitignore")
+            if self.verbose:
+                print("✓ Added .cicada/ to .gitignore")
 
         save_index(index, output_path_obj, create_dirs=True)
 
         # Compute and save hashes for all PROCESSED files for future incremental updates
-        print("Computing file hashes for incremental updates...")
+        if self.verbose:
+            print("Computing file hashes for incremental updates...")
         # Only hash files that were actually processed
         processed_files = [
             str(f.relative_to(repo_path_obj)) for f in elixir_files[:files_processed]
@@ -272,30 +281,31 @@ class ElixirIndexer:
         save_file_hashes(str(output_path_obj.parent), file_hashes)
 
         # Report completion status
-        if self._interrupted:
-            print(f"\n✓ Partial index saved!")
-            print(
-                f" Processed: {files_processed}/{total_files} files ({files_processed/total_files*100:.1f}%)"
-            )
-            print(f" Modules: {len(all_modules)}")
-            print(f" Functions: {total_functions}")
-            print(
-                f"\n💡 Run the command again to continue indexing remaining {total_files - files_processed} file(s)"
-            )
-        else:
-            print(f"\nIndexing complete!")
-            print(f" Modules: {len(all_modules)}")
-            print(f" Functions: {total_functions}")
+        if self.verbose:
+            if self._interrupted:
+                print(f"\n✓ Partial index saved!")
+                print(
+                    f" Processed: {files_processed}/{total_files} files ({files_processed/total_files*100:.1f}%)"
+                )
+                print(f" Modules: {len(all_modules)}")
+                print(f" Functions: {total_functions}")
+                print(
+                    f"\n💡 Run the command again to continue indexing remaining {total_files - files_processed} file(s)"
+                )
+            else:
+                print(f"\nIndexing complete!")
+                print(f" Modules: {len(all_modules)}")
+                print(f" Functions: {total_functions}")
 
-        # Report keyword extraction failures if any
-        if extract_keywords and keyword_extraction_failures > 0:
-            print(
-                f"\n⚠️ Warning: Keyword extraction failed for {keyword_extraction_failures} module(s) or function(s)"
-            )
-            print(" Some documentation may not be indexed for keyword search.")
+            # Report keyword extraction failures if any
+            if extract_keywords and keyword_extraction_failures > 0:
+                print(
+                    f"\n⚠️ Warning: Keyword extraction failed for {keyword_extraction_failures} module(s) or function(s)"
+                )
+                print(" Some documentation may not be indexed for keyword search.")
 
-        print(f"\nIndex saved to: {output_path_obj}")
-        print(f"Hashes saved to: {output_path_obj.parent}/hashes.json")
+            print(f"\nIndex saved to: {output_path_obj}")
+            print(f"Hashes saved to: {output_path_obj.parent}/hashes.json")
 
         return index
 
@@ -351,7 +361,8 @@ class ElixirIndexer:
                 str(repo_path_obj), str(output_path_obj), extract_keywords, spacy_model
             )
 
-        print(f"Performing incremental index of: {repo_path_obj}")
+        if self.verbose:
+            print(f"Performing incremental index of: {repo_path_obj}")
 
         # Set up signal handlers for graceful interruption
         signal.signal(signal.SIGINT, self._handle_interrupt)
@@ -364,7 +375,8 @@ class ElixirIndexer:
         relative_files = [str(f.relative_to(repo_path_obj)) for f in elixir_files]
 
         # Detect file changes
-        print("Detecting file changes...")
+        if self.verbose:
+            print("Detecting file changes...")
         new_files, modified_files, deleted_files = detect_file_changes(
             relative_files, existing_hashes, str(repo_path_obj)
         )
@@ -377,10 +389,14 @@ class ElixirIndexer:
             print("No changes detected. Index is up to date.")
             return existing_index
 
-        print(f"Changes detected:")
-        print(f" New files: {len(new_files)}")
-        print(f" Modified files: {len(modified_files)}")
-        print(f" Deleted files: {len(deleted_files)}")
+        if self.verbose:
+            print(f"Changes detected:")
+        if self.verbose:
+            print(f" New files: {len(new_files)}")
+        if self.verbose:
+            print(f" Modified files: {len(modified_files)}")
+        if self.verbose:
+            print(f" Deleted files: {len(deleted_files)}")
 
         if files_to_process:
             print(f"\nProcessing {len(files_to_process)} changed file(s)...")
@@ -394,7 +410,7 @@ class ElixirIndexer:
                 )
 
                 keyword_extractor = LightweightKeywordExtractor(
-                    verbose=True, model_size=spacy_model
+                    verbose=self.verbose, model_size=spacy_model
                 )
             except Exception as e:
                 print(f"Warning: Could not initialize keyword extractor: {e}")
@@ -502,13 +518,15 @@ class ElixirIndexer:
         }
 
         # Merge with existing index
-        print("\nMerging with existing index...")
+        if self.verbose:
+            print("\nMerging with existing index...")
         merged_index = merge_indexes_incremental(
             existing_index, new_index, deleted_files
         )
 
         # Update hashes for all current files
-        print("Updating file hashes...")
+        if self.verbose:
+            print("Updating file hashes...")
         updated_hashes = dict(existing_hashes)
 
         # Compute hashes only for files that were actually processed
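The common thread in the indexer changes above: every progress `print` is now gated behind `self.verbose`, so an embedding caller (such as the MCP server doing first-run setup) can index without writing anything to the output stream. A minimal sketch of the pattern, using a hypothetical `ProgressReporter` class that is not part of cicada's API:

```python
# Minimal sketch of the verbose-gating pattern; ProgressReporter is a
# hypothetical name, not part of cicada's API.
class ProgressReporter:
    def __init__(self, verbose: bool = False):
        self.verbose = verbose

    def log(self, message: str) -> None:
        # Emit progress only when the caller opted in; silent by default.
        if self.verbose:
            print(message)


ProgressReporter(verbose=True).log("Indexing repository: /path/to/repo")  # printed
ProgressReporter().log("Found 42 Elixir files")  # suppressed
```

Keeping stdout quiet matters when the indexer runs inside an MCP server over stdio transport, where stray progress lines would interleave with protocol traffic.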
cicada/install.py CHANGED
@@ -251,7 +251,15 @@ def detect_installation_method():
         ".local/share/uv/tools" in script_path_str
         or ".local/bin/cicada-" in script_path_str
     ):
-        # Installed via uv tool install
+        # Installed via uv tool install - check for cicada-mcp first
+        if shutil.which("cicada-mcp"):
+            return (
+                "cicada-mcp",
+                [],
+                None,
+                "uv tool install (ensure ~/.local/bin is in PATH)",
+            )
+        # Fall back to cicada-server for backwards compatibility
         return (
             "cicada-server",
             [],
@@ -259,7 +267,11 @@ def detect_installation_method():
             "uv tool install (ensure ~/.local/bin is in PATH)",
         )
 
-    # Check if cicada-server is in PATH (from uv tool install)
+    # Check if cicada-mcp is in PATH first (from uv tool install)
+    if shutil.which("cicada-mcp"):
+        return ("cicada-mcp", [], None, "uv tool install (permanent, fast)")
+
+    # Fall back to cicada-server for backwards compatibility
     if shutil.which("cicada-server"):
        return ("cicada-server", [], None, "uv tool install (permanent, fast)")
 
@@ -279,8 +291,13 @@ def check_tools_in_path():
     """Check if cicada tools are in PATH."""
     import shutil
 
-    tools = ["cicada-server", "cicada-index"]
+    # Check for cicada-mcp (new) or cicada-server (backwards compat)
+    has_mcp_server = shutil.which("cicada-mcp") or shutil.which("cicada-server")
+    tools = ["cicada-index"]
     visible_tools = [tool for tool in tools if shutil.which(tool)]
+    if has_mcp_server:
+        visible_tools.insert(0, "cicada-mcp/cicada-server")
+        tools.insert(0, "cicada-mcp/cicada-server")
 
     if len(visible_tools) == len(tools):
         return "all_visible"
@@ -351,8 +368,8 @@ def create_mcp_config(repo_path, _cicada_dir, _python_bin):
     print(f"✓ MCP configuration updated at {mcp_config_path}")
 
     # Show what was configured
-    if command == "cicada-server":
-        print("✅ Using 'cicada-server' command (fast, no paths needed)")
+    if command in ("cicada-mcp", "cicada-server"):
+        print(f"✅ Using '{command}' command (fast, no paths needed)")
     else:
         print(f"ℹ️ Using Python: {command}")
 
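All of the install.py changes follow a single lookup order: prefer the new `cicada-mcp` executable and fall back to the legacy `cicada-server` name kept for backwards compatibility. A condensed sketch of that resolution order (`pick_server_command` is an illustrative name, not cicada's API):

```python
# Condensed sketch of the binary lookup order introduced in this release;
# pick_server_command is illustrative, not part of cicada.
import shutil


def pick_server_command() -> str | None:
    # Prefer the new entry point, then the legacy one kept for
    # backwards compatibility with existing installs.
    for candidate in ("cicada-mcp", "cicada-server"):
        if shutil.which(candidate):
            return candidate
    return None


print(pick_server_command() or "not installed")
```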
cicada/mcp_server.py CHANGED
@@ -1496,43 +1496,27 @@ def _auto_setup_if_needed():
         # Already set up, nothing to do
         return
 
-    # Setup needed - create storage and index
-    print("=" * 60, file=sys.stderr)
-    print("Cicada: First-time setup detected", file=sys.stderr)
-    print("=" * 60, file=sys.stderr)
-    print(file=sys.stderr)
-
+    # Setup needed - create storage and index (silent mode)
     # Validate it's an Elixir project
     if not (repo_path / "mix.exs").exists():
         print(
-            f"Error: {repo_path} does not appear to be an Elixir project",
+            f"Error: {repo_path} does not appear to be an Elixir project (mix.exs not found)",
             file=sys.stderr,
         )
-        print("(mix.exs not found)", file=sys.stderr)
         sys.exit(1)
 
     try:
         # Create storage directory
         storage_dir = create_storage_dir(repo_path)
-        print(f"Repository: {repo_path}", file=sys.stderr)
-        print(f"Storage: {storage_dir}", file=sys.stderr)
-        print(file=sys.stderr)
-
-        # Index repository
-        index_repository(repo_path)
-        print(file=sys.stderr)
 
-        # Create config.yaml
-        create_config_yaml(repo_path, storage_dir)
-        print(file=sys.stderr)
+        # Index repository (silent mode)
+        index_repository(repo_path, verbose=False)
 
-        print("=" * 60, file=sys.stderr)
-        print("✓ Setup Complete! Starting server...", file=sys.stderr)
-        print("=" * 60, file=sys.stderr)
-        print(file=sys.stderr)
+        # Create config.yaml (silent mode)
+        create_config_yaml(repo_path, storage_dir, verbose=False)
 
     except Exception as e:
-        print(f"Error during auto-setup: {e}", file=sys.stderr)
+        print(f"Cicada auto-setup error: {e}", file=sys.stderr)
         sys.exit(1)
 
 
cicada/setup.py CHANGED
@@ -102,9 +102,15 @@ def get_mcp_config_for_editor(
     # Detect installation method
     import shutil
 
+    # Check for cicada-mcp first (new name), fall back to cicada-server (backwards compat)
+    has_cicada_mcp = shutil.which("cicada-mcp") is not None
     has_cicada_server = shutil.which("cicada-server") is not None
 
-    if has_cicada_server:
+    if has_cicada_mcp:
+        command = "cicada-mcp"
+        args = []
+        cwd = None
+    elif has_cicada_server:
         command = "cicada-server"
         args = []
         cwd = None
@@ -157,13 +163,16 @@
     return config_path, config
 
 
-def create_config_yaml(repo_path: Path, storage_dir: Path) -> None:
+def create_config_yaml(
+    repo_path: Path, storage_dir: Path, verbose: bool = True
+) -> None:
     """
     Create config.yaml in storage directory.
 
     Args:
         repo_path: Path to the repository
         storage_dir: Path to the storage directory
+        verbose: Whether to print progress messages (default: True)
     """
     config_path = get_config_path(repo_path)
     index_path = get_index_path(repo_path)
@@ -178,22 +187,24 @@ storage:
     with open(config_path, "w") as f:
         f.write(config_content)
 
-    print(f"✓ Config file created at {config_path}")
+    if verbose:
+        print(f"✓ Config file created at {config_path}")
 
 
-def index_repository(repo_path: Path) -> None:
+def index_repository(repo_path: Path, verbose: bool = True) -> None:
     """
     Index the repository with keyword extraction enabled.
 
     Args:
         repo_path: Path to the repository
+        verbose: Whether to print progress messages (default: True)
 
     Raises:
         Exception: If indexing fails
     """
     try:
         index_path = get_index_path(repo_path)
-        indexer = ElixirIndexer(verbose=True)
+        indexer = ElixirIndexer(verbose=verbose)
 
         # Index with keyword extraction enabled by default
         # Note: Using 'small' model for compatibility with uvx
@@ -205,10 +216,12 @@ def index_repository(repo_path: Path) -> None:
             spacy_model="small",
         )
 
-        print(f"✓ Repository indexed at {index_path}")
+        if verbose:
+            print(f"✓ Repository indexed at {index_path}")
     except Exception as e:
-        print(f"Error: Failed to index repository: {e}")
-        print("Please check that the repository contains valid Elixir files.")
+        if verbose:
+            print(f"Error: Failed to index repository: {e}")
+            print("Please check that the repository contains valid Elixir files.")
         raise
 
 
@@ -275,7 +288,8 @@ def setup(editor: EditorType, repo_path: Path | None = None) -> None:
     import shutil
     from cicada import __version__
 
-    if not shutil.which("cicada-server"):
+    # Check for either cicada-mcp or cicada-server (backwards compat)
+    if not (shutil.which("cicada-mcp") or shutil.which("cicada-server")):
         print("💡 Tip: For best experience, install Cicada permanently:")
         print(
             f" uv tool install git+https://github.com/wende/cicada.git@v{__version__}"
{cicada_mcp-0.1.4.dist-info → cicada_mcp-0.1.7.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: cicada-mcp
-Version: 0.1.4
+Version: 0.1.7
 Summary: An Elixir module search MCP server
 Author-email: wende <wende@hey.com>
 Maintainer-email: wende <wende@hey.com>
@@ -138,7 +138,7 @@ cicada claude # or: cicada cursor, cicada vs
 
 **Available commands after installation:**
 - `cicada [claude|cursor|vs]` - One-command setup per project
-- `cicada-server` - MCP server (auto-started by editor)
+- `cicada-mcp` - MCP server (auto-started by editor)
 - `cicada-index` - Re-index code with custom options (medium/large spaCy models)
 - `cicada-index-pr` - Index pull requests for PR attribution
 - `cicada-install` - Legacy setup (creates `.cicada/` in repo)
@@ -169,6 +169,33 @@ uvx --from git+https://github.com/wende/cicada.git@latest cicada vs
 
 Once you're convinced, install permanently with `uv tool install` above!
 
+### Quick Setup for Cursor and Claude Code
+
+**For Cursor:**
+
+Click the install button at the top of this README or visit:
+[![Install MCP Server](https://cursor.com/deeplink/mcp-install-dark.svg)](https://cursor.com/en-US/install-mcp?name=cicada&config=eyJjb21tYW5kIjoidXZ4IC0tZnJvbSBnaXQraHR0cHM6Ly9naXRodWIuY29tL3dlbmRlL2NpY2FkYS5naXRAbGF0ZXN0IGNpY2FkYS1zZXJ2ZXIgLiJ9)
+
+**For Claude Code:**
+
+```bash
+# Option 1: Using claude mcp add command
+claude mcp add cicada -- uvx --from git+https://github.com/wende/cicada.git@latest cicada-mcp ./path/to/your/codebase
+
+# Option 2: Using setup script
+uvx --from git+https://github.com/wende/cicada.git@latest cicada claude
+```
+
+**Then for both editors,** run these commands in your codebase to generate keyword lookup and GitHub PR lookup databases:
+
+```bash
+# Generate keyword lookup database
+uvx --from git+https://github.com/wende/cicada.git@latest cicada-index .
+
+# Generate GitHub PR lookup database
+uvx --from git+https://github.com/wende/cicada.git@latest cicada-index-pr .
+```
+
 ---
 
 ## Quick Start
@@ -221,7 +248,7 @@ your-project/
 {
   "mcpServers": {
     "cicada": {
-      "command": "cicada-server",
+      "command": "cicada-mcp",
       "env": {
         "CICADA_REPO_PATH": "/path/to/project",
         "CICADA_CONFIG_DIR": "/home/user/.cicada/projects/<hash>"
{cicada_mcp-0.1.4.dist-info → cicada_mcp-0.1.7.dist-info}/RECORD RENAMED
@@ -5,16 +5,15 @@ cicada/dead_code_analyzer.py,sha256=hk3kmuFTj3K2HQpLDwrA_7GHrPc4rP9Ecg3OnrFmdh4,
 cicada/find_dead_code.py,sha256=xCheicrNbYhLvrPGgqVJJBbf_rAm_gXwnfONDWPnNI0,8288
 cicada/formatter.py,sha256=wwxD1nt1ub7HDeDRGc61JhpmgleNVlp0SfQG9QBgGns,36194
 cicada/git_helper.py,sha256=zhyqSfk90tCwndWYxhh-LxFmqqXB1Wki91uDkZRr7Js,24303
-cicada/indexer.py,sha256=gVj6Jwc-sZgcGZnueqpRqcn4Wu451qo6RVfGuQahaZ4,25249
-cicada/install.py,sha256=mM8hj1_45CkXUFbJd8ve8dqYyIzNY1HhNbVKbseiJ4s,23214
-cicada/keyword_extractor.py,sha256=9oEEU3cwv5prsWYn1P-nNFayArQeXgCFNzx4iaq1qhg,13425
+cicada/indexer.py,sha256=eK8OrxI0wHl-mm61aoP7kpj1qBQGIKqwKhx-_ydt1gQ,25897
+cicada/install.py,sha256=VU7OI031cM0S-Y7udXVRFs2hluQRI9S6tIm3XBLJL2w,23980
 cicada/keyword_search.py,sha256=pj5zSsYKX-pOeWyGI53ZRAZm91BnrEMHofGNoenoIqQ,21746
 cicada/lightweight_keyword_extractor.py,sha256=KtxcOjLPuoY6EjcWNvHvoZswcg9IoryMfG4EM3_LDMg,9172
-cicada/mcp_server.py,sha256=k_JnwQExgQ-dTAA-MfPTl8G02B9MEmVZJb8fAc_UnPY,60299
+cicada/mcp_server.py,sha256=Fq_2BiCzASBBjgQrPheJJXPxM6z7IEiDDrzsWhc68Xg,59774
 cicada/mcp_tools.py,sha256=LHNyrpztmY0yk1Ysu3_I-ZE7KmngJJ0ukKd-1OJpenA,13805
 cicada/parser.py,sha256=uQlzYnQQicUWU-yF9LgvqDK-83xImzGlZOkjPoov8_I,4022
 cicada/pr_finder.py,sha256=FPSaGe5W4RwPi93VmyoIWcUZIaHLZdHsT7s_WCIvHBM,14214
-cicada/setup.py,sha256=n9hFlK4LmPG7ivCvnburXvD-7sWwZjCvz6sdWRD_d_0,9166
+cicada/setup.py,sha256=Ru52J_ZRx09GUXrTJRuOWZI85k6wSaqOuRpmSWSJvE0,9773
 cicada/version_check.py,sha256=c8BFl--ohKfLZYe_3tX40rKXydTR6FVGWiseGuIvcBk,3181
 cicada/extractors/__init__.py,sha256=Dnm_jjWMGPvaGmt1aZqcgpS964tak4hys5BFOjbCcg8,890
 cicada/extractors/base.py,sha256=reenF-Cngpg1LgueWsddYzGcmtHElSuNv1F5OlZRFpI,2487
@@ -40,9 +39,9 @@ cicada/utils/signature_builder.py,sha256=O76JfypSESNncQ_OppCAR7aUDz4ocBNPXEmI9uh
 cicada/utils/storage.py,sha256=wbw_Ma77v4uevDGTQP06Eu4m5V8IU6GkKUARYWXgj1A,2578
 cicada/utils/subprocess_runner.py,sha256=fibqu_YCCmQPvtTwaDkGkVyhGVSQ6oX235pBYavQW5M,5168
 cicada/utils/text_utils.py,sha256=_lt_65BcAVZa36QrTY84GR8v5m5oxvfPY3tr6PoNaxw,2923
-cicada_mcp-0.1.4.dist-info/licenses/LICENSE,sha256=ijMI5EAN1o3jl676-BOu0ELzlsBr2FqTRzmha9e1lug,1062
-cicada_mcp-0.1.4.dist-info/METADATA,sha256=pj5-4L2Bz3xn6w6r7HJTRwgPCTa-KiLNOUrTPREDLF0,18931
-cicada_mcp-0.1.4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-cicada_mcp-0.1.4.dist-info/entry_points.txt,sha256=DFW2H5na_prQFHRcgcDOkziQCpzykOZuHO5cztoMA2Y,281
-cicada_mcp-0.1.4.dist-info/top_level.txt,sha256=xZCtaMDbCi2CKA5PExum99ZU54IJg5iognV-k44a1W0,7
-cicada_mcp-0.1.4.dist-info/RECORD,,
+cicada_mcp-0.1.7.dist-info/licenses/LICENSE,sha256=ijMI5EAN1o3jl676-BOu0ELzlsBr2FqTRzmha9e1lug,1062
+cicada_mcp-0.1.7.dist-info/METADATA,sha256=ydnq27gKEEAabFUHZ4yXnkKZNyNYfTG3oUlmC_Jy7Pg,19952
+cicada_mcp-0.1.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+cicada_mcp-0.1.7.dist-info/entry_points.txt,sha256=cwG5-3TwDGwFPiKiOC5gjlfvi9mBFc01EVtX2ZcXpcQ,317
+cicada_mcp-0.1.7.dist-info/top_level.txt,sha256=xZCtaMDbCi2CKA5PExum99ZU54IJg5iognV-k44a1W0,7
+cicada_mcp-0.1.7.dist-info/RECORD,,
{cicada_mcp-0.1.4.dist-info → cicada_mcp-0.1.7.dist-info}/entry_points.txt RENAMED
@@ -5,4 +5,5 @@ cicada-find-dead-code = cicada.find_dead_code:main
 cicada-index = cicada.indexer:main
 cicada-index-pr = cicada.pr_indexer:main
 cicada-install = cicada.install:main
+cicada-mcp = cicada.mcp_server:main
 cicada-server = cicada.mcp_server:main
cicada/keyword_extractor.py DELETED
@@ -1,364 +0,0 @@
-"""
-Keyword Extraction using spaCy
-Advanced NLP-based keyword extraction for programming documentation
-
-DEPRECATED: This module is being replaced by lightweight_keyword_extractor.py
-which provides faster performance using lemminflect instead of spaCy.
-
-The spaCy-based extractor has been kept for backward compatibility and for
-cases where advanced NLP features are needed. For most use cases, prefer
-LightweightKeywordExtractor from cicada.lightweight_keyword_extractor.
-
-Performance comparison:
-- LightweightKeywordExtractor: ~0.1s startup time
-- KeywordExtractor (spaCy): ~2s startup time
-
-See: cicada.lightweight_keyword_extractor.LightweightKeywordExtractor
-"""
-
-from collections import Counter
-import re
-import sys
-import subprocess
-
-from cicada.utils import split_camel_snake_case
-
-# Lazy import spacy only when needed
-spacy = None
-
-
-def _ensure_spacy_imported():
-    """Import spacy only when needed."""
-    global spacy
-    if spacy is None:
-        import spacy as spacy_module
-
-        spacy = spacy_module
-
-
-class KeywordExtractor:
-    """Extract keywords from text using spaCy NLP."""
-
-    # spaCy model names for different sizes
-    SPACY_MODELS = {
-        "small": "en_core_web_sm",
-        "medium": "en_core_web_md",
-        "large": "en_core_web_lg",
-    }
-
-    def __init__(self, verbose: bool = False, model_size: str = "small"):
-        """
-        Initialize keyword extractor with lazy model loading.
-
-        Args:
-            verbose: If True, print status messages during initialization
-            model_size: Size of spaCy model to use ('small', 'medium', or 'large')
-                Default is 'small'. Medium and large models provide better
-                accuracy but are slower and require more memory.
-        """
-        self.verbose = verbose
-
-        # Validate model size
-        if model_size not in self.SPACY_MODELS:
-            raise ValueError(
-                f"Invalid model size '{model_size}'. "
-                f"Must be one of: {', '.join(self.SPACY_MODELS.keys())}"
-            )
-
-        self.model_size = model_size
-        self.model_name = self.SPACY_MODELS[model_size]
-        self.nlp = None  # Lazy-loaded on first use
-
-    def _ensure_model_loaded(self):
-        """
-        Ensure the spaCy model is loaded, downloading if necessary.
-        Only called when model is actually needed (lazy loading).
-        """
-        if self.nlp is not None:
-            return  # Already loaded
-
-        # Ensure spacy is imported
-        _ensure_spacy_imported()
-
-        if self.verbose:
-            print(f"Loading spaCy model ({self.model_size})...", file=sys.stderr)
-
-        try:
-            # Import the model directly as a Python package (fast failure if not installed)
-            import importlib
-
-            model_module = importlib.import_module(self.model_name)
-            self.nlp = model_module.load()
-            if self.verbose:
-                print("✓ Model loaded successfully", file=sys.stderr)
-        except (ImportError, AttributeError):
-            # Model not installed, download it
-            if self.verbose:
-                print(
-                    f"Model '{self.model_name}' not found. Downloading...",
-                    file=sys.stderr,
-                )
-
-            if not self._download_model():
-                raise RuntimeError(
-                    f"Failed to download spaCy model '{self.model_name}'. "
-                    f"Please install it manually with: python -m spacy download {self.model_name}"
-                )
-
-            # Try importing again after download
-            try:
-                import importlib
-
-                model_module = importlib.import_module(self.model_name)
-                self.nlp = model_module.load()
-                if self.verbose:
-                    print("✓ Model loaded successfully", file=sys.stderr)
-            except (ImportError, AttributeError) as e:
-                raise RuntimeError(
-                    f"Failed to load spaCy model '{self.model_name}' after download. "
-                    f"Please try installing it manually: python -m spacy download {self.model_name}"
-                ) from e
-
-    def _download_model(self) -> bool:
-        """
-        Download the spaCy model using uv pip install.
-
-        Returns:
-            True if download succeeded, False otherwise
-        """
-        # Model URLs for direct installation
-        model_urls = {
-            "en_core_web_sm": "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
-            "en_core_web_md": "https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl",
-            "en_core_web_lg": "https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.8.0/en_core_web_lg-3.8.0-py3-none-any.whl",
-        }
-
-        if self.model_name not in model_urls:
-            if self.verbose:
-                print(f"Unknown model: {self.model_name}", file=sys.stderr)
-            return False
-
-        model_url = model_urls[self.model_name]
-
-        # Use uv pip install (works in uv-managed environments)
-        try:
-            if self.verbose:
-                print(f"Running: uv pip install {model_url}", file=sys.stderr)
-
-            result = subprocess.run(
-                ["uv", "pip", "install", model_url],
-                capture_output=True,
-                text=True,
-                check=True,
-            )
-
-            if self.verbose and result.stdout:
-                print(result.stdout, file=sys.stderr)
-
-            return True
-        except FileNotFoundError:
-            if self.verbose:
-                print(
-                    "uv not found. Please install uv or manually install the model:",
-                    file=sys.stderr,
-                )
-                print(f" uv pip install {model_url}", file=sys.stderr)
-            return False
-        except subprocess.CalledProcessError as e:
-            if self.verbose:
-                print(f"uv pip install failed: {e.stderr}", file=sys.stderr)
-            return False
-        except Exception as e:
-            if self.verbose:
-                print(f"Unexpected error during download: {e}", file=sys.stderr)
-            return False
-
-    def extract_code_identifiers(self, text):
-        """
-        Extract code-specific identifiers and their split words.
-
-        Returns a tuple of (identifiers, split_words) where:
-        - identifiers: original camelCase/PascalCase/snake_case identifiers
-        - split_words: individual words extracted from those identifiers
-        """
-        # Match camelCase, snake_case, PascalCase, and mixed patterns
-        patterns = [
-            r"\b[a-z]+[A-Z][a-zA-Z]*\b",  # camelCase (e.g., getUserData)
-            r"\b[A-Z]{2,}[a-z]+[a-zA-Z]*\b",  # Uppercase prefix + PascalCase (e.g., HTTPServer, XMLParser)
-            r"\b[A-Z][a-z]+[A-Z][a-zA-Z]*\b",  # PascalCase (e.g., UserController, PostgreSQL)
-            r"\b[a-z]+_[a-z_]+\b",  # snake_case (e.g., get_user_data)
-            r"\b[A-Z]{2,}\b",  # All UPPERCASE (e.g., HTTP, API, SQL)
-        ]
-
-        identifiers = []
-        for pattern in patterns:
-            matches = re.findall(pattern, text)
-            identifiers.extend(matches)
-
-        identifiers = list(set(identifiers))
-
-        # Split identifiers into individual words
-        split_words = []
-        for identifier in identifiers:
-            split_text = split_camel_snake_case(identifier)
-            # Extract individual words (lowercase, length > 1)
-            words = [
-                word.lower()
-                for word in split_text.split()
-                if len(word) > 1 and word.isalpha()
-            ]
-            split_words.extend(words)
-
-        return identifiers, list(set(split_words))
-
-    def extract_keywords_simple(self, text: str, top_n: int = 10) -> list[str]:
-        """
-        Extract keywords and return a simple list of keyword strings.
-
-        Args:
-            text: Input text to analyze
-            top_n: Number of top keywords to return
-
-        Returns:
-            List of keyword strings (e.g., ['authentication', 'user', 'validate'])
-        """
-        if not text or not text.strip():
-            return []
-
-        try:
-            self._ensure_model_loaded()
-            results = self.extract_keywords(text, top_n=top_n)
-            # Extract just the keyword strings from top_keywords tuples
-            return [keyword for keyword, _ in results["top_keywords"]]
-        except Exception as e:
-            if self.verbose:
-                print(f"Warning: Keyword extraction failed: {e}", file=sys.stderr)
-            return []
-
-    def extract_keywords(self, text, top_n=15):
-        """
-        Extract keywords using multiple strategies with emphasis on code identifiers.
-
-        Weighting strategy:
-        - Full code identifiers (e.g., getUserData, snake_case): 10x weight (exact match priority)
-        - Code split words (e.g., get, user, data): 3x weight (fuzzy match support)
-        - Regular words (nouns, verbs): 1x weight
-
-        Args:
-            text: Input text to analyze
-            top_n: Number of top keywords to return
-
-        Returns:
-            Dictionary with extracted keywords and analysis:
-            - top_keywords: List of (keyword, count) tuples, sorted by frequency
-            - code_identifiers: Original identifiers (weighted 10x)
-            - code_split_words: Words extracted from identifiers (weighted 3x)
-            - nouns, verbs, adjectives: Linguistic categories
-            - entities: Named entities found
-            - tf_scores: Term frequency scores
-            - stats: Text statistics
-        """
-        if not text or not text.strip():
-            return {
-                "top_keywords": [],
-                "nouns": [],
-                "verbs": [],
-                "adjectives": [],
-                "proper_nouns": [],
-                "noun_chunks": [],
-                "entities": [],
-                "code_identifiers": [],
-                "tf_scores": {},
-                "stats": {
-                    "total_tokens": 0,
-                    "total_words": 0,
-                    "unique_words": 0,
-                    "sentences": 0,
-                },
-            }
-        # Ensure model is loaded (lazy loading on first use)
-        self._ensure_model_loaded()
-
-        # Process with spaCy
-        doc = self.nlp(text)
-
-        # 1. Extract nouns (concepts)
-        nouns = [
-            token.lemma_.lower()
-            for token in doc
-            if token.pos_ == "NOUN" and not token.is_stop and len(token.text) > 2
-        ]
-
-        # 2. Extract verbs (actions)
-        verbs = [
-            token.lemma_.lower()
-            for token in doc
-            if token.pos_ == "VERB" and not token.is_stop and len(token.text) > 2
-        ]
-
-        # 3. Extract adjectives (descriptors)
-        adjectives = [
-            token.lemma_.lower()
-            for token in doc
-            if token.pos_ == "ADJ" and not token.is_stop
-        ]
-
-        # 4. Extract proper nouns (named entities, technologies)
-        proper_nouns = [token.text for token in doc if token.pos_ == "PROPN"]
-
-        # 5. Extract noun chunks (multi-word concepts)
-        noun_chunks = [
-            chunk.text.lower()
-            for chunk in doc.noun_chunks
-            if len(chunk.text.split()) > 1
-        ]
-
-        # 6. Extract named entities
-        entities = [(ent.text, ent.label_) for ent in doc.ents]
-
-        # 7. Extract code identifiers and their split words
-        code_identifiers, code_split_words = self.extract_code_identifiers(text)
-
-        # 8. Calculate keyword frequency (combining nouns, verbs, proper nouns, identifiers, and split code words)
-        # Give full code identifiers 10x weight for exact matching
-        # Give code split words 3x weight for fuzzy matching
-        code_identifiers_lower = [ident.lower() for ident in code_identifiers]
-        all_keywords = (
-            nouns
-            + verbs
-            + proper_nouns
-            + (code_identifiers_lower * 10)
-            + (code_split_words * 3)
-        )
-        keyword_freq = Counter(all_keywords)
-        top_keywords = keyword_freq.most_common(top_n)
-
-        # 9. Calculate TF scores (simple version)
-        total_words = len(
-            [token for token in doc if not token.is_stop and not token.is_punct]
-        )
-        tf_scores = {word: (freq / total_words) for word, freq in keyword_freq.items()}
-
-        # Statistics
-        stats = {
-            "total_tokens": len(doc),
-            "total_words": total_words,
-            "unique_words": len(set([t.text.lower() for t in doc if not t.is_punct])),
-            "sentences": len(list(doc.sents)),
-        }
-
-        return {
-            "top_keywords": top_keywords,
-            "nouns": list(set(nouns))[:20],
-            "verbs": list(set(verbs))[:20],
-            "adjectives": list(set(adjectives))[:15],
-            "proper_nouns": list(set(proper_nouns)),
-            "noun_chunks": list(set(noun_chunks))[:15],
-            "entities": entities,
-            "code_identifiers": code_identifiers,
-            "code_split_words": code_split_words,
-            "tf_scores": dict(
-                sorted(tf_scores.items(), key=lambda x: x[1], reverse=True)[:10]
-            ),
-            "stats": stats,
-        }