cicada-mcp 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. cicada/ascii_art.py +60 -0
  2. cicada/clean.py +195 -60
  3. cicada/cli.py +757 -0
  4. cicada/colors.py +27 -0
  5. cicada/command_logger.py +14 -16
  6. cicada/dead_code_analyzer.py +12 -19
  7. cicada/extractors/__init__.py +6 -6
  8. cicada/extractors/base.py +3 -3
  9. cicada/extractors/call.py +11 -15
  10. cicada/extractors/dependency.py +39 -51
  11. cicada/extractors/doc.py +8 -9
  12. cicada/extractors/function.py +12 -24
  13. cicada/extractors/module.py +11 -15
  14. cicada/extractors/spec.py +8 -12
  15. cicada/find_dead_code.py +15 -39
  16. cicada/formatter.py +37 -91
  17. cicada/git_helper.py +22 -34
  18. cicada/indexer.py +165 -132
  19. cicada/interactive_setup.py +490 -0
  20. cicada/keybert_extractor.py +286 -0
  21. cicada/keyword_search.py +22 -30
  22. cicada/keyword_test.py +127 -0
  23. cicada/lightweight_keyword_extractor.py +5 -13
  24. cicada/mcp_entry.py +683 -0
  25. cicada/mcp_server.py +110 -232
  26. cicada/parser.py +9 -9
  27. cicada/pr_finder.py +15 -19
  28. cicada/pr_indexer/__init__.py +3 -3
  29. cicada/pr_indexer/cli.py +4 -9
  30. cicada/pr_indexer/github_api_client.py +22 -37
  31. cicada/pr_indexer/indexer.py +17 -29
  32. cicada/pr_indexer/line_mapper.py +8 -12
  33. cicada/pr_indexer/pr_index_builder.py +22 -34
  34. cicada/setup.py +198 -89
  35. cicada/utils/__init__.py +9 -9
  36. cicada/utils/call_site_formatter.py +4 -6
  37. cicada/utils/function_grouper.py +4 -4
  38. cicada/utils/hash_utils.py +12 -15
  39. cicada/utils/index_utils.py +15 -15
  40. cicada/utils/path_utils.py +24 -29
  41. cicada/utils/signature_builder.py +3 -3
  42. cicada/utils/subprocess_runner.py +17 -19
  43. cicada/utils/text_utils.py +1 -2
  44. cicada/version_check.py +2 -5
  45. {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/METADATA +144 -55
  46. cicada_mcp-0.2.0.dist-info/RECORD +53 -0
  47. cicada_mcp-0.2.0.dist-info/entry_points.txt +4 -0
  48. cicada/install.py +0 -741
  49. cicada_mcp-0.1.5.dist-info/RECORD +0 -47
  50. cicada_mcp-0.1.5.dist-info/entry_points.txt +0 -9
  51. {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/WHEEL +0 -0
  52. {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/licenses/LICENSE +0 -0
  53. {cicada_mcp-0.1.5.dist-info → cicada_mcp-0.2.0.dist-info}/top_level.txt +0 -0
cicada/indexer.py CHANGED
@@ -10,19 +10,56 @@ import signal
 import sys
 from datetime import datetime
 from pathlib import Path
+
 from cicada.parser import ElixirParser
 from cicada.utils import (
-    save_index,
     load_index,
     merge_indexes_incremental,
+    save_index,
     validate_index_structure,
 )
 from cicada.utils.hash_utils import (
+    compute_hashes_for_files,
+    detect_file_changes,
     load_file_hashes,
     save_file_hashes,
-    detect_file_changes,
-    compute_hashes_for_files,
 )
+from cicada.utils.storage import get_config_path
+
+
+def read_keyword_extraction_config(repo_path: Path) -> tuple[str, str]:
+    """
+    Read keyword extraction configuration from config.yaml.
+
+    Args:
+        repo_path: Path to the repository
+
+    Returns:
+        tuple[str, str]: (method, tier) where method is 'lemminflect' or 'bert',
+            and tier is 'fast', 'regular', or 'max'.
+            Returns ('lemminflect', 'regular') as default if config not found.
+    """
+    try:
+        import yaml
+
+        config_path = get_config_path(repo_path)
+        if not config_path.exists():
+            # Default to lemminflect if config doesn't exist
+            return ("lemminflect", "regular")
+
+        with open(config_path) as f:
+            config = yaml.safe_load(f)
+
+        if config and "keyword_extraction" in config:
+            method = config["keyword_extraction"].get("method", "lemminflect")
+            tier = config["keyword_extraction"].get("tier", "regular")
+            return (method, tier)
+
+        # Default to lemminflect if keyword_extraction section not found
+        return ("lemminflect", "regular")
+    except Exception:
+        # If anything goes wrong, default to lemminflect
+        return ("lemminflect", "regular")
 
 
 class ElixirIndexer:
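
For reference, a minimal sketch of the config.yaml shape the new read_keyword_extraction_config helper reads (the path comes from get_config_path; the key names match the lookups above, the values are illustrative):

    # Hypothetical .cicada/config.yaml:
    #
    #   keyword_extraction:
    #     method: bert      # 'lemminflect' (default) or 'bert'
    #     tier: fast        # 'fast', 'regular', or 'max'

    from pathlib import Path
    from cicada.indexer import read_keyword_extraction_config

    method, tier = read_keyword_extraction_config(Path("path/to/repo"))
    # -> ('lemminflect', 'regular') if the file or the section is missing
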
@@ -45,20 +82,16 @@ class ElixirIndexer:
         }
         self._interrupted = False
 
-    def _handle_interrupt(self, signum, frame):
+    def _handle_interrupt(self, _signum, _frame):
         """Handle interrupt signals (Ctrl-C, SIGTERM) gracefully."""
-        print(
-            "\n\n⚠️ Interrupt received. Finishing current file and saving progress..."
-        )
+        print("\n\n⚠️ Interrupt received. Finishing current file and saving progress...")
         print(" Press Ctrl-C again to force quit (may lose progress)\n")
         self._interrupted = True
         # Restore default handler so second Ctrl-C will kill immediately
         signal.signal(signal.SIGINT, signal.SIG_DFL)
         signal.signal(signal.SIGTERM, signal.SIG_DFL)
 
-    def _check_and_report_interruption(
-        self, files_processed: int, total_files: int
-    ) -> bool:
+    def _check_and_report_interruption(self, files_processed: int, total_files: int) -> bool:
         """
         Check if interrupted and report status.
 
@@ -70,9 +103,7 @@ class ElixirIndexer:
             True if interrupted, False otherwise
         """
         if self._interrupted:
-            print(
-                f"\n⚠️ Interrupted after processing {files_processed}/{total_files} files"
-            )
+            print(f"\n⚠️ Interrupted after processing {files_processed}/{total_files} files")
             print(" Saving partial progress...")
             return True
         return False
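
The interrupt handling above is a two-stage pattern: the first Ctrl-C flips a flag that is checked between files, and the handler immediately restores SIG_DFL so a second Ctrl-C kills the process outright. A minimal standalone sketch of the same idea:

    import signal

    interrupted = False

    def _handle(_signum, _frame):
        global interrupted
        interrupted = True  # checked at safe points in the main loop
        # A second Ctrl-C now terminates immediately
        signal.signal(signal.SIGINT, signal.SIG_DFL)
        signal.signal(signal.SIGTERM, signal.SIG_DFL)

    signal.signal(signal.SIGINT, _handle)
    signal.signal(signal.SIGTERM, _handle)
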
@@ -80,9 +111,8 @@ class ElixirIndexer:
     def index_repository(
         self,
         repo_path: str,
-        output_path: str = ".cicada/index.json",
+        output_path: str,
         extract_keywords: bool = False,
-        spacy_model: str = "small",
     ):
         """
         Index an Elixir repository.
@@ -91,8 +121,6 @@ class ElixirIndexer:
             repo_path: Path to the Elixir repository root
             output_path: Path where the index JSON file will be saved
             extract_keywords: If True, extract keywords from documentation using NLP
-            spacy_model: Size of spaCy model to use for keyword extraction
-                ('small', 'medium', or 'large'). Default is 'small'.
 
         Returns:
             Dictionary containing the index data
@@ -102,7 +130,12 @@ class ElixirIndexer:
         if not repo_path_obj.exists():
             raise ValueError(f"Repository path does not exist: {repo_path_obj}")
 
-        print(f"Indexing repository: {repo_path_obj}")
+        if self.verbose:
+            print(f"Indexing repository: {repo_path_obj}")
+            if extract_keywords:
+                # Read and display keyword extraction config
+                method, tier = read_keyword_extraction_config(repo_path_obj)
+                print(f"Keyword extraction: {method.upper()} ({tier})")
 
         # Set up signal handlers for graceful interruption
         signal.signal(signal.SIGINT, self._handle_interrupt)
@@ -113,25 +146,33 @@ class ElixirIndexer:
         keyword_extractor = None
         if extract_keywords:
             try:
-                from cicada.lightweight_keyword_extractor import (
-                    LightweightKeywordExtractor,
-                )
+                # Read keyword extraction config from config.yaml
+                method, tier = read_keyword_extraction_config(repo_path_obj)
 
-                keyword_extractor = LightweightKeywordExtractor(
-                    verbose=True, model_size=spacy_model
-                )
+                if method == "bert":
+                    # Initialize KeyBERT extractor
+                    from cicada.keybert_extractor import KeyBERTExtractor
+
+                    keyword_extractor = KeyBERTExtractor(model_tier=tier, verbose=self.verbose)
+                else:
+                    # Initialize lemminflect extractor (default)
+                    from cicada.lightweight_keyword_extractor import (
+                        LightweightKeywordExtractor,
+                    )
+
+                    keyword_extractor = LightweightKeywordExtractor(verbose=self.verbose)
             except Exception as e:
-                print(f"Warning: Could not initialize keyword extractor: {e}")
-                print("Continuing without keyword extraction...")
+                if self.verbose:
+                    print(f"Warning: Could not initialize keyword extractor: {e}")
+                    print("Continuing without keyword extraction...")
                 extract_keywords = False
 
         # Find all Elixir files
        elixir_files = self._find_elixir_files(repo_path_obj)
        total_files = len(elixir_files)
 
-        print(f"Found {total_files} Elixir files")
-        if extract_keywords:
-            print("Keyword extraction enabled")
+        if self.verbose:
+            print(f"Found {total_files} Elixir files")
 
         # Parse all files
         all_modules = {}
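
Both extractor backends are constructed as shown in this hunk and expose the same extract_keywords_simple call used in the indexing loop below. A usage sketch (the sample text and tier are illustrative):

    from cicada.keybert_extractor import KeyBERTExtractor
    from cicada.lightweight_keyword_extractor import LightweightKeywordExtractor

    method, tier = ("bert", "regular")  # as returned by read_keyword_extraction_config
    if method == "bert":
        extractor = KeyBERTExtractor(model_tier=tier, verbose=False)
    else:
        extractor = LightweightKeywordExtractor(verbose=False)

    keywords = extractor.extract_keywords_simple("Parses Elixir source files", top_n=10)
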
@@ -156,10 +197,8 @@ class ElixirIndexer:
                 module_keywords = None
                 if keyword_extractor and module_data.get("moduledoc"):
                     try:
-                        module_keywords = (
-                            keyword_extractor.extract_keywords_simple(
-                                module_data["moduledoc"], top_n=10
-                            )
+                        module_keywords = keyword_extractor.extract_keywords_simple(
+                            module_data["moduledoc"], top_n=10
                         )
                     except Exception as e:
                         keyword_extraction_failures += 1
@@ -178,10 +217,8 @@ class ElixirIndexer:
                             # Include function name in text for keyword extraction
                             # This ensures the function name identifier gets 10x weight
                             text_for_keywords = f"{func_name} {func['doc']}"
-                            func_keywords = (
-                                keyword_extractor.extract_keywords_simple(
-                                    text_for_keywords, top_n=10
-                                )
+                            func_keywords = keyword_extractor.extract_keywords_simple(
+                                text_for_keywords, top_n=10
                             )
                             if func_keywords:
                                 func["keywords"] = func_keywords
@@ -222,7 +259,7 @@ class ElixirIndexer:
                 files_processed += 1
 
                 # Progress reporting
-                if files_processed % self.PROGRESS_REPORT_INTERVAL == 0:
+                if self.verbose and files_processed % self.PROGRESS_REPORT_INTERVAL == 0:
                     print(f" Processed {files_processed}/{total_files} files...")
 
                 # Check for interruption after each file
@@ -230,7 +267,8 @@ class ElixirIndexer:
                     break
 
             except Exception as e:
-                print(f" Skipping {file_path}: {e}")
+                if self.verbose:
+                    print(f" Skipping {file_path}: {e}")
                 # Check for interruption even after error
                 if self._check_and_report_interruption(files_processed, total_files):
                     break
@@ -257,54 +295,59 @@ class ElixirIndexer:
         if is_first_run:
             from cicada.utils.path_utils import ensure_gitignore_has_cicada
 
-            if ensure_gitignore_has_cicada(repo_path_obj):
+            if ensure_gitignore_has_cicada(repo_path_obj) and self.verbose:
                 print("✓ Added .cicada/ to .gitignore")
 
         save_index(index, output_path_obj, create_dirs=True)
 
         # Compute and save hashes for all PROCESSED files for future incremental updates
-        print("Computing file hashes for incremental updates...")
+        if self.verbose:
+            print("Computing file hashes for incremental updates...")
         # Only hash files that were actually processed
         processed_files = [
             str(f.relative_to(repo_path_obj)) for f in elixir_files[:files_processed]
         ]
         file_hashes = compute_hashes_for_files(processed_files, str(repo_path_obj))
-        save_file_hashes(str(output_path_obj.parent), file_hashes)
+        # Save hashes to centralized storage directory
+        from cicada.utils import get_storage_dir
+
+        storage_dir = get_storage_dir(repo_path_obj)
+        save_file_hashes(str(storage_dir), file_hashes)
 
         # Report completion status
-        if self._interrupted:
-            print(f"\n✓ Partial index saved!")
-            print(
-                f" Processed: {files_processed}/{total_files} files ({files_processed/total_files*100:.1f}%)"
-            )
-            print(f" Modules: {len(all_modules)}")
-            print(f" Functions: {total_functions}")
-            print(
-                f"\n💡 Run the command again to continue indexing remaining {total_files - files_processed} file(s)"
-            )
-        else:
-            print(f"\nIndexing complete!")
-            print(f" Modules: {len(all_modules)}")
-            print(f" Functions: {total_functions}")
+        if self.verbose:
+            if self._interrupted:
+                print("\n✓ Partial index saved!")
+                print(
+                    f" Processed: {files_processed}/{total_files} files ({files_processed/total_files*100:.1f}%)"
+                )
+                print(f" Modules: {len(all_modules)}")
+                print(f" Functions: {total_functions}")
+                print(
+                    f"\n💡 Run the command again to continue indexing remaining {total_files - files_processed} file(s)"
+                )
+            else:
+                print("\nIndexing complete!")
+                print(f" Modules: {len(all_modules)}")
+                print(f" Functions: {total_functions}")
 
-        # Report keyword extraction failures if any
-        if extract_keywords and keyword_extraction_failures > 0:
-            print(
-                f"\n⚠️ Warning: Keyword extraction failed for {keyword_extraction_failures} module(s) or function(s)"
-            )
-            print(" Some documentation may not be indexed for keyword search.")
+            # Report keyword extraction failures if any
+            if extract_keywords and keyword_extraction_failures > 0:
+                print(
+                    f"\n⚠️ Warning: Keyword extraction failed for {keyword_extraction_failures} module(s) or function(s)"
+                )
+                print(" Some documentation may not be indexed for keyword search.")
 
-        print(f"\nIndex saved to: {output_path_obj}")
-        print(f"Hashes saved to: {output_path_obj.parent}/hashes.json")
+            print(f"\nIndex saved to: {output_path_obj}")
+            print(f"Hashes saved to: {output_path_obj.parent}/hashes.json")
 
         return index
 
     def incremental_index_repository(
         self,
         repo_path: str,
-        output_path: str = ".cicada/index.json",
+        output_path: str,
         extract_keywords: bool = False,
-        spacy_model: str = "small",
         force_full: bool = False,
     ):
         """
@@ -318,7 +361,6 @@ class ElixirIndexer:
             repo_path: Path to the Elixir repository root
             output_path: Path where the index JSON file will be saved
             extract_keywords: If True, extract keywords from documentation using NLP
-            spacy_model: Size of spaCy model to use for keyword extraction
             force_full: If True, ignore existing hashes and do full reindex
 
         Returns:
@@ -326,32 +368,39 @@ class ElixirIndexer:
        """
        repo_path_obj = Path(repo_path).resolve()
        output_path_obj = Path(output_path)
-        cicada_dir = output_path_obj.parent
+        # Use centralized storage directory for hashes
+        from cicada.utils import get_storage_dir
+
+        storage_dir = get_storage_dir(repo_path_obj)
 
         if not repo_path_obj.exists():
             raise ValueError(f"Repository path does not exist: {repo_path_obj}")
 
         # Load existing index and hashes
         existing_index = load_index(output_path_obj) if not force_full else None
-        existing_hashes = load_file_hashes(str(cicada_dir)) if not force_full else {}
+        existing_hashes = load_file_hashes(str(storage_dir)) if not force_full else {}
 
         # Validate existing index structure if loaded
         if existing_index:
             is_valid, error = validate_index_structure(existing_index)
             if not is_valid:
-                print(
-                    f"Warning: Existing index is corrupted ({error}). Performing full reindex..."
-                )
+                if self.verbose:
+                    print(
+                        f"Warning: Existing index is corrupted ({error}). Performing full reindex..."
+                    )
                 existing_index = None
 
         # If no existing data, do full index
         if not existing_index or not existing_hashes:
-            print("No existing index or hashes found. Performing full index...")
-            return self.index_repository(
-                str(repo_path_obj), str(output_path_obj), extract_keywords, spacy_model
-            )
+            if self.verbose:
+                print("No existing index or hashes found. Performing full index...")
+            return self.index_repository(str(repo_path_obj), str(output_path_obj), extract_keywords)
 
-        print(f"Performing incremental index of: {repo_path_obj}")
+        if self.verbose:
+            # Read and display keyword extraction config
+            method, tier = read_keyword_extraction_config(repo_path_obj)
+            print(f"Performing incremental index of: {repo_path_obj}")
+            print(f"Keyword extraction: {method.upper()} ({tier})")
 
         # Set up signal handlers for graceful interruption
         signal.signal(signal.SIGINT, self._handle_interrupt)
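
Hash bookkeeping moves from the index file's parent directory to a centralized storage directory, so the full and incremental paths now resolve the same location. A sketch of the load/save pairing, using only calls visible in this diff:

    from pathlib import Path
    from cicada.utils import get_storage_dir
    from cicada.utils.hash_utils import load_file_hashes, save_file_hashes

    storage_dir = get_storage_dir(Path("path/to/repo").resolve())
    hashes = load_file_hashes(str(storage_dir))   # previously read from the index's parent
    save_file_hashes(str(storage_dir), hashes)    # hashes.json now lives here
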
@@ -364,7 +413,8 @@ class ElixirIndexer:
         relative_files = [str(f.relative_to(repo_path_obj)) for f in elixir_files]
 
         # Detect file changes
-        print("Detecting file changes...")
+        if self.verbose:
+            print("Detecting file changes...")
         new_files, modified_files, deleted_files = detect_file_changes(
             relative_files, existing_hashes, str(repo_path_obj)
         )
@@ -377,10 +427,11 @@ class ElixirIndexer:
             print("No changes detected. Index is up to date.")
             return existing_index
 
-        print(f"Changes detected:")
-        print(f" New files: {len(new_files)}")
-        print(f" Modified files: {len(modified_files)}")
-        print(f" Deleted files: {len(deleted_files)}")
+        if self.verbose:
+            print("Changes detected:")
+            print(f" New files: {len(new_files)}")
+            print(f" Modified files: {len(modified_files)}")
+            print(f" Deleted files: {len(deleted_files)}")
 
         if files_to_process:
             print(f"\nProcessing {len(files_to_process)} changed file(s)...")
@@ -389,13 +440,21 @@ class ElixirIndexer:
         keyword_extractor = None
         if extract_keywords:
             try:
-                from cicada.lightweight_keyword_extractor import (
-                    LightweightKeywordExtractor,
-                )
+                # Read keyword extraction config from config.yaml
+                method, tier = read_keyword_extraction_config(repo_path_obj)
 
-                keyword_extractor = LightweightKeywordExtractor(
-                    verbose=True, model_size=spacy_model
-                )
+                if method == "bert":
+                    # Initialize KeyBERT extractor
+                    from cicada.keybert_extractor import KeyBERTExtractor
+
+                    keyword_extractor = KeyBERTExtractor(model_tier=tier, verbose=self.verbose)
+                else:
+                    # Initialize lemminflect extractor (default)
+                    from cicada.lightweight_keyword_extractor import (
+                        LightweightKeywordExtractor,
+                    )
+
+                    keyword_extractor = LightweightKeywordExtractor(verbose=self.verbose)
             except Exception as e:
                 print(f"Warning: Could not initialize keyword extractor: {e}")
                 print("Continuing without keyword extraction...")
@@ -425,12 +484,10 @@ class ElixirIndexer:
                 module_keywords = None
                 if keyword_extractor and module_data.get("moduledoc"):
                     try:
-                        module_keywords = (
-                            keyword_extractor.extract_keywords_simple(
-                                module_data["moduledoc"], top_n=10
-                            )
+                        module_keywords = keyword_extractor.extract_keywords_simple(
+                            module_data["moduledoc"], top_n=10
                         )
-                    except Exception as e:
+                    except Exception:
                         keyword_extraction_failures += 1
 
                 # Extract keywords from function docs
@@ -440,14 +497,12 @@ class ElixirIndexer:
                         try:
                             func_name = func.get("name", "")
                             text_for_keywords = f"{func_name} {func['doc']}"
-                            func_keywords = (
-                                keyword_extractor.extract_keywords_simple(
-                                    text_for_keywords, top_n=10
-                                )
+                            func_keywords = keyword_extractor.extract_keywords_simple(
+                                text_for_keywords, top_n=10
                             )
                             if func_keywords:
                                 func["keywords"] = func_keywords
-                        except Exception as e:
+                        except Exception:
                             keyword_extraction_failures += 1
 
                 # Store module info
@@ -478,17 +533,13 @@ class ElixirIndexer:
                 files_processed += 1
 
                 # Check for interruption after each file
-                if self._check_and_report_interruption(
-                    files_processed, len(files_to_process)
-                ):
+                if self._check_and_report_interruption(files_processed, len(files_to_process)):
                     break
 
             except Exception as e:
                 print(f" Skipping {file_path}: {e}")
                 # Check for interruption even after error
-                if self._check_and_report_interruption(
-                    files_processed, len(files_to_process)
-                ):
+                if self._check_and_report_interruption(files_processed, len(files_to_process)):
                     break
                 continue
 
@@ -502,13 +553,13 @@ class ElixirIndexer:
         }
 
         # Merge with existing index
-        print("\nMerging with existing index...")
-        merged_index = merge_indexes_incremental(
-            existing_index, new_index, deleted_files
-        )
+        if self.verbose:
+            print("\nMerging with existing index...")
+        merged_index = merge_indexes_incremental(existing_index, new_index, deleted_files)
 
         # Update hashes for all current files
-        print("Updating file hashes...")
+        if self.verbose:
+            print("Updating file hashes...")
         updated_hashes = dict(existing_hashes)
 
         # Compute hashes only for files that were actually processed
@@ -522,15 +573,13 @@ class ElixirIndexer:
 
         # Save index and hashes
         save_index(merged_index, output_path_obj, create_dirs=True)
-        save_file_hashes(str(cicada_dir), updated_hashes)
+        save_file_hashes(str(storage_dir), updated_hashes)
 
         # Report completion status
         if self._interrupted:
             remaining = len(files_to_process) - files_processed
-            print(f"\n✓ Partial index saved!")
-            print(
-                f" Processed: {files_processed}/{len(files_to_process)} changed file(s)"
-            )
+            print("\n✓ Partial index saved!")
+            print(f" Processed: {files_processed}/{len(files_to_process)} changed file(s)")
             print(f" Total modules: {merged_index['metadata']['total_modules']}")
             print(f" Total functions: {merged_index['metadata']['total_functions']}")
             print(f" Files deleted: {len(deleted_files)}")
@@ -538,7 +587,7 @@ class ElixirIndexer:
                 f"\n💡 Run the command again to continue indexing remaining {remaining} changed file(s)"
             )
         else:
-            print(f"\nIncremental indexing complete!")
+            print("\nIncremental indexing complete!")
             print(f" Total modules: {merged_index['metadata']['total_modules']}")
             print(f" Total functions: {merged_index['metadata']['total_functions']}")
             print(f" Files processed: {files_processed}")
@@ -550,9 +599,6 @@ class ElixirIndexer:
                 f"\n⚠️ Warning: Keyword extraction failed for {keyword_extraction_failures} module(s) or function(s)"
             )
 
-        print(f"\nIndex saved to: {output_path_obj}")
-        print(f"Hashes saved to: {cicada_dir}/hashes.json")
-
         return merged_index
 
     def _find_elixir_files(self, repo_path: Path) -> list:
@@ -593,18 +639,6 @@ def main():
         default=".cicada/index.json",
         help="Output path for the index file (default: .cicada/index.json)",
     )
-    parser.add_argument(
-        "--extract-keywords",
-        action="store_true",
-        help="Extract keywords from documentation using NLP (adds ~1-2s per 100 docs)",
-    )
-    parser.add_argument(
-        "--spacy-model",
-        choices=["small", "medium", "large"],
-        default="small",
-        help="Size of spaCy model to use for keyword extraction (default: small). "
-        "Medium and large models provide better accuracy but are slower.",
-    )
     parser.add_argument(
         "--full",
         action="store_true",
@@ -619,8 +653,7 @@ def main():
     indexer.incremental_index_repository(
         args.repo,
         args.output,
-        extract_keywords=args.extract_keywords,
-        spacy_model=args.spacy_model,
+        extract_keywords=True,
        force_full=args.full,
    )
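
With the --extract-keywords and --spacy-model flags gone, this entry point always extracts keywords, and the backend choice comes from config.yaml. An equivalent programmatic call (constructing ElixirIndexer with no arguments is an assumption; __init__ is not shown in this diff):

    from cicada.indexer import ElixirIndexer

    indexer = ElixirIndexer()  # assumed no-arg constructor
    index = indexer.incremental_index_repository(
        "path/to/elixir/repo",
        ".cicada/index.json",   # output_path: the method no longer supplies a default
        extract_keywords=True,  # hard-coded in main()
        force_full=False,       # --full
    )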