cicada-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. cicada/_version_hash.py +4 -0
  2. cicada/cli.py +6 -748
  3. cicada/commands.py +1255 -0
  4. cicada/dead_code/__init__.py +1 -0
  5. cicada/{find_dead_code.py → dead_code/finder.py} +2 -1
  6. cicada/dependency_analyzer.py +147 -0
  7. cicada/entry_utils.py +92 -0
  8. cicada/extractors/base.py +9 -9
  9. cicada/extractors/call.py +17 -20
  10. cicada/extractors/common.py +64 -0
  11. cicada/extractors/dependency.py +117 -235
  12. cicada/extractors/doc.py +2 -49
  13. cicada/extractors/function.py +10 -14
  14. cicada/extractors/keybert.py +228 -0
  15. cicada/extractors/keyword.py +191 -0
  16. cicada/extractors/module.py +6 -10
  17. cicada/extractors/spec.py +8 -56
  18. cicada/format/__init__.py +20 -0
  19. cicada/{ascii_art.py → format/ascii_art.py} +1 -1
  20. cicada/format/formatter.py +1145 -0
  21. cicada/git_helper.py +134 -7
  22. cicada/indexer.py +322 -89
  23. cicada/interactive_setup.py +251 -323
  24. cicada/interactive_setup_helpers.py +302 -0
  25. cicada/keyword_expander.py +437 -0
  26. cicada/keyword_search.py +208 -422
  27. cicada/keyword_test.py +383 -16
  28. cicada/mcp/__init__.py +10 -0
  29. cicada/mcp/entry.py +17 -0
  30. cicada/mcp/filter_utils.py +107 -0
  31. cicada/mcp/pattern_utils.py +118 -0
  32. cicada/{mcp_server.py → mcp/server.py} +819 -73
  33. cicada/mcp/tools.py +473 -0
  34. cicada/pr_finder.py +2 -3
  35. cicada/pr_indexer/indexer.py +3 -2
  36. cicada/setup.py +167 -35
  37. cicada/tier.py +225 -0
  38. cicada/utils/__init__.py +9 -2
  39. cicada/utils/fuzzy_match.py +54 -0
  40. cicada/utils/index_utils.py +9 -0
  41. cicada/utils/path_utils.py +18 -0
  42. cicada/utils/text_utils.py +52 -1
  43. cicada/utils/tree_utils.py +47 -0
  44. cicada/version_check.py +99 -0
  45. cicada/watch_manager.py +320 -0
  46. cicada/watcher.py +431 -0
  47. cicada_mcp-0.3.0.dist-info/METADATA +541 -0
  48. cicada_mcp-0.3.0.dist-info/RECORD +70 -0
  49. cicada_mcp-0.3.0.dist-info/entry_points.txt +4 -0
  50. cicada/formatter.py +0 -864
  51. cicada/keybert_extractor.py +0 -286
  52. cicada/lightweight_keyword_extractor.py +0 -290
  53. cicada/mcp_entry.py +0 -683
  54. cicada/mcp_tools.py +0 -291
  55. cicada_mcp-0.2.0.dist-info/METADATA +0 -735
  56. cicada_mcp-0.2.0.dist-info/RECORD +0 -53
  57. cicada_mcp-0.2.0.dist-info/entry_points.txt +0 -4
  58. /cicada/{dead_code_analyzer.py → dead_code/analyzer.py} +0 -0
  59. /cicada/{colors.py → format/colors.py} +0 -0
  60. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/WHEEL +0 -0
  61. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
  62. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/top_level.txt +0 -0
cicada/indexer.py CHANGED
@@ -11,7 +11,14 @@ import sys
11
11
  from datetime import datetime
12
12
  from pathlib import Path
13
13
 
14
+ from cicada.dependency_analyzer import (
15
+ calculate_function_end_line,
16
+ extract_function_dependencies,
17
+ extract_module_dependencies,
18
+ )
19
+ from cicada.git_helper import GitHelper
14
20
  from cicada.parser import ElixirParser
21
+ from cicada.tier import read_keyword_extraction_config
15
22
  from cicada.utils import (
16
23
  load_index,
17
24
  merge_indexes_incremental,
@@ -24,42 +31,7 @@ from cicada.utils.hash_utils import (
24
31
  load_file_hashes,
25
32
  save_file_hashes,
26
33
  )
27
- from cicada.utils.storage import get_config_path
28
-
29
-
30
- def read_keyword_extraction_config(repo_path: Path) -> tuple[str, str]:
31
- """
32
- Read keyword extraction configuration from config.yaml.
33
-
34
- Args:
35
- repo_path: Path to the repository
36
-
37
- Returns:
38
- tuple[str, str]: (method, tier) where method is 'lemminflect' or 'bert',
39
- and tier is 'fast', 'regular', or 'max'.
40
- Returns ('lemminflect', 'regular') as default if config not found.
41
- """
42
- try:
43
- import yaml
44
-
45
- config_path = get_config_path(repo_path)
46
- if not config_path.exists():
47
- # Default to lemminflect if config doesn't exist
48
- return ("lemminflect", "regular")
49
-
50
- with open(config_path) as f:
51
- config = yaml.safe_load(f)
52
-
53
- if config and "keyword_extraction" in config:
54
- method = config["keyword_extraction"].get("method", "lemminflect")
55
- tier = config["keyword_extraction"].get("tier", "regular")
56
- return (method, tier)
57
-
58
- # Default to lemminflect if keyword_extraction section not found
59
- return ("lemminflect", "regular")
60
- except Exception:
61
- # If anything goes wrong, default to lemminflect
62
- return ("lemminflect", "regular")
34
+ from cicada.version_check import get_version_string, version_mismatch
63
35
 
64
36
 
65
37
  class ElixirIndexer:
@@ -68,6 +40,10 @@ class ElixirIndexer:
68
40
  # Progress reporting interval - report every N files processed
69
41
  PROGRESS_REPORT_INTERVAL = 10
70
42
 
43
+ # Keyword expansion parameters
44
+ DEFAULT_EXPANSION_TOP_N = 3
45
+ DEFAULT_EXPANSION_THRESHOLD = 0.2
46
+
71
47
  def __init__(self, verbose: bool = False):
72
48
  """Initialize the indexer with a parser."""
73
49
  self.parser = ElixirParser()
@@ -82,6 +58,33 @@ class ElixirIndexer:
82
58
  }
83
59
  self._interrupted = False
84
60
 
61
+ def _extract_dependencies(self, module_data: dict, functions: list) -> tuple[dict, list]:
62
+ """
63
+ Extract module and function level dependencies.
64
+
65
+ Args:
66
+ module_data: Parsed module data containing calls, aliases, etc.
67
+ functions: List of function data dictionaries
68
+
69
+ Returns:
70
+ Tuple of (module_dependencies, modified_functions_list)
71
+ """
72
+ # Extract module-level dependencies
73
+ module_dependencies = extract_module_dependencies(module_data)
74
+
75
+ # Extract function-level dependencies
76
+ all_calls = module_data.get("calls", [])
77
+ for i, func in enumerate(functions):
78
+ # Calculate function end line
79
+ next_func_line = functions[i + 1]["line"] if i + 1 < len(functions) else None
80
+ func_end_line = calculate_function_end_line(func, next_func_line)
81
+
82
+ # Extract dependencies for this function
83
+ func_deps = extract_function_dependencies(module_data, func, all_calls, func_end_line)
84
+ func["dependencies"] = func_deps
85
+
86
+ return module_dependencies, functions
87
+
85
88
  def _handle_interrupt(self, _signum, _frame):
86
89
  """Handle interrupt signals (Ctrl-C, SIGTERM) gracefully."""
87
90
  print("\n\n⚠️ Interrupt received. Finishing current file and saving progress...")
@@ -113,6 +116,7 @@ class ElixirIndexer:
113
116
  repo_path: str,
114
117
  output_path: str,
115
118
  extract_keywords: bool = False,
119
+ compute_timestamps: bool = False,
116
120
  ):
117
121
  """
118
122
  Index an Elixir repository.
@@ -121,6 +125,7 @@ class ElixirIndexer:
121
125
  repo_path: Path to the Elixir repository root
122
126
  output_path: Path where the index JSON file will be saved
123
127
  extract_keywords: If True, extract keywords from documentation using NLP
128
+ compute_timestamps: If True, compute git history timestamps for functions
124
129
 
125
130
  Returns:
126
131
  Dictionary containing the index data
@@ -134,39 +139,61 @@ class ElixirIndexer:
134
139
  print(f"Indexing repository: {repo_path_obj}")
135
140
  if extract_keywords:
136
141
  # Read and display keyword extraction config
137
- method, tier = read_keyword_extraction_config(repo_path_obj)
138
- print(f"Keyword extraction: {method.upper()} ({tier})")
142
+ extraction_method, expansion_method = read_keyword_extraction_config(repo_path_obj)
143
+ print(
144
+ f"Keyword extraction: {extraction_method.upper()} + {expansion_method.upper()}"
145
+ )
139
146
 
140
147
  # Set up signal handlers for graceful interruption
141
148
  signal.signal(signal.SIGINT, self._handle_interrupt)
142
149
  signal.signal(signal.SIGTERM, self._handle_interrupt)
143
150
  self._interrupted = False
144
151
 
145
- # Initialize keyword extractor if requested
152
+ # Initialize keyword extractor and expander if requested
146
153
  keyword_extractor = None
154
+ keyword_expander = None
147
155
  if extract_keywords:
148
156
  try:
149
157
  # Read keyword extraction config from config.yaml
150
- method, tier = read_keyword_extraction_config(repo_path_obj)
158
+ extraction_method, expansion_method = read_keyword_extraction_config(repo_path_obj)
151
159
 
152
- if method == "bert":
153
- # Initialize KeyBERT extractor
154
- from cicada.keybert_extractor import KeyBERTExtractor
160
+ # Initialize extraction method
161
+ if extraction_method == "bert":
162
+ from cicada.extractors.keybert import KeyBERTExtractor
155
163
 
156
- keyword_extractor = KeyBERTExtractor(model_tier=tier, verbose=self.verbose)
164
+ keyword_extractor = KeyBERTExtractor(verbose=self.verbose)
157
165
  else:
158
- # Initialize lemminflect extractor (default)
159
- from cicada.lightweight_keyword_extractor import (
160
- LightweightKeywordExtractor,
161
- )
166
+ # Use regular (TF-based) extractor as default
167
+ from cicada.extractors.keyword import RegularKeywordExtractor
168
+
169
+ keyword_extractor = RegularKeywordExtractor(verbose=self.verbose)
170
+
171
+ # Initialize expansion method
172
+ from cicada.keyword_expander import KeywordExpander
173
+
174
+ keyword_expander = KeywordExpander(
175
+ expansion_type=expansion_method, verbose=self.verbose
176
+ )
162
177
 
163
- keyword_extractor = LightweightKeywordExtractor(verbose=self.verbose)
164
178
  except Exception as e:
165
179
  if self.verbose:
166
- print(f"Warning: Could not initialize keyword extractor: {e}")
180
+ print(f"Warning: Could not initialize keyword extractor/expander: {e}")
167
181
  print("Continuing without keyword extraction...")
168
182
  extract_keywords = False
169
183
 
184
+ # Initialize git helper if timestamps are requested
185
+ git_helper = None
186
+ if compute_timestamps:
187
+ try:
188
+ git_helper = GitHelper(str(repo_path_obj))
189
+ if self.verbose:
190
+ print("Git history tracking enabled - computing function timestamps")
191
+ except Exception as e:
192
+ if self.verbose:
193
+ print(f"Warning: Could not initialize git helper: {e}")
194
+ print("Continuing without timestamp computation...")
195
+ compute_timestamps = False
196
+
170
197
  # Find all Elixir files
171
198
  elixir_files = self._find_elixir_files(repo_path_obj)
172
199
  total_files = len(elixir_files)
@@ -193,13 +220,48 @@ class ElixirIndexer:
193
220
  public_count = sum(1 for f in functions if f["type"] == "def")
194
221
  private_count = sum(1 for f in functions if f["type"] == "defp")
195
222
 
196
- # Extract keywords if enabled
223
+ # Extract and expand keywords if enabled
197
224
  module_keywords = None
198
225
  if keyword_extractor and module_data.get("moduledoc"):
199
226
  try:
200
- module_keywords = keyword_extractor.extract_keywords_simple(
227
+ # Step 1: Extract keywords with scores
228
+ extraction_result = keyword_extractor.extract_keywords(
201
229
  module_data["moduledoc"], top_n=10
202
230
  )
231
+ extracted_keywords = [
232
+ kw for kw, _ in extraction_result["top_keywords"]
233
+ ]
234
+ keyword_scores = {
235
+ kw.lower(): score
236
+ for kw, score in extraction_result["top_keywords"]
237
+ }
238
+
239
+ # Step 2: Expand keywords with scores
240
+ if keyword_expander and extracted_keywords:
241
+ expansion_result = keyword_expander.expand_keywords(
242
+ extracted_keywords,
243
+ top_n=self.DEFAULT_EXPANSION_TOP_N,
244
+ threshold=self.DEFAULT_EXPANSION_THRESHOLD,
245
+ return_scores=True,
246
+ keyword_scores=keyword_scores,
247
+ )
248
+ # Convert to dict: word -> max_score
249
+ module_keywords = {}
250
+ # When return_scores=True, expansion_result is a dict
251
+ if not isinstance(expansion_result, dict):
252
+ raise TypeError(
253
+ "Expected dict from expand_keywords with return_scores=True"
254
+ )
255
+ for item in expansion_result["words"]:
256
+ word = item["word"]
257
+ score = item["score"]
258
+ if (
259
+ word not in module_keywords
260
+ or score > module_keywords[word]
261
+ ):
262
+ module_keywords[word] = score
263
+ else:
264
+ module_keywords = keyword_scores
203
265
  except Exception as e:
204
266
  keyword_extraction_failures += 1
205
267
  if self.verbose:
@@ -208,27 +270,97 @@ class ElixirIndexer:
208
270
  file=sys.stderr,
209
271
  )
210
272
 
211
- # Extract keywords from function docs
212
- if keyword_extractor:
213
- for func in functions:
214
- if func.get("doc"):
215
- func_name = func.get("name", "")
216
- try:
217
- # Include function name in text for keyword extraction
218
- # This ensures the function name identifier gets 10x weight
219
- text_for_keywords = f"{func_name} {func['doc']}"
220
- func_keywords = keyword_extractor.extract_keywords_simple(
221
- text_for_keywords, top_n=10
273
+ # Enrich function metadata (keywords and timestamps)
274
+ for func in functions:
275
+ func_name = func.get("name", "")
276
+
277
+ # Extract and expand keywords from function docs
278
+ if keyword_extractor and func.get("doc"):
279
+ try:
280
+ # Include function name in text for keyword extraction
281
+ # This ensures the function name identifier gets 10x weight
282
+ text_for_keywords = f"{func_name} {func['doc']}"
283
+ # Step 1: Extract keywords with scores
284
+ extraction_result = keyword_extractor.extract_keywords(
285
+ text_for_keywords, top_n=10
286
+ )
287
+ extracted_keywords = [
288
+ kw for kw, _ in extraction_result["top_keywords"]
289
+ ]
290
+ keyword_scores = {
291
+ kw.lower(): score
292
+ for kw, score in extraction_result["top_keywords"]
293
+ }
294
+
295
+ # Step 2: Expand keywords with scores
296
+ if keyword_expander and extracted_keywords:
297
+ expansion_result = keyword_expander.expand_keywords(
298
+ extracted_keywords,
299
+ top_n=self.DEFAULT_EXPANSION_TOP_N,
300
+ threshold=self.DEFAULT_EXPANSION_THRESHOLD,
301
+ return_scores=True,
302
+ keyword_scores=keyword_scores,
222
303
  )
223
- if func_keywords:
224
- func["keywords"] = func_keywords
225
- except Exception as e:
226
- keyword_extraction_failures += 1
227
- if self.verbose:
228
- print(
229
- f"Warning: Keyword extraction failed for {module_name}.{func_name}: {e}",
230
- file=sys.stderr,
304
+ # Convert to dict: word -> max_score
305
+ func_keywords = {}
306
+ # When return_scores=True, expansion_result is a dict
307
+ if not isinstance(expansion_result, dict):
308
+ raise TypeError(
309
+ "Expected dict from expand_keywords with return_scores=True"
231
310
  )
311
+ for item in expansion_result["words"]:
312
+ word = item["word"]
313
+ score = item["score"]
314
+ if (
315
+ word not in func_keywords
316
+ or score > func_keywords[word]
317
+ ):
318
+ func_keywords[word] = score
319
+ else:
320
+ func_keywords = keyword_scores
321
+
322
+ if func_keywords:
323
+ func["keywords"] = func_keywords
324
+ except Exception as e:
325
+ keyword_extraction_failures += 1
326
+ if self.verbose:
327
+ print(
328
+ f"Warning: Keyword extraction failed for {module_name}.{func_name}: {e}",
329
+ file=sys.stderr,
330
+ )
331
+
332
+ # Compute git history timestamps if enabled
333
+ if git_helper and func_name:
334
+ try:
335
+ # Get function evolution metadata
336
+ evolution = git_helper.get_function_evolution(
337
+ file_path=str(file_path.relative_to(repo_path_obj)),
338
+ function_name=func_name,
339
+ )
340
+
341
+ if evolution:
342
+ # Add timestamp fields to function
343
+ func["created_at"] = evolution["created_at"]["date"]
344
+ func["last_modified_at"] = evolution["last_modified"][
345
+ "date"
346
+ ]
347
+ func["last_modified_sha"] = evolution["last_modified"][
348
+ "sha"
349
+ ]
350
+ func["modification_count"] = evolution[
351
+ "total_modifications"
352
+ ]
353
+ except Exception as e:
354
+ if self.verbose:
355
+ print(
356
+ f"Warning: Could not compute timestamps for {module_name}.{func_name}: {e}",
357
+ file=sys.stderr,
358
+ )
359
+
360
+ # Extract dependencies
361
+ module_dependencies, functions = self._extract_dependencies(
362
+ module_data, functions
363
+ )
232
364
 
233
365
  # Store module info
234
366
  module_info = {
@@ -246,6 +378,7 @@ class ElixirIndexer:
246
378
  "behaviours": module_data.get("behaviours", []),
247
379
  "value_mentions": module_data.get("value_mentions", []),
248
380
  "calls": module_data.get("calls", []),
381
+ "dependencies": module_dependencies,
249
382
  }
250
383
 
251
384
  # Add module keywords if extracted
@@ -282,6 +415,7 @@ class ElixirIndexer:
282
415
  "total_modules": len(all_modules),
283
416
  "total_functions": total_functions,
284
417
  "repo_path": str(repo_path_obj),
418
+ "cicada_version": get_version_string(),
285
419
  },
286
420
  }
287
421
 
@@ -390,6 +524,20 @@ class ElixirIndexer:
390
524
  )
391
525
  existing_index = None
392
526
 
527
+ # Check for version mismatch - if cicada version differs, force full reindex
528
+ if existing_index:
529
+ stored_version = existing_index.get("metadata", {}).get("cicada_version")
530
+ current_version = get_version_string()
531
+ if version_mismatch(stored_version, current_version):
532
+ if self.verbose:
533
+ print(
534
+ f"Warning: Cicada version mismatch. "
535
+ f"Index was built with {stored_version}, current version is {current_version}. "
536
+ f"Performing full reindex..."
537
+ )
538
+ existing_index = None
539
+ existing_hashes = {}
540
+
393
541
  # If no existing data, do full index
394
542
  if not existing_index or not existing_hashes:
395
543
  if self.verbose:
@@ -398,9 +546,9 @@ class ElixirIndexer:
398
546
 
399
547
  if self.verbose:
400
548
  # Read and display keyword extraction config
401
- method, tier = read_keyword_extraction_config(repo_path_obj)
549
+ extraction_method, expansion_method = read_keyword_extraction_config(repo_path_obj)
402
550
  print(f"Performing incremental index of: {repo_path_obj}")
403
- print(f"Keyword extraction: {method.upper()} ({tier})")
551
+ print(f"Keyword extraction: {extraction_method.upper()} + {expansion_method.upper()}")
404
552
 
405
553
  # Set up signal handlers for graceful interruption
406
554
  signal.signal(signal.SIGINT, self._handle_interrupt)
@@ -436,27 +584,34 @@ class ElixirIndexer:
436
584
  if files_to_process:
437
585
  print(f"\nProcessing {len(files_to_process)} changed file(s)...")
438
586
 
439
- # Initialize keyword extractor if requested
587
+ # Initialize keyword extractor and expander if requested
440
588
  keyword_extractor = None
589
+ keyword_expander = None
441
590
  if extract_keywords:
442
591
  try:
443
592
  # Read keyword extraction config from config.yaml
444
- method, tier = read_keyword_extraction_config(repo_path_obj)
593
+ extraction_method, expansion_method = read_keyword_extraction_config(repo_path_obj)
445
594
 
446
- if method == "bert":
447
- # Initialize KeyBERT extractor
448
- from cicada.keybert_extractor import KeyBERTExtractor
595
+ # Initialize extraction method
596
+ if extraction_method == "bert":
597
+ from cicada.extractors.keybert import KeyBERTExtractor
449
598
 
450
- keyword_extractor = KeyBERTExtractor(model_tier=tier, verbose=self.verbose)
599
+ keyword_extractor = KeyBERTExtractor(verbose=self.verbose)
451
600
  else:
452
- # Initialize lemminflect extractor (default)
453
- from cicada.lightweight_keyword_extractor import (
454
- LightweightKeywordExtractor,
455
- )
601
+ # Use regular (TF-based) extractor as default
602
+ from cicada.extractors.keyword import RegularKeywordExtractor
603
+
604
+ keyword_extractor = RegularKeywordExtractor(verbose=self.verbose)
605
+
606
+ # Initialize expansion method
607
+ from cicada.keyword_expander import KeywordExpander
608
+
609
+ keyword_expander = KeywordExpander(
610
+ expansion_type=expansion_method, verbose=self.verbose
611
+ )
456
612
 
457
- keyword_extractor = LightweightKeywordExtractor(verbose=self.verbose)
458
613
  except Exception as e:
459
- print(f"Warning: Could not initialize keyword extractor: {e}")
614
+ print(f"Warning: Could not initialize keyword extractor/expander: {e}")
460
615
  print("Continuing without keyword extraction...")
461
616
  extract_keywords = False
462
617
 
@@ -480,31 +635,107 @@ class ElixirIndexer:
480
635
  public_count = sum(1 for f in functions if f["type"] == "def")
481
636
  private_count = sum(1 for f in functions if f["type"] == "defp")
482
637
 
483
- # Extract keywords if enabled
638
+ # Extract and expand keywords if enabled
484
639
  module_keywords = None
485
640
  if keyword_extractor and module_data.get("moduledoc"):
486
641
  try:
487
- module_keywords = keyword_extractor.extract_keywords_simple(
642
+ # Step 1: Extract keywords with scores
643
+ extraction_result = keyword_extractor.extract_keywords(
488
644
  module_data["moduledoc"], top_n=10
489
645
  )
646
+ extracted_keywords = [
647
+ kw for kw, _ in extraction_result["top_keywords"]
648
+ ]
649
+ keyword_scores = {
650
+ kw.lower(): score
651
+ for kw, score in extraction_result["top_keywords"]
652
+ }
653
+
654
+ # Step 2: Expand keywords with scores
655
+ if keyword_expander and extracted_keywords:
656
+ expansion_result = keyword_expander.expand_keywords(
657
+ extracted_keywords,
658
+ top_n=self.DEFAULT_EXPANSION_TOP_N,
659
+ threshold=self.DEFAULT_EXPANSION_THRESHOLD,
660
+ return_scores=True,
661
+ keyword_scores=keyword_scores,
662
+ )
663
+ # Convert to dict: word -> max_score
664
+ module_keywords = {}
665
+ # When return_scores=True, expansion_result is a dict
666
+ if not isinstance(expansion_result, dict):
667
+ raise TypeError(
668
+ "Expected dict from expand_keywords with return_scores=True"
669
+ )
670
+ for item in expansion_result["words"]:
671
+ word = item["word"]
672
+ score = item["score"]
673
+ if (
674
+ word not in module_keywords
675
+ or score > module_keywords[word]
676
+ ):
677
+ module_keywords[word] = score
678
+ else:
679
+ module_keywords = keyword_scores
490
680
  except Exception:
491
681
  keyword_extraction_failures += 1
492
682
 
493
- # Extract keywords from function docs
683
+ # Extract and expand keywords from function docs
494
684
  if keyword_extractor:
495
685
  for func in functions:
496
686
  if func.get("doc"):
497
687
  try:
498
688
  func_name = func.get("name", "")
499
689
  text_for_keywords = f"{func_name} {func['doc']}"
500
- func_keywords = keyword_extractor.extract_keywords_simple(
690
+ # Step 1: Extract keywords with scores
691
+ extraction_result = keyword_extractor.extract_keywords(
501
692
  text_for_keywords, top_n=10
502
693
  )
694
+ extracted_keywords = [
695
+ kw for kw, _ in extraction_result["top_keywords"]
696
+ ]
697
+ keyword_scores = {
698
+ kw.lower(): score
699
+ for kw, score in extraction_result["top_keywords"]
700
+ }
701
+
702
+ # Step 2: Expand keywords with scores
703
+ if keyword_expander and extracted_keywords:
704
+ expansion_result = keyword_expander.expand_keywords(
705
+ extracted_keywords,
706
+ top_n=self.DEFAULT_EXPANSION_TOP_N,
707
+ threshold=self.DEFAULT_EXPANSION_THRESHOLD,
708
+ return_scores=True,
709
+ keyword_scores=keyword_scores,
710
+ )
711
+ # Convert to dict: word -> max_score
712
+ func_keywords = {}
713
+ # When return_scores=True, expansion_result is a dict
714
+ if not isinstance(expansion_result, dict):
715
+ raise TypeError(
716
+ "Expected dict from expand_keywords with return_scores=True"
717
+ )
718
+ for item in expansion_result["words"]:
719
+ word = item["word"]
720
+ score = item["score"]
721
+ if (
722
+ word not in func_keywords
723
+ or score > func_keywords[word]
724
+ ):
725
+ func_keywords[word] = score
726
+ else:
727
+ func_keywords = keyword_scores
728
+
503
729
  if func_keywords:
504
730
  func["keywords"] = func_keywords
505
731
  except Exception:
506
732
  keyword_extraction_failures += 1
507
733
 
734
+ # Extract dependencies
735
+ module_dependencies, functions = self._extract_dependencies(
736
+ module_data, functions
737
+ )
738
+
508
739
  # Store module info
509
740
  module_info = {
510
741
  "file": relative_file,
@@ -521,6 +752,7 @@ class ElixirIndexer:
521
752
  "behaviours": module_data.get("behaviours", []),
522
753
  "value_mentions": module_data.get("value_mentions", []),
523
754
  "calls": module_data.get("calls", []),
755
+ "dependencies": module_dependencies,
524
756
  }
525
757
 
526
758
  # Add module keywords if extracted
@@ -549,6 +781,7 @@ class ElixirIndexer:
549
781
  "metadata": {
550
782
  "indexed_at": datetime.now().isoformat(),
551
783
  "repo_path": str(repo_path_obj),
784
+ "cicada_version": get_version_string(),
552
785
  },
553
786
  }
554
787