cicada-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. cicada/_version_hash.py +4 -0
  2. cicada/cli.py +6 -748
  3. cicada/commands.py +1255 -0
  4. cicada/dead_code/__init__.py +1 -0
  5. cicada/{find_dead_code.py → dead_code/finder.py} +2 -1
  6. cicada/dependency_analyzer.py +147 -0
  7. cicada/entry_utils.py +92 -0
  8. cicada/extractors/base.py +9 -9
  9. cicada/extractors/call.py +17 -20
  10. cicada/extractors/common.py +64 -0
  11. cicada/extractors/dependency.py +117 -235
  12. cicada/extractors/doc.py +2 -49
  13. cicada/extractors/function.py +10 -14
  14. cicada/extractors/keybert.py +228 -0
  15. cicada/extractors/keyword.py +191 -0
  16. cicada/extractors/module.py +6 -10
  17. cicada/extractors/spec.py +8 -56
  18. cicada/format/__init__.py +20 -0
  19. cicada/{ascii_art.py → format/ascii_art.py} +1 -1
  20. cicada/format/formatter.py +1145 -0
  21. cicada/git_helper.py +134 -7
  22. cicada/indexer.py +322 -89
  23. cicada/interactive_setup.py +251 -323
  24. cicada/interactive_setup_helpers.py +302 -0
  25. cicada/keyword_expander.py +437 -0
  26. cicada/keyword_search.py +208 -422
  27. cicada/keyword_test.py +383 -16
  28. cicada/mcp/__init__.py +10 -0
  29. cicada/mcp/entry.py +17 -0
  30. cicada/mcp/filter_utils.py +107 -0
  31. cicada/mcp/pattern_utils.py +118 -0
  32. cicada/{mcp_server.py → mcp/server.py} +819 -73
  33. cicada/mcp/tools.py +473 -0
  34. cicada/pr_finder.py +2 -3
  35. cicada/pr_indexer/indexer.py +3 -2
  36. cicada/setup.py +167 -35
  37. cicada/tier.py +225 -0
  38. cicada/utils/__init__.py +9 -2
  39. cicada/utils/fuzzy_match.py +54 -0
  40. cicada/utils/index_utils.py +9 -0
  41. cicada/utils/path_utils.py +18 -0
  42. cicada/utils/text_utils.py +52 -1
  43. cicada/utils/tree_utils.py +47 -0
  44. cicada/version_check.py +99 -0
  45. cicada/watch_manager.py +320 -0
  46. cicada/watcher.py +431 -0
  47. cicada_mcp-0.3.0.dist-info/METADATA +541 -0
  48. cicada_mcp-0.3.0.dist-info/RECORD +70 -0
  49. cicada_mcp-0.3.0.dist-info/entry_points.txt +4 -0
  50. cicada/formatter.py +0 -864
  51. cicada/keybert_extractor.py +0 -286
  52. cicada/lightweight_keyword_extractor.py +0 -290
  53. cicada/mcp_entry.py +0 -683
  54. cicada/mcp_tools.py +0 -291
  55. cicada_mcp-0.2.0.dist-info/METADATA +0 -735
  56. cicada_mcp-0.2.0.dist-info/RECORD +0 -53
  57. cicada_mcp-0.2.0.dist-info/entry_points.txt +0 -4
  58. /cicada/{dead_code_analyzer.py → dead_code/analyzer.py} +0 -0
  59. /cicada/{colors.py → format/colors.py} +0 -0
  60. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/WHEEL +0 -0
  61. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
  62. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/top_level.txt +0 -0
cicada/setup.py CHANGED
@@ -22,7 +22,7 @@ from cicada.utils import (
22
22
  get_index_path,
23
23
  )
24
24
 
25
- EditorType = Literal["claude", "cursor", "vs"]
25
+ EditorType = Literal["claude", "cursor", "vs", "gemini", "codex"]
26
26
 
27
27
 
28
28
  def _load_existing_config(config_path: Path) -> dict:
@@ -74,7 +74,6 @@ def _build_server_config(
74
74
  server_config["cwd"] = cwd
75
75
 
76
76
  server_config["env"] = {
77
- "CICADA_REPO_PATH": str(repo_path),
78
77
  "CICADA_CONFIG_DIR": str(storage_dir),
79
78
  }
80
79
 
@@ -118,6 +117,16 @@ def get_mcp_config_for_editor(
118
117
  "config_key": "mcp.servers",
119
118
  "needs_dir": True,
120
119
  },
120
+ "gemini": {
121
+ "config_path": repo_path / ".gemini" / "mcp.json",
122
+ "config_key": "mcpServers",
123
+ "needs_dir": True,
124
+ },
125
+ "codex": {
126
+ "config_path": repo_path / ".codex" / "mcp.json",
127
+ "config_key": "mcpServers",
128
+ "needs_dir": True,
129
+ },
121
130
  }
122
131
 
123
132
  if editor not in editor_specs:
@@ -147,8 +156,8 @@ def get_mcp_config_for_editor(
147
156
  def create_config_yaml(
148
157
  repo_path: Path,
149
158
  storage_dir: Path,
150
- keyword_method: str | None = None,
151
- keyword_tier: str | None = None,
159
+ extraction_method: str | None = None,
160
+ expansion_method: str | None = None,
152
161
  verbose: bool = True,
153
162
  ) -> None:
154
163
  """
@@ -157,18 +166,18 @@ def create_config_yaml(
157
166
  Args:
158
167
  repo_path: Path to the repository
159
168
  storage_dir: Path to the storage directory
160
- keyword_method: Keyword extraction method ('lemminflect' or 'bert'), None for default
161
- keyword_tier: Model tier ('fast', 'regular', 'max'), None for default
169
+ extraction_method: Keyword extraction method ('regular' or 'bert'), None for default
170
+ expansion_method: Expansion method ('lemmi', 'glove', or 'fasttext'), None for default
162
171
  verbose: If True, print success message. If False, silently create config.
163
172
  """
164
173
  config_path = get_config_path(repo_path)
165
174
  index_path = get_index_path(repo_path)
166
175
 
167
- # Default to lemminflect if not specified
168
- if keyword_method is None:
169
- keyword_method = "lemminflect"
170
- if keyword_tier is None:
171
- keyword_tier = "regular"
176
+ # Default to regular extraction + lemmi expansion
177
+ if extraction_method is None:
178
+ extraction_method = "regular"
179
+ if expansion_method is None:
180
+ expansion_method = "lemmi"
172
181
 
173
182
  config_content = f"""repository:
174
183
  path: {repo_path}
@@ -177,8 +186,10 @@ storage:
177
186
  index_path: {index_path}
178
187
 
179
188
  keyword_extraction:
180
- method: {keyword_method}
181
- tier: {keyword_tier}
189
+ method: {extraction_method}
190
+
191
+ keyword_expansion:
192
+ method: {expansion_method}
182
193
  """
183
194
 
184
195
  with open(config_path, "w") as f:
@@ -249,11 +260,118 @@ def setup_multiple_editors(
249
260
  print(f"⚠ Error creating {editor.upper()} config: {e}")
250
261
 
251
262
 
263
def update_claude_md(repo_path: Path, editor: EditorType | None = None) -> None:
    """Refresh the cicada usage instructions in CLAUDE.md and/or AGENTS.md.

    Only files that already exist in ``repo_path`` are touched; nothing is
    created here.

    Args:
        repo_path: Path to the repository
        editor: Editor type. ``None`` (the backward-compatible default) or
            ``"claude"`` selects CLAUDE.md. AGENTS.md is processed whenever an
            editor is given explicitly, whichever editor it is.
    """
    from cicada.mcp.tools import get_tool_definitions

    # CLAUDE.md: legacy callers pass no editor at all, Claude callers pass "claude".
    wants_claude_md = editor is None or editor == "claude"
    claude_md = repo_path / "CLAUDE.md"
    if wants_claude_md and claude_md.exists():
        _update_md_file(claude_md, get_tool_definitions())

    # AGENTS.md: only when the caller named an editor explicitly.
    agents_md = repo_path / "AGENTS.md"
    if editor is not None and agents_md.exists():
        _update_md_file(agents_md, get_tool_definitions())
282
+
283
+
284
+ def _update_md_file(md_path: Path, tools) -> None:
285
+ """Update a markdown file with cicada tool instructions.
286
+
287
+ Args:
288
+ md_path: Path to the markdown file (CLAUDE.md or AGENTS.md)
289
+ tools: Tool definitions from get_tool_definitions()
290
+ """
291
+ import re
292
+
293
+ # Auto-generate tool list from tools
294
+ tool_list: list[str] = []
295
+
296
+ for tool in tools:
297
+ # Skip deprecated tools
298
+ if tool.description and "DEPRECATED" in tool.description:
299
+ continue
300
+
301
+ # Extract first sentence from description (up to first period or newline)
302
+ if tool.description:
303
+ desc = tool.description.split("\n")[0].strip()
304
+ if "." in desc:
305
+ desc = desc.split(".")[0] + "."
306
+ line = f" - {desc} `mcp__cicada__{tool.name}`"
307
+ tool_list.append(line)
308
+
309
+ tool_list_str = "\n".join(tool_list)
310
+
311
+ # Identify the categories of tools
312
+ grep_antipatterns = [
313
+ " - ❌ Searching for module structure",
314
+ " - ❌ Searching for function definitions",
315
+ " - ❌ Searching for module imports/usage",
316
+ ]
317
+ grep_antipatterns_str = "\n".join(grep_antipatterns)
318
+
319
+ instruction_content = f"""<cicada>
320
+ **ALWAYS use cicada-mcp tools for Elixir code searches. NEVER use Grep/Find for these tasks.**
321
+
322
+ ### Use cicada tools for:
323
+ {tool_list_str}
324
+
325
+ ### DO NOT use Grep for:
326
+ {grep_antipatterns_str}
327
+
328
+ ### You can still use Grep for:
329
+ - ✓ Non-code files (markdown, JSON, config)
330
+ - ✓ String literal searches
331
+ - ✓ Pattern matching in single line comments
332
+ </cicada>
333
+ """
334
+
335
+ try:
336
+ # Read existing content
337
+ with open(md_path) as f:
338
+ content = f.read()
339
+
340
+ # Pattern to find existing <cicada>...</cicada> tags
341
+ cicada_pattern = re.compile(r"<cicada>.*?</cicada>", re.DOTALL)
342
+
343
+ # Check if <cicada> tags exist
344
+ if cicada_pattern.search(content):
345
+ # Replace existing content between tags
346
+ new_content = cicada_pattern.sub(instruction_content, content)
347
+ with open(md_path, "w") as f:
348
+ f.write(new_content)
349
+ print(f"✓ Updated <cicada> instructions in {md_path.name}")
350
+ elif "cicada-mcp" in content.lower() or "cicada" in content.lower():
351
+ # Content already mentions cicada, don't add duplication
352
+ # This handles cases where users manually added cicada instructions
353
+ print(f"✓ {md_path.name} already mentions cicada, skipping update")
354
+ else:
355
+ # Append the instruction
356
+ with open(md_path, "a") as f:
357
+ # Add newline if file doesn't end with one
358
+ if content and not content.endswith("\n"):
359
+ f.write("\n")
360
+
361
+ f.write("\n")
362
+ f.write(instruction_content)
363
+
364
+ print(f"✓ Added cicada-mcp usage instructions to {md_path.name}")
365
+ except Exception:
366
+ # Fail silently on any errors
367
+ pass
368
+
369
+
252
370
  def setup(
253
371
  editor: EditorType,
254
372
  repo_path: Path | None = None,
255
- keyword_method: str | None = None,
256
- keyword_tier: str | None = None,
373
+ extraction_method: str | None = None,
374
+ expansion_method: str | None = None,
257
375
  index_exists: bool = False,
258
376
  ) -> None:
259
377
  """
@@ -262,8 +380,8 @@ def setup(
262
380
  Args:
263
381
  editor: Editor type (claude, cursor, vs)
264
382
  repo_path: Path to the repository (defaults to current directory)
265
- keyword_method: Keyword extraction method ('lemminflect' or 'bert'), None for default
266
- keyword_tier: Model tier ('fast', 'regular', 'max'), None for default
383
+ extraction_method: Keyword extraction method ('regular' or 'bert'), None for default
384
+ expansion_method: Expansion method ('lemmi', 'glove', or 'fasttext'), None for default
267
385
  index_exists: If True, skip banner and show condensed output (index already exists)
268
386
  """
269
387
  # Determine repository path
@@ -276,15 +394,19 @@ def setup(
276
394
 
277
395
  # Show condensed output if index already exists
278
396
  if index_exists:
279
- # Determine method and tier for display
280
- display_method = keyword_method if keyword_method else "lemminflect"
281
- display_tier = keyword_tier if keyword_tier else "regular"
282
- print(f"✓ Found existing index ({display_method.upper()} {display_tier})")
397
+ # Determine method for display
398
+ display_extraction = extraction_method if extraction_method else "regular"
399
+ display_expansion = expansion_method if expansion_method else "lemmi"
400
+ print(
401
+ f"✓ Found existing index ({display_extraction.upper()} + {display_expansion.upper()})"
402
+ )
283
403
  # Skip indexing when index_exists is True - we're just reusing it
284
404
  should_index = False
285
405
  force_full = False
286
406
  # Ensure config.yaml is up to date with current settings
287
- create_config_yaml(repo_path, storage_dir, keyword_method, keyword_tier, verbose=False)
407
+ create_config_yaml(
408
+ repo_path, storage_dir, extraction_method, expansion_method, verbose=False
409
+ )
288
410
  else:
289
411
  # Show full banner for new setup
290
412
  print("=" * 60)
@@ -307,20 +429,20 @@ def setup(
307
429
  try:
308
430
  with open(config_path) as f:
309
431
  existing_config = yaml.safe_load(f)
310
- existing_method = existing_config.get("keyword_extraction", {}).get(
311
- "method", "lemminflect"
432
+ existing_extraction = existing_config.get("keyword_extraction", {}).get(
433
+ "method", "regular"
312
434
  )
313
- existing_tier = existing_config.get("keyword_extraction", {}).get(
314
- "tier", "regular"
435
+ existing_expansion = existing_config.get("keyword_expansion", {}).get(
436
+ "method", "lemmi"
315
437
  )
316
438
 
317
- # Determine new method and tier (default to lemminflect/regular if not specified)
318
- new_method = keyword_method if keyword_method else "lemminflect"
319
- new_tier = keyword_tier if keyword_tier else "regular"
439
+ # Determine new methods (default to regular + lemmi if not specified)
440
+ new_extraction = extraction_method if extraction_method else "regular"
441
+ new_expansion = expansion_method if expansion_method else "lemmi"
320
442
 
321
443
  # Check if settings changed
322
- settings_changed = (existing_method != new_method) or (
323
- existing_tier != new_tier
444
+ settings_changed = (existing_extraction != new_extraction) or (
445
+ existing_expansion != new_expansion
324
446
  )
325
447
 
326
448
  if settings_changed:
@@ -329,9 +451,11 @@ def setup(
329
451
  print("=" * 60)
330
452
  print()
331
453
  print(
332
- f"This repository already has an index with {existing_method.upper()} ({existing_tier}) keyword extraction."
454
+ f"This repository already has an index with {existing_extraction.upper()} + {existing_expansion.upper()}."
455
+ )
456
+ print(
457
+ f"You are now switching to {new_extraction.upper()} + {new_expansion.upper()}."
333
458
  )
334
- print(f"You are now switching to {new_method.upper()} ({new_tier}).")
335
459
  print()
336
460
  print(
337
461
  "This will require reindexing the ENTIRE codebase, which may take several minutes."
@@ -347,7 +471,9 @@ def setup(
347
471
  force_full = True # Force full reindex when settings change
348
472
  else:
349
473
  # Settings unchanged - just use existing index
350
- print(f"✓ Using existing index ({existing_method}, {existing_tier})")
474
+ print(
475
+ f"✓ Using existing index ({existing_extraction.upper()} + {existing_expansion.upper()})"
476
+ )
351
477
  print()
352
478
  should_index = False
353
479
  except Exception:
@@ -355,13 +481,19 @@ def setup(
355
481
  pass
356
482
 
357
483
  # Create/update config.yaml BEFORE indexing (indexer reads this to determine keyword method)
358
- create_config_yaml(repo_path, storage_dir, keyword_method, keyword_tier, verbose=False)
484
+ create_config_yaml(
485
+ repo_path, storage_dir, extraction_method, expansion_method, verbose=False
486
+ )
359
487
 
360
488
  # Index repository if needed
361
489
  if should_index:
362
490
  index_repository(repo_path, force_full=force_full)
363
491
  print()
364
492
 
493
+ # Update CLAUDE.md with cicada instructions (only for Claude Code editor)
494
+ if editor == "claude":
495
+ update_claude_md(repo_path)
496
+
365
497
  # Create MCP config for the editor
366
498
  config_path, config_content = get_mcp_config_for_editor(editor, repo_path, storage_dir)
367
499
 
cicada/tier.py ADDED
@@ -0,0 +1,225 @@
1
+ """
2
+ Tier Configuration Module - Centralized tier resolution and conversion logic.
3
+
4
+ This module provides a single source of truth for:
5
+ - Tier validation (fast, regular, max)
6
+ - Tier resolution from arguments or config files
7
+ - Tier <-> (extraction_method, expansion_method) conversions
8
+ """
9
+
10
+ import argparse
11
+ import sys
12
+ from pathlib import Path
13
+
14
# Each tier name resolves to an (extraction_method, expansion_method) pair.
TIER_METHODS: dict[str, tuple[str, str]] = dict(
    fast=("regular", "lemmi"),
    regular=("bert", "glove"),
    max=("bert", "fasttext"),
)

# Pair used when no configuration is available.
DEFAULT_METHODS: tuple[str, str] = ("regular", "lemmi")
23
+
24
+
25
def validate_tier_flags(args: argparse.Namespace, *, require_force: bool = False) -> None:
    """Reject inconsistent combinations of tier flags.

    Args:
        args: Parsed command-line arguments with fast, regular, and max attributes
            (``regular`` and ``force`` are optional and treated as unset when absent)
        require_force: When True, a tier flag and --force must be given together

    Raises:
        SystemExit: With status 1 when more than one tier flag is set, or with
            status 2 when ``require_force`` is True and exactly one of
            "a tier flag" / "--force" is present
    """

    def _abort(exit_code: int, *messages: str) -> None:
        # Emit every message on stderr, then terminate with the given status.
        for message in messages:
            print(message, file=sys.stderr)
        sys.exit(exit_code)

    selected = sum(
        1 for flag in (args.fast, getattr(args, "regular", False), args.max) if flag
    )

    if selected > 1:
        _abort(1, "Error: Can only specify one tier flag (--fast, --regular, or --max)")

    if require_force:
        # Only a literal True counts as --force being enabled.
        has_force = getattr(args, "force", False) is True
        has_tier = selected == 1

        if has_force and not has_tier:
            _abort(
                2,
                "Error: --force requires specifying a tier flag (--fast, --regular, or --max).",
            )

        if has_tier and not has_force:
            _abort(
                2,
                "Error: Tier flags now require --force to override the configured tier.",
                "Run 'cicada index --force --fast|--regular|--max' to select a tier.",
            )
68
+
69
+
70
def tier_flag_specified(args: argparse.Namespace) -> bool:
    """Tell whether --fast, --regular or --max was given (``regular`` may be absent)."""
    if args.fast:
        return True
    if getattr(args, "regular", False):
        return True
    return bool(args.max)
73
+
74
+
75
+ def get_tier_from_args(args: argparse.Namespace) -> str | None:
76
+ """Extract tier from command-line arguments.
77
+
78
+ Args:
79
+ args: Parsed command-line arguments with fast, regular, and max attributes
80
+
81
+ Returns:
82
+ Tier string ("fast", "regular", or "max"), or None if no tier flag specified
83
+ """
84
+ if args.fast:
85
+ return "fast"
86
+ if args.max:
87
+ return "max"
88
+ if getattr(args, "regular", False):
89
+ return "regular"
90
+ return None
91
+
92
+
93
def tier_to_methods(tier: str) -> tuple[str, str]:
    """Look up the (extraction_method, expansion_method) pair for a tier.

    Args:
        tier: Tier string ("fast", "regular", or "max")

    Returns:
        The pair registered for ``tier`` in TIER_METHODS, or DEFAULT_METHODS
        when the tier name is not registered there.
    """
    try:
        return TIER_METHODS[tier]
    except KeyError:
        # Unknown tier names fall back to the defaults and are not treated as errors.
        return DEFAULT_METHODS
110
+
111
+
112
def methods_to_tier(extraction_method: str, expansion_method: str) -> str:
    """Map an (extraction_method, expansion_method) pair back to a tier name.

    An exact entry in TIER_METHODS wins. Otherwise the tier is guessed from the
    extraction method: 'regular' extraction means "fast"; 'bert' extraction
    means "max" with 'fasttext' expansion and "regular" with anything else.
    Pairs that match nothing also resolve to "regular".

    Args:
        extraction_method: 'regular' or 'bert'
        expansion_method: 'lemmi', 'glove', or 'fasttext'

    Returns:
        Tier string: "fast", "regular", or "max"
    """
    wanted = (extraction_method, expansion_method)

    # First tier whose registered pair equals the requested one, if any.
    exact = next((name for name, pair in TIER_METHODS.items() if pair == wanted), None)
    if exact is not None:
        return exact

    # Partial matches, decided by the extraction method.
    if extraction_method == "regular":
        return "fast"
    if extraction_method == "bert" and expansion_method == "fasttext":
        return "max"

    # Remaining 'bert' pairs and unknown combinations.
    return "regular"
140
+
141
+
142
def read_keyword_extraction_config(repo_path: Path) -> tuple[str, str]:
    """Read keyword extraction configuration from config.yaml.

    Each method is resolved independently: a missing, empty or malformed
    section only resets that section's method to its default and does not
    discard a valid value in the other section.

    Args:
        repo_path: Path to the repository

    Returns:
        tuple[str, str]: (extraction_method, expansion_method) where each value
        is the ``method`` string found under ``keyword_extraction`` /
        ``keyword_expansion``, or the matching DEFAULT_METHODS entry when it is
        absent or not a non-empty string. DEFAULT_METHODS is returned as a whole
        when the config file is missing, unreadable, or not a mapping.
    """
    try:
        import yaml

        from cicada.utils.storage import get_config_path

        config_path = get_config_path(repo_path)
        if not config_path.exists():
            return DEFAULT_METHODS

        with open(config_path) as f:
            config = yaml.safe_load(f)
    except Exception:
        # Best-effort read: import, I/O and parse failures all mean "use defaults".
        return DEFAULT_METHODS

    # An empty file parses to None, and a top-level list or scalar has no sections.
    if not isinstance(config, dict):
        return DEFAULT_METHODS

    def _method(section: str, default: str) -> str:
        # A section written with no body ("keyword_extraction:") parses to None.
        entry = config.get(section)
        if not isinstance(entry, dict):
            return default
        value = entry.get("method")
        return value if isinstance(value, str) and value else default

    return (
        _method("keyword_extraction", DEFAULT_METHODS[0]),
        _method("keyword_expansion", DEFAULT_METHODS[1]),
    )
176
+
177
+
178
def determine_tier(args: argparse.Namespace, repo_path: Path | None = None) -> str:
    """Resolve the indexing tier from the command line or the stored config.

    Resolution order:
    1. A tier flag on ``args`` (--fast, --regular, --max) always wins.
    2. Otherwise, when ``repo_path`` is given, the methods read from that
       repository's config.yaml are converted back into a tier. A missing
       config therefore resolves through the config reader's defaults, not
       through the hard-coded fallback below.
    3. Otherwise (no flag and no ``repo_path``) the tier is "regular".

    Args:
        args: Parsed command-line arguments with fast, regular, and max attributes
        repo_path: Optional repository path to read config from

    Returns:
        Tier string: "fast", "regular", or "max"
    """
    explicit_tier = get_tier_from_args(args)
    if explicit_tier is not None:
        return explicit_tier

    if repo_path is None:
        # Nothing to consult: no flag and no repository.
        return "regular"

    extraction_method, expansion_method = read_keyword_extraction_config(repo_path)
    return methods_to_tier(extraction_method, expansion_method)
205
+
206
+
207
def get_extraction_expansion_methods(
    args: argparse.Namespace,
) -> tuple[str | None, str | None]:
    """Turn the tier flag on ``args`` into extraction and expansion methods.

    Kept as a backward-compatibility convenience. The (None, None) result lets
    callers tell "no tier flag given" apart from "default tier".

    Args:
        args: Parsed command-line arguments with fast, regular, and max attributes

    Returns:
        Tuple of (extraction_method, expansion_method), or (None, None) if no tier flag
    """
    selected = get_tier_from_args(args)
    return (None, None) if selected is None else tier_to_methods(selected)
cicada/utils/__init__.py CHANGED
@@ -7,13 +7,14 @@ code duplication and improve maintainability.
7
7
 
8
8
  from .call_site_formatter import CallSiteFormatter
9
9
  from .function_grouper import FunctionGrouper
10
+ from .fuzzy_match import find_similar_names
10
11
  from .index_utils import (
11
12
  load_index,
12
13
  merge_indexes_incremental,
13
14
  save_index,
14
15
  validate_index_structure,
15
16
  )
16
- from .path_utils import normalize_file_path, resolve_to_repo_root
17
+ from .path_utils import is_git_repository, normalize_file_path, resolve_to_repo_root
17
18
  from .signature_builder import SignatureBuilder
18
19
  from .storage import (
19
20
  create_storage_dir,
@@ -25,7 +26,8 @@ from .storage import (
25
26
  get_storage_dir,
26
27
  )
27
28
  from .subprocess_runner import SubprocessRunner, run_gh_command, run_git_command
28
- from .text_utils import split_camel_snake_case, split_identifier
29
+ from .text_utils import extract_code_identifiers, split_camel_snake_case, split_identifier
30
+ from .tree_utils import extract_text_from_node, is_function_definition_call
29
31
 
30
32
  __all__ = [
31
33
  "SubprocessRunner",
@@ -33,6 +35,7 @@ __all__ = [
33
35
  "run_gh_command",
34
36
  "normalize_file_path",
35
37
  "resolve_to_repo_root",
38
+ "is_git_repository",
36
39
  "load_index",
37
40
  "save_index",
38
41
  "merge_indexes_incremental",
@@ -40,8 +43,10 @@ __all__ = [
40
43
  "FunctionGrouper",
41
44
  "CallSiteFormatter",
42
45
  "SignatureBuilder",
46
+ "find_similar_names",
43
47
  "split_identifier",
44
48
  "split_camel_snake_case",
49
+ "extract_code_identifiers",
45
50
  "get_repo_hash",
46
51
  "get_storage_dir",
47
52
  "create_storage_dir",
@@ -49,4 +54,6 @@ __all__ = [
49
54
  "get_config_path",
50
55
  "get_hashes_path",
51
56
  "get_pr_index_path",
57
+ "extract_text_from_node",
58
+ "is_function_definition_call",
52
59
  ]
@@ -0,0 +1,54 @@
1
+ """
2
+ Fuzzy matching utilities for finding similar names.
3
+
4
+ This module provides utilities for finding similar module and function names
5
+ using fuzzy string matching algorithms.
6
+ """
7
+
8
+ from difflib import SequenceMatcher
9
+
10
+
11
def find_similar_names(
    query: str, candidates: list[str], max_suggestions: int = 5, threshold: float = 0.4
) -> list[tuple[str, float]]:
    """
    Find similar names using fuzzy matching.

    Matching is case-insensitive. A candidate equal to the query short-circuits
    the search and is returned alone with score 1.0. Otherwise each candidate
    gets its ``SequenceMatcher`` ratio, raised to at least 0.7 when the whole
    query is a substring of it and to at least 0.6 when any dotted component of
    the query is (e.g. "User" matches "MyApp.User").

    Empty queries and empty components (as in "Foo." or "A..B") never trigger a
    boost, because an empty string is a substring of every candidate.

    Args:
        query: The query string to match
        candidates: List of candidate names to match against
        max_suggestions: Maximum number of suggestions to return
        threshold: Similarity score (0.0-1.0) a candidate must strictly exceed
            to be included in results

    Returns:
        List of (name, similarity_score) tuples, sorted by similarity (descending)
    """
    query_lower = query.lower()

    # Early exit for exact match (checked against every candidate).
    for candidate in candidates:
        if candidate.lower() == query_lower:
            return [(candidate, 1.0)]

    # Loop-invariant: the non-empty dotted components of the query, lowercased.
    query_parts = [part.lower() for part in query.split(".") if part]

    similarities: list[tuple[str, float]] = []

    # Only the first 500 candidates are scored, to bound the cost on very large indices.
    for candidate in candidates[:500]:
        candidate_lower = candidate.lower()

        # Base similarity score.
        similarity = SequenceMatcher(None, query_lower, candidate_lower).ratio()

        # Boost score for substring matches.
        if query_lower and query_lower in candidate_lower:
            similarity = max(similarity, 0.7)

        # Boost score for partial component matches.
        if any(part in candidate_lower for part in query_parts):
            similarity = max(similarity, 0.6)

        similarities.append((candidate, similarity))

    # Sort by similarity (descending) and return top matches above threshold.
    similarities.sort(key=lambda item: item[1], reverse=True)
    return [(name, score) for name, score in similarities[:max_suggestions] if score > threshold]
@@ -274,12 +274,21 @@ def merge_indexes_incremental(
274
274
  if "modules" in new_index:
275
275
  merged["modules"].update(new_index["modules"])
276
276
 
277
+ # Preserve original cicada_version from old_index if it exists
278
+ original_version = None
279
+ if "metadata" in old_index:
280
+ original_version = old_index["metadata"].get("cicada_version")
281
+
277
282
  # Merge metadata - take from new_index if available, else old_index
278
283
  if "metadata" in new_index:
279
284
  merged["metadata"].update(new_index["metadata"])
280
285
  elif "metadata" in old_index:
281
286
  merged["metadata"].update(old_index["metadata"])
282
287
 
288
+ # Restore original version if it existed (don't overwrite with new version)
289
+ if original_version:
290
+ merged["metadata"]["cicada_version"] = original_version
291
+
283
292
  # Update module and function counts
284
293
  stats = get_index_stats(merged)
285
294
  merged["metadata"]["total_modules"] = stats["total_modules"]