cicada-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. cicada/_version_hash.py +4 -0
  2. cicada/cli.py +6 -748
  3. cicada/commands.py +1255 -0
  4. cicada/dead_code/__init__.py +1 -0
  5. cicada/{find_dead_code.py → dead_code/finder.py} +2 -1
  6. cicada/dependency_analyzer.py +147 -0
  7. cicada/entry_utils.py +92 -0
  8. cicada/extractors/base.py +9 -9
  9. cicada/extractors/call.py +17 -20
  10. cicada/extractors/common.py +64 -0
  11. cicada/extractors/dependency.py +117 -235
  12. cicada/extractors/doc.py +2 -49
  13. cicada/extractors/function.py +10 -14
  14. cicada/extractors/keybert.py +228 -0
  15. cicada/extractors/keyword.py +191 -0
  16. cicada/extractors/module.py +6 -10
  17. cicada/extractors/spec.py +8 -56
  18. cicada/format/__init__.py +20 -0
  19. cicada/{ascii_art.py → format/ascii_art.py} +1 -1
  20. cicada/format/formatter.py +1145 -0
  21. cicada/git_helper.py +134 -7
  22. cicada/indexer.py +322 -89
  23. cicada/interactive_setup.py +251 -323
  24. cicada/interactive_setup_helpers.py +302 -0
  25. cicada/keyword_expander.py +437 -0
  26. cicada/keyword_search.py +208 -422
  27. cicada/keyword_test.py +383 -16
  28. cicada/mcp/__init__.py +10 -0
  29. cicada/mcp/entry.py +17 -0
  30. cicada/mcp/filter_utils.py +107 -0
  31. cicada/mcp/pattern_utils.py +118 -0
  32. cicada/{mcp_server.py → mcp/server.py} +819 -73
  33. cicada/mcp/tools.py +473 -0
  34. cicada/pr_finder.py +2 -3
  35. cicada/pr_indexer/indexer.py +3 -2
  36. cicada/setup.py +167 -35
  37. cicada/tier.py +225 -0
  38. cicada/utils/__init__.py +9 -2
  39. cicada/utils/fuzzy_match.py +54 -0
  40. cicada/utils/index_utils.py +9 -0
  41. cicada/utils/path_utils.py +18 -0
  42. cicada/utils/text_utils.py +52 -1
  43. cicada/utils/tree_utils.py +47 -0
  44. cicada/version_check.py +99 -0
  45. cicada/watch_manager.py +320 -0
  46. cicada/watcher.py +431 -0
  47. cicada_mcp-0.3.0.dist-info/METADATA +541 -0
  48. cicada_mcp-0.3.0.dist-info/RECORD +70 -0
  49. cicada_mcp-0.3.0.dist-info/entry_points.txt +4 -0
  50. cicada/formatter.py +0 -864
  51. cicada/keybert_extractor.py +0 -286
  52. cicada/lightweight_keyword_extractor.py +0 -290
  53. cicada/mcp_entry.py +0 -683
  54. cicada/mcp_tools.py +0 -291
  55. cicada_mcp-0.2.0.dist-info/METADATA +0 -735
  56. cicada_mcp-0.2.0.dist-info/RECORD +0 -53
  57. cicada_mcp-0.2.0.dist-info/entry_points.txt +0 -4
  58. /cicada/{dead_code_analyzer.py → dead_code/analyzer.py} +0 -0
  59. /cicada/{colors.py → format/colors.py} +0 -0
  60. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/WHEEL +0 -0
  61. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
  62. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/top_level.txt +0 -0
cicada/keyword_test.py CHANGED
@@ -7,7 +7,333 @@ Provides an interactive REPL for testing keyword extraction methods.
7
7
  import sys
8
8
 
9
9
 
10
- def run_keywords_interactive(method: str = "lemminflect", tier: str = "regular"):
10
# Display recipe for each expansion source, in the order groups are printed:
# (source key, heading template, row template, max rows shown,
#  default score when an item has none, whether to report hidden rows).
_EXPANSION_GROUPS = (
    (
        "original",
        "\nFrom extraction ({count}):",
        " ✓ {word:25s} (score: {score:.3f})",
        20,
        1.0,
        False,
    ),
    (
        "split",
        "\nFrom splitting ({count}):",
        " → {word:25s} (split from '{parent}', score: {score:.3f})",
        10,
        1.0,
        False,
    ),
    (
        "inflection",
        "\nFrom inflection ({count}):",
        " ~ {word:25s} (inflection of '{parent}', score: {score:.3f})",
        15,
        1.0,
        True,
    ),
    (
        "embedding",
        "\nFrom semantic expansion ({count}) [extraction × similarity]:",
        " + {word:25s} (similar to '{parent}', final score: {score:.3f})",
        15,
        0,
        True,
    ),
    (
        "embedding_inflection",
        "\nFrom semantic expansion inflections ({count}) [inherits final score]:",
        " ≈ {word:25s} (inflection of '{parent}', final score: {score:.3f})",
        10,
        0,
        True,
    ),
)


def _is_scored_pair(item) -> bool:
    """Return True if ``item`` is a list/tuple holding at least (keyword, score)."""
    return isinstance(item, (list, tuple)) and len(item) >= 2


def _read_lines_until_eof() -> list[str]:
    """Collect lines from stdin until EOF (Ctrl-D / Ctrl-Z+Enter) is signalled."""
    lines = []
    try:
        while True:
            lines.append(input())
    except EOFError:
        return lines


def _create_keyword_extractor(extraction_method: str):
    """Instantiate the extractor for ``extraction_method``.

    Imports are local so only the selected backend is loaded.

    Raises:
        ValueError: If ``extraction_method`` is neither 'regular' nor 'bert'.
    """
    if extraction_method == "regular":
        from cicada.extractors.keyword import RegularKeywordExtractor

        return RegularKeywordExtractor(verbose=True)
    if extraction_method == "bert":
        from cicada.extractors.keybert import KeyBERTExtractor

        return KeyBERTExtractor(verbose=True)
    raise ValueError(f"Unknown extraction method: {extraction_method}")


def _print_pipeline_banner(
    expansion_type: str,
    extraction_method: str,
    extraction_tier: str,
    extraction_threshold: float | None,
    expansion_threshold: float,
    min_score: float,
) -> None:
    """Print the session header: active settings, strategy blurb and usage hints."""
    print(f"\n{'='*70}")
    print("🔄 Cicada Interactive Keyword Pipeline Test")
    print(f"{'='*70}")

    # Map extraction method to display name
    extraction_display = "REGULAR (token-based)" if extraction_method == "regular" else "BERT"
    print(f"Extraction: {extraction_display} ({extraction_tier})")
    print(f"Expansion: {expansion_type.upper()}")
    if extraction_threshold is not None:
        print(f"Extraction threshold: {extraction_threshold}")
    if min_score > 0.0:
        print(f"Min score: {min_score}")
    print(f"Expansion threshold: {expansion_threshold}")

    # Show strategy description
    expansion_descriptions = {
        "lemmi": "Inflected forms only (run → running, runs, ran)",
        "glove": "GloVe embeddings + inflected forms (128MB download first time)",
        "fasttext": "FastText embeddings + inflected forms (958MB download first time)",
    }
    print(f"Strategy: {expansion_descriptions.get(expansion_type, 'Unknown')}")

    print("\nEnter text, then press Ctrl-D (Unix) or Ctrl-Z+Enter (Windows)")
    print("Press Ctrl-C to exit.\n")
    print(f"{'='*70}\n")


def _print_expansion_group(
    items: list, heading: str, row: str, limit: int, default_score: float, report_overflow: bool
) -> None:
    """Print one source group of expanded keywords, showing at most ``limit`` rows."""
    if not items:
        return
    print(heading.format(count=len(items)))
    for item in items[:limit]:
        print(
            row.format(
                word=item["word"],
                parent=item.get("parent", ""),
                score=item.get("score", default_score),
            )
        )
    if report_overflow and len(items) > limit:
        print(f" ... and {len(items) - limit} more")


def _run_pipeline_once(
    text: str,
    extractor,
    expander,
    extraction_threshold: float | None,
    expansion_threshold: float,
    min_score: float,
) -> None:
    """Run extract → expand for one block of text and print every stage.

    Returns early (after printing a notice) when no keywords survive
    extraction; the caller prints the closing separator in either case.
    """
    # Step 1: Show input text (first 200 characters only)
    print("\n1️⃣ INPUT TEXT:")
    print("-" * 70)
    preview = (text[:200] + "...") if len(text) > 200 else text
    print(f"{preview}\n")

    # Step 2: Extract keywords
    print("2️⃣ EXTRACTED KEYWORDS:")
    print("-" * 70)
    results = extractor.extract_keywords(text, top_n=15, min_score=min_score)
    top_keywords = results.get("top_keywords", [])

    # Apply extraction threshold if specified
    if extraction_threshold is not None and top_keywords:
        kept = [
            item
            for item in top_keywords
            if _is_scored_pair(item) and item[1] >= extraction_threshold
        ]
        removed_count = len(top_keywords) - len(kept)
        if removed_count > 0:
            print(
                f"⚠️ Filtered out {removed_count} keywords below threshold {extraction_threshold}\n"
            )
        top_keywords = kept

    code_identifiers = results.get("code_identifiers", [])
    code_identifiers_lower = [ident.lower() for ident in code_identifiers]
    code_split_words_lower = [word.lower() for word in results.get("code_split_words", [])]
    # Sets give O(1) boost lookups; the lists are kept because their lengths
    # (duplicates included) are what the totals below report.
    identifier_lookup = set(code_identifiers_lower)
    split_word_lookup = set(code_split_words_lower)

    extracted_keywords = []
    extraction_scores = {}  # Map lower-cased keywords to their extraction scores
    if top_keywords and isinstance(top_keywords, list):
        for i, item in enumerate(top_keywords, 1):
            if not _is_scored_pair(item):
                continue
            keyword, score = item[0], item[1]
            lowered = keyword.lower()
            extracted_keywords.append(keyword)
            extraction_scores[lowered] = score

            # Determine if this keyword was boosted
            if lowered in identifier_lookup:
                boost_label = " [10x boost]"
            elif lowered in split_word_lookup:
                boost_label = " [3x boost]"
            else:
                boost_label = ""

            print(f" {i:2}. {keyword:20s} (score: {score:.4f}){boost_label}")

    if not extracted_keywords:
        print(" No keywords extracted.")
        return

    print(f"\nTotal extracted: {len(extracted_keywords)} keywords")
    if code_identifiers_lower or code_split_words_lower:
        print(f" • Code identifiers (10x): {len(code_identifiers_lower)}")
        print(f" • Code split words (3x): {len(code_split_words_lower)}")
    # NOTE(review): the diff view this was recovered from has no indentation;
    # the blank line is emitted unconditionally so step 3 is always separated
    # from step 2, matching the spacing before step 2.
    print()

    # Step 3: Expand keywords
    print("3️⃣ EXPANDED KEYWORDS:")
    print("-" * 70)
    print("Note: Expansion scores = extraction score × similarity score")
    print("Note: Code identifiers are NOT inflected or expanded (kept exact)\n")
    result = expander.expand_keywords(
        extracted_keywords,
        top_n=3,
        threshold=expansion_threshold,
        return_scores=True,
        keyword_scores=extraction_scores,
        min_score=min_score,
        code_identifiers=code_identifiers,
    )

    # Extract detailed and simple lists
    if isinstance(result, dict):
        expanded_with_scores = result["words"]
        expanded = result["simple"]
    else:
        # Fallback if return_scores wasn't supported
        expanded = result
        expanded_with_scores = []

    # Group by source type; unexpected sources are collected but not displayed
    by_source = {group[0]: [] for group in _EXPANSION_GROUPS}
    for item in expanded_with_scores:
        by_source.setdefault(item.get("source", "unknown"), []).append(item)

    for source, heading, row, limit, default_score, report_overflow in _EXPANSION_GROUPS:
        _print_expansion_group(
            by_source[source], heading, row, limit, default_score, report_overflow
        )

    # Show statistics
    print("\n📊 STATISTICS:")
    print("-" * 70)
    print(f" • Extracted: {len(extracted_keywords)} keywords")
    print(f" • Expanded: {len(expanded)} keywords")
    # extracted_keywords is non-empty here (see the early return above)
    expansion_ratio = len(expanded) / len(extracted_keywords)
    print(f" • Ratio: {expansion_ratio:.1f}x expansion")
    print("\n Breakdown by source:")
    print(f" - Original: {len(by_source['original'])}")
    print(f" - Split: {len(by_source['split'])}")
    print(f" - Inflections: {len(by_source['inflection'])}")
    print(f" - Semantic (embeddings): {len(by_source['embedding'])}")
    print(f" - Semantic inflections: {len(by_source['embedding_inflection'])}")

    # Show expansion info
    info = expander.get_expansion_info()
    if "embedding_vocab_size" in info:
        print("\n🧠 Model Info:")
        print(f" • Vocabulary size: {info['embedding_vocab_size']:,}")
        print(f" • Vector dimensions: {info['embedding_vector_size']}")

    # Show complete sorted list of all keywords with scores
    if expanded_with_scores:
        print("\n📋 ALL EXPANDED KEYWORDS (sorted by score):")
        print("-" * 70)
        sorted_keywords = sorted(
            expanded_with_scores, key=lambda entry: entry.get("score", 0), reverse=True
        )
        # Show top 50
        for i, item in enumerate(sorted_keywords[:50], 1):
            word = item["word"]
            score = item.get("score", 0)
            print(f" {i:3}. {word:25s} (score: {score:.4f})")
        if len(sorted_keywords) > 50:
            print(f"\n ... and {len(sorted_keywords) - 50} more keywords")
            # NOTE(review): treated as part of the truncation footer; the
            # indentation-less diff does not show whether it was unconditional.
            print(
                f" Score range: {sorted_keywords[-1].get('score', 0):.4f} - {sorted_keywords[0].get('score', 0):.4f}"
            )


def run_expansion_interactive(
    expansion_type: str = "lemmi",
    extraction_method: str = "regular",
    extraction_tier: str = "regular",
    extraction_threshold: float | None = 0.3,
    expansion_threshold: float = 0.2,
    min_score: float = 0.5,
):
    """
    Interactive keyword expansion testing mode.

    Shows the full pipeline: Text → Extracted Keywords → Expanded Keywords

    Repeatedly reads a block of text from stdin (terminated by EOF), runs it
    through the extractor and the expander, and prints each stage. Exits the
    process with status 1 if either component fails to initialize and with
    status 0 on Ctrl-C; returns normally once stdin yields EOF with no text
    after an earlier EOF has already been seen.

    Args:
        expansion_type: Expansion strategy ('lemmi', 'glove', or 'fasttext')
        extraction_method: Extraction method ('regular' or 'bert')
        extraction_tier: Model tier for extraction ('fast', 'regular', or 'max');
            only shown in the banner, it is not passed to the extractor here
        extraction_threshold: Minimum score for extraction (default: 0.3)
        expansion_threshold: Minimum similarity score for expansion (default: 0.2)
        min_score: Minimum score threshold for keywords (default: 0.5)
    """
    _print_pipeline_banner(
        expansion_type,
        extraction_method,
        extraction_tier,
        extraction_threshold,
        expansion_threshold,
        min_score,
    )

    # Initialize keyword extractor
    try:
        extractor = _create_keyword_extractor(extraction_method)
        print()  # Add newline after initialization
    except Exception as e:
        print(f"Error initializing keyword extractor: {e}", file=sys.stderr)
        sys.exit(1)

    # Initialize keyword expander
    try:
        from cicada.keyword_expander import KeywordExpander

        expander = KeywordExpander(expansion_type=expansion_type, verbose=True)

        # Force-load embedding model if using glove/fasttext
        if expansion_type in ["glove", "fasttext"]:
            print(f"\nPreloading {expansion_type} model...")
            # Trigger model loading with a dummy keyword
            _ = expander.expand_keywords(["test"], top_n=1, threshold=0.9)
            print(f"✓ {expansion_type.title()} model ready\n")
        else:
            print()  # Add newline after initialization
    except Exception as e:
        print(f"Error initializing keyword expander: {e}", file=sys.stderr)
        sys.exit(1)

    # Interactive loop
    stdin_closed = False  # set once any EOF has been seen
    try:
        while True:
            print("📝 Enter text (Ctrl-D or Ctrl-Z+Enter when done):")
            print("-" * 70)

            lines = _read_lines_until_eof()
            # An empty read after a previous EOF means stdin is exhausted
            # (e.g. piped input); stop instead of looping forever.
            if not lines and stdin_closed:
                print("\n👋 No more input available. Exiting.")
                return
            stdin_closed = True

            text = "\n".join(lines)
            if not text.strip():
                print("\n⚠️ Empty input. Please enter some text.\n")
                continue

            # Full pipeline display
            print("\n" + "=" * 70)
            print("🔄 KEYWORD PIPELINE:")
            print("=" * 70)

            try:
                _run_pipeline_once(
                    text,
                    extractor,
                    expander,
                    extraction_threshold,
                    expansion_threshold,
                    min_score,
                )
            except Exception as e:
                # Keep the session alive: report the failure and prompt again.
                print(f"\n❌ Error in pipeline: {e}", file=sys.stderr)
                import traceback

                traceback.print_exc()

            print("\n" + "=" * 70 + "\n")

    except KeyboardInterrupt:
        print("\n\n👋 Exiting interactive mode. Goodbye!")
        sys.exit(0)
332
+
333
+
334
+ def run_keywords_interactive(
335
+ method: str = "regular", tier: str = "regular", extraction_threshold: float | None = None
336
+ ):
11
337
  """
12
338
  Interactive keyword extraction testing mode.
13
339
 
@@ -15,28 +341,36 @@ def run_keywords_interactive(method: str = "lemminflect", tier: str = "regular")
15
341
  using the specified extraction method.
16
342
 
17
343
  Args:
18
- method: Extraction method ('lemminflect' or 'bert')
344
+ method: Extraction method ('regular' or 'bert')
19
345
  tier: Model tier ('fast', 'regular', or 'max')
346
+ extraction_threshold: Minimum score for extraction (None = no filtering)
20
347
  """
21
348
  print(f"\n{'='*70}")
22
349
  print("🔍 Cicada Interactive Keyword Extraction Test")
23
350
  print(f"{'='*70}")
24
- print(f"Method: {method.upper()}")
351
+
352
+ # Map extraction method to display name
353
+ method_display = "REGULAR (token-based)" if method == "regular" else "BERT"
354
+ print(f"Method: {method_display}")
25
355
  print(f"Tier: {tier}")
356
+ if extraction_threshold is not None:
357
+ print(f"Extraction threshold: {extraction_threshold}")
26
358
  print("\nPaste or type text, then press Ctrl-D (Unix) or Ctrl-Z+Enter (Windows)")
27
359
  print("to extract keywords. Press Ctrl-C to exit.\n")
28
360
  print(f"{'='*70}\n")
29
361
 
30
362
  # Initialize keyword extractor
31
363
  try:
32
- if method == "bert":
33
- from cicada.keybert_extractor import KeyBERTExtractor
364
+ if method == "regular":
365
+ from cicada.extractors.keyword import RegularKeywordExtractor
34
366
 
35
- extractor = KeyBERTExtractor(model_tier=tier, verbose=True)
36
- else:
37
- from cicada.lightweight_keyword_extractor import LightweightKeywordExtractor
367
+ extractor = RegularKeywordExtractor(verbose=True)
368
+ elif method == "bert":
369
+ from cicada.extractors.keybert import KeyBERTExtractor
38
370
 
39
- extractor = LightweightKeywordExtractor(verbose=True)
371
+ extractor = KeyBERTExtractor(verbose=True)
372
+ else:
373
+ raise ValueError(f"Unknown extraction method: {method}")
40
374
  print() # Add newline after initialization
41
375
  except Exception as e:
42
376
  print(f"Error initializing keyword extractor: {e}", file=sys.stderr)
@@ -84,28 +418,61 @@ def run_keywords_interactive(method: str = "lemminflect", tier: str = "regular")
84
418
 
85
419
  # Display top keywords with scores
86
420
  top_keywords = results.get("top_keywords", [])
421
+
422
+ # Apply extraction threshold if specified
423
+ if extraction_threshold is not None and top_keywords:
424
+ filtered_keywords = [
425
+ item
426
+ for item in top_keywords
427
+ if isinstance(item, (list, tuple))
428
+ and len(item) >= 2
429
+ and item[1] >= extraction_threshold
430
+ ]
431
+ if len(filtered_keywords) < len(top_keywords):
432
+ removed_count = len(top_keywords) - len(filtered_keywords)
433
+ print(
434
+ f"\n⚠️ Filtered out {removed_count} keywords below threshold {extraction_threshold}"
435
+ )
436
+ top_keywords = filtered_keywords
437
+
438
+ # Get code identifiers and split words for boost detection
439
+ code_identifiers = results.get("code_identifiers", [])
440
+ code_split_words = results.get("code_split_words", [])
441
+ code_identifiers_lower = [ident.lower() for ident in code_identifiers]
442
+ code_split_words_lower = [word.lower() for word in code_split_words]
443
+
87
444
  if top_keywords and isinstance(top_keywords, list):
88
- print("\n📊 Top Keywords (with scores):")
445
+ print("\n📊 Top Keywords (with weighted scores):")
89
446
  for i, item in enumerate(top_keywords, 1):
90
447
  if isinstance(item, (list, tuple)) and len(item) >= 2:
91
448
  keyword, score = item[0], item[1]
92
- print(f" {i:2}. {keyword:20s} (score: {score:.4f})")
449
+
450
+ # Determine if this keyword was boosted
451
+ boost_label = ""
452
+ if keyword.lower() in code_identifiers_lower:
453
+ boost_label = " [10x boost]"
454
+ elif keyword.lower() in code_split_words_lower:
455
+ boost_label = " [3x boost]"
456
+
457
+ print(f" {i:2}. {keyword:20s} (score: {score:.4f}){boost_label}")
93
458
  else:
94
459
  print(" No keywords extracted.")
95
460
 
96
461
  # Display code identifiers if any
97
- code_identifiers = results.get("code_identifiers")
98
462
  if code_identifiers and isinstance(code_identifiers, list):
99
- print("\n💻 Code Identifiers (10x weight):")
100
- for ident in code_identifiers:
463
+ print(f"\n💻 Code Identifiers ({len(code_identifiers)} found, 10x weight):")
464
+ for ident in code_identifiers[:10]:
101
465
  print(f" • {ident}")
466
+ if len(code_identifiers) > 10:
467
+ print(f" ... and {len(code_identifiers) - 10} more")
102
468
 
103
469
  # Display code split words if any
104
- code_split_words = results.get("code_split_words")
105
470
  if code_split_words and isinstance(code_split_words, list):
106
- print("\n🔤 Code Split Words (3x weight):")
471
+ print(f"\n🔤 Code Split Words ({len(code_split_words)} found, 3x weight):")
107
472
  for word in code_split_words[:10]: # Limit to 10
108
473
  print(f" • {word}")
474
+ if len(code_split_words) > 10:
475
+ print(f" ... and {len(code_split_words) - 10} more")
109
476
 
110
477
  # Display statistics
111
478
  stats = results.get("stats")
cicada/mcp/__init__.py ADDED
@@ -0,0 +1,10 @@
1
+ """Cicada MCP (Model Context Protocol) Server package.
2
+
3
+ This package contains the MCP server implementation for Cicada,
4
+ providing Elixir code search and analysis capabilities via the MCP standard.
5
+ """
6
+
7
+ # Note: Avoid importing server.py at package level to prevent circular dependencies
8
+ # and optional dependency issues. Users should import directly:
9
+ # from cicada.mcp.server import CicadaServer, async_main, main
10
+ # from cicada.mcp.tools import get_tool_definitions
cicada/mcp/entry.py ADDED
@@ -0,0 +1,17 @@
1
+ from cicada.entry_utils import run_cli
2
+
3
+
4
def main() -> None:
    """Main entry point for the cicada-mcp command.

    Delegates to ``run_cli``, naming "server" as the default for both the
    unknown-command and the no-command case, each with the ``--fast`` argument.
    """
    program = "cicada-mcp"
    fallback_command = "server"
    run_cli(
        prog_name=program,
        version_prog_name=program,
        default_on_unknown=fallback_command,
        default_on_none=fallback_command,
        # Separate list objects, as in the original call.
        default_on_unknown_args=["--fast"],
        default_on_none_args=["--fast"],
    )
14
+
15
+
16
+ if __name__ == "__main__":
17
+ main()
cicada/mcp/filter_utils.py ADDED
@@ -0,0 +1,107 @@
1
+ """
2
+ Shared filtering utilities for MCP tools.
3
+
4
+ Provides reusable filtering functions for various MCP tool operations.
5
+ """
6
+
7
+ from typing import Any
8
+
9
+
10
def filter_by_score_threshold(
    results: list[dict[str, Any]], min_score: float
) -> list[dict[str, Any]]:
    """
    Filter search results by minimum score threshold.

    Results without a 'score' key, or whose score is explicitly ``None``, are
    treated as unscored and dropped whenever filtering is active.

    Args:
        results: List of search results with 'score' field
        min_score: Minimum score threshold (0.0 to 1.0)

    Returns:
        The input list itself when it is empty or ``min_score`` is not
        positive (nothing to filter); otherwise a new list with the results
        whose score is at least ``min_score``.
    """
    if not results or min_score <= 0.0:
        return results

    kept = []
    for result in results:
        score = result.get("score")
        # A null score cannot be compared with a float; skip it rather than raise.
        if score is not None and score >= min_score:
            kept.append(result)
    return kept
27
+
28
+
29
def is_test_file(file_path: str) -> bool:
    """
    Determine if a file path is a test file.

    Matching is case-insensitive and accepts both '/' and '\\' separators.
    A path is a test file when any of the following holds:
    - One of its directories is named 'test'
    - Its filename starts with 'test_'
    - Its filename ends with '_test.ex' or '_test.exs'

    The checks apply to whole path components rather than raw substrings, so
    names such as 'latest_results.ex' or 'contest_handler.ex' are not
    mistaken for tests, and a relative 'test/...' path is recognized.

    Args:
        file_path: Path to check

    Returns:
        True if the file is a test file
    """
    if not file_path:
        return False

    # Normalize Windows separators so a single split handles both styles.
    normalized = file_path.lower().replace("\\", "/")
    *directories, filename = normalized.split("/")

    if "test" in directories:
        return True
    return filename.startswith("test_") or filename.endswith(("_test.ex", "_test.exs"))
57
+
58
+
59
def classify_usage_type(usage_sites: list[dict[str, Any]]) -> dict[str, list[dict[str, Any]]]:
    """
    Classify usage sites into test and production categories.

    A site whose 'file' is missing, empty or ``None`` is passed to
    ``is_test_file`` as an empty path.

    Args:
        usage_sites: List of usage sites with 'file' field

    Returns:
        Dictionary with 'test' and 'production' keys containing categorized
        sites, each in input order
    """
    buckets: dict[str, list[dict[str, Any]]] = {"test": [], "production": []}

    for site in usage_sites:
        # `or ""` also covers an explicit None, which would break path checks.
        file_path = site.get("file") or ""
        category = "test" if is_test_file(file_path) else "production"
        buckets[category].append(site)

    return buckets
83
+
84
+
85
def filter_by_file_type(usage_sites: list[dict[str, Any]], usage_type: str) -> list[dict[str, Any]]:
    """
    Filter usage sites by file type (test vs production).

    Args:
        usage_sites: List of usage sites with 'file' field
        usage_type: One of 'all', 'test_only', 'production_only'

    Returns:
        Filtered list of usage sites. For 'all', and for any unrecognized
        ``usage_type``, the input list is returned unchanged.
    """
    category_by_usage_type = {
        "test_only": "test",
        "production_only": "production",
    }
    category = category_by_usage_type.get(usage_type)
    if category is None:
        # 'all' or an invalid type: nothing to filter, so skip classification.
        return usage_sites

    return classify_usage_type(usage_sites)[category]