cicada-mcp 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. cicada/_version_hash.py +4 -0
  2. cicada/cli.py +6 -748
  3. cicada/commands.py +1255 -0
  4. cicada/dead_code/__init__.py +1 -0
  5. cicada/{find_dead_code.py → dead_code/finder.py} +2 -1
  6. cicada/dependency_analyzer.py +147 -0
  7. cicada/entry_utils.py +92 -0
  8. cicada/extractors/base.py +9 -9
  9. cicada/extractors/call.py +17 -20
  10. cicada/extractors/common.py +64 -0
  11. cicada/extractors/dependency.py +117 -235
  12. cicada/extractors/doc.py +2 -49
  13. cicada/extractors/function.py +10 -14
  14. cicada/extractors/keybert.py +228 -0
  15. cicada/extractors/keyword.py +191 -0
  16. cicada/extractors/module.py +6 -10
  17. cicada/extractors/spec.py +8 -56
  18. cicada/format/__init__.py +20 -0
  19. cicada/{ascii_art.py → format/ascii_art.py} +1 -1
  20. cicada/format/formatter.py +1145 -0
  21. cicada/git_helper.py +134 -7
  22. cicada/indexer.py +322 -89
  23. cicada/interactive_setup.py +251 -323
  24. cicada/interactive_setup_helpers.py +302 -0
  25. cicada/keyword_expander.py +437 -0
  26. cicada/keyword_search.py +208 -422
  27. cicada/keyword_test.py +383 -16
  28. cicada/mcp/__init__.py +10 -0
  29. cicada/mcp/entry.py +17 -0
  30. cicada/mcp/filter_utils.py +107 -0
  31. cicada/mcp/pattern_utils.py +118 -0
  32. cicada/{mcp_server.py → mcp/server.py} +819 -73
  33. cicada/mcp/tools.py +473 -0
  34. cicada/pr_finder.py +2 -3
  35. cicada/pr_indexer/indexer.py +3 -2
  36. cicada/setup.py +167 -35
  37. cicada/tier.py +225 -0
  38. cicada/utils/__init__.py +9 -2
  39. cicada/utils/fuzzy_match.py +54 -0
  40. cicada/utils/index_utils.py +9 -0
  41. cicada/utils/path_utils.py +18 -0
  42. cicada/utils/text_utils.py +52 -1
  43. cicada/utils/tree_utils.py +47 -0
  44. cicada/version_check.py +99 -0
  45. cicada/watch_manager.py +320 -0
  46. cicada/watcher.py +431 -0
  47. cicada_mcp-0.3.0.dist-info/METADATA +541 -0
  48. cicada_mcp-0.3.0.dist-info/RECORD +70 -0
  49. cicada_mcp-0.3.0.dist-info/entry_points.txt +4 -0
  50. cicada/formatter.py +0 -864
  51. cicada/keybert_extractor.py +0 -286
  52. cicada/lightweight_keyword_extractor.py +0 -290
  53. cicada/mcp_entry.py +0 -683
  54. cicada/mcp_tools.py +0 -291
  55. cicada_mcp-0.2.0.dist-info/METADATA +0 -735
  56. cicada_mcp-0.2.0.dist-info/RECORD +0 -53
  57. cicada_mcp-0.2.0.dist-info/entry_points.txt +0 -4
  58. /cicada/{dead_code_analyzer.py → dead_code/analyzer.py} +0 -0
  59. /cicada/{colors.py → format/colors.py} +0 -0
  60. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/WHEEL +0 -0
  61. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/licenses/LICENSE +0 -0
  62. {cicada_mcp-0.2.0.dist-info → cicada_mcp-0.3.0.dist-info}/top_level.txt +0 -0
@@ -124,6 +124,24 @@ def match_file_path(
124
124
  return bool(target_str.endswith(candidate_str))
125
125
 
126
126
 
127
+ def is_git_repository(path: str | Path) -> bool:
128
+ """
129
+ Check if a path is a git repository.
130
+
131
+ Args:
132
+ path: Path to check
133
+
134
+ Returns:
135
+ True if the path is a git repository, False otherwise
136
+
137
+ Example:
138
+ is_git_repository('/repo') -> True
139
+ is_git_repository('/not/a/repo') -> False
140
+ """
141
+ git_dir = Path(path) / ".git"
142
+ return git_dir.exists()
143
+
144
+
127
145
  def find_repo_root(start_path: str | Path | None = None) -> Path | None:
128
146
  """
129
147
  Find the git repository root starting from a given path.
@@ -2,12 +2,63 @@
2
2
  Text utilities for identifier manipulation and processing.
3
3
 
4
4
  This module provides shared utilities for working with code identifiers,
5
- including splitting camelCase, PascalCase, and snake_case identifiers.
5
+ including splitting camelCase, PascalCase, and snake_case identifiers,
6
+ and extracting code-specific identifiers from text.
6
7
  """
7
8
 
8
9
  import re
9
10
 
10
11
 
12
def extract_code_identifiers(text: str) -> tuple[list[str], list[str]]:
    """
    Extract code-specific identifiers and their split words from text.

    Matches various identifier patterns: camelCase, PascalCase, snake_case,
    and acronyms. Returns both the original identifiers and the individual
    words extracted from those identifiers.

    Both returned lists are de-duplicated and keep first-seen order, so the
    result is identical from one run to the next for the same input.

    Args:
        text: Input text to analyze

    Returns:
        Tuple of (identifiers, split_words) where:
        - identifiers: original camelCase/PascalCase/snake_case identifiers,
          ordered by pattern and then by position in ``text``
        - split_words: lowercased alphabetic words longer than one character,
          taken from the whitespace-separated output of
          ``split_camel_snake_case`` for each identifier

    Examples:
        >>> identifiers, split_words = extract_code_identifiers("getUserData and HTTPServer")
        >>> "getUserData" in identifiers
        True
        >>> "get" in split_words
        True
    """
    # Match camelCase, snake_case, PascalCase, and mixed patterns
    patterns = (
        r"\b[a-z]+[A-Z][a-zA-Z]*\b",  # camelCase (e.g., getUserData)
        r"\b[A-Z]{2,}[a-z]+[a-zA-Z]*\b",  # Uppercase prefix + PascalCase
        r"\b[A-Z][a-z]+[A-Z][a-zA-Z]*\b",  # PascalCase (e.g., UserController)
        r"\b[a-z]+_[a-z_]+\b",  # snake_case (e.g., get_user_data)
        r"\b[A-Z]{2,}\b",  # All UPPERCASE (e.g., HTTP, API)
    )

    # dict.fromkeys() removes duplicates while preserving insertion order.
    # list(set(...)) would reorder the strings differently between interpreter
    # runs because string hashing is randomized per process.
    identifiers = list(
        dict.fromkeys(match for pattern in patterns for match in re.findall(pattern, text))
    )

    # Split identifiers into individual words (lowercase, length > 1)
    split_words: list[str] = []
    for identifier in identifiers:
        split_words.extend(
            word.lower()
            for word in split_camel_snake_case(identifier).split()
            if len(word) > 1 and word.isalpha()
        )

    return identifiers, list(dict.fromkeys(split_words))
60
+
61
+
11
62
  def split_identifier(identifier: str, lowercase: bool = True) -> list[str]:
12
63
  """
13
64
  Split an identifier by camelCase, PascalCase, and snake_case.
@@ -0,0 +1,47 @@
1
+ """
2
+ Tree-sitter utilities for extracting and analyzing tree nodes.
3
+
4
+ This module provides shared utilities for working with tree-sitter parse trees,
5
+ including extracting text from nodes and identifying function definitions.
6
+ """
7
+
8
+
9
def extract_text_from_node(node, source_code: bytes) -> str:
    """
    Return the source text covered by a tree-sitter node.

    The node's ``start_byte``/``end_byte`` offsets are used to slice
    ``source_code``, and the slice is decoded as UTF-8.

    Args:
        node: The tree-sitter node whose text is wanted
        source_code: The source code bytes that the node was parsed from

    Returns:
        The decoded text content of the node

    Examples:
        >>> text = extract_text_from_node(node, source_code)
        >>> text = extract_text_from_node(child_node, source_code)
    """
    first, last = node.start_byte, node.end_byte
    raw = source_code[first:last]
    return raw.decode("utf-8")
25
+
26
+
27
def is_function_definition_call(call_node, source_code: bytes) -> bool:
    """
    Tell whether a call node is a definition form (def, defp, or defmodule).

    Every direct child of ``call_node`` whose type is ``"identifier"`` is
    inspected; the call counts as a definition as soon as one of them reads
    ``def``, ``defp`` or ``defmodule``.

    Args:
        call_node: A tree-sitter call node
        source_code: The source code bytes that the node was parsed from

    Returns:
        True if the call is a function definition, False otherwise

    Examples:
        >>> if is_function_definition_call(node, source_code):
        ...     skip_processing()
    """
    definition_keywords = ("def", "defp", "defmodule")
    return any(
        child.type == "identifier"
        and extract_text_from_node(child, source_code) in definition_keywords
        for child in call_node.children
    )
cicada/version_check.py CHANGED
@@ -7,6 +7,42 @@ Checks if a newer version of cicada is available on GitHub.
7
7
  import subprocess
8
8
 
9
9
 
10
+ def get_git_tag() -> str | None:
11
+ """
12
+ Get the most recent git tag from build-time generated file.
13
+
14
+ Returns:
15
+ Git tag (e.g., "v0.2.0-rc1"), or None if not available
16
+ """
17
+ try:
18
+ from cicada._version_hash import GIT_TAG
19
+
20
+ if GIT_TAG and GIT_TAG != "unknown":
21
+ return GIT_TAG
22
+ except (ImportError, AttributeError):
23
+ pass
24
+
25
+ return None
26
+
27
+
28
+ def get_git_commit_hash() -> str | None:
29
+ """
30
+ Get the current git commit hash from build-time generated file.
31
+
32
+ Returns:
33
+ Git commit hash (7-char short form), or None if not available
34
+ """
35
+ try:
36
+ from cicada._version_hash import GIT_HASH
37
+
38
+ if GIT_HASH and GIT_HASH != "unknown":
39
+ return GIT_HASH
40
+ except ImportError:
41
+ pass
42
+
43
+ return None
44
+
45
+
10
46
  def get_current_version() -> str:
11
47
  """
12
48
  Get the current version of cicada from pyproject.toml.
@@ -85,6 +121,69 @@ def compare_versions(current: str, latest: str) -> bool:
85
121
  return False
86
122
 
87
123
 
124
def get_version_string() -> str:
    """
    Build the display version, appending git tag and commit hash when known.

    Returns:
        Version string in format:
        - "0.2.0" (no git info)
        - "0.2.0 (v0.2.0-rc1/abc1234)" (with tag and hash)
        - "0.2.0 (abc1234)" (hash only, no tag)
    """
    version = get_current_version()

    # Keep only the pieces of git info that are actually available (tag first).
    git_info_parts = [part for part in (get_git_tag(), get_git_commit_hash()) if part]
    if not git_info_parts:
        return version

    return f"{version} ({'/'.join(git_info_parts)})"
149
+
150
+
151
def extract_version_tag(version_string: str) -> str:
    """
    Extract the pyproject version tag from a version string.

    The tag is the first whitespace-delimited token, i.e. everything before
    any git info in parentheses.

    Args:
        version_string: Version string like "0.2.2" or "0.2.2 (v0.2.2/0991325)"

    Returns:
        Just the version tag (e.g., "0.2.2"), or "" when the input is empty
        or contains only whitespace
    """
    # A whitespace-only string is truthy but splits into an empty list, so
    # guard on the token list rather than on the raw string to avoid an
    # IndexError.
    tokens = version_string.split() if version_string else []
    return tokens[0] if tokens else ""
163
+
164
+
165
def version_mismatch(stored_version: str | None, current_version: str | None) -> bool:
    """
    Decide whether the stored version and the current version disagree.

    Only the version tags (pyproject version) are compared; git tags and
    commit hashes in either string are ignored.

    Args:
        stored_version: Version string from index.json metadata
        current_version: Current cicada version string; when falsy, the
            result of ``get_version_string()`` is used instead

    Returns:
        True if versions differ (or if stored_version is missing), False if they match
    """
    if not stored_version:
        return True

    stored_tag = extract_version_tag(stored_version)
    baseline = current_version if current_version else get_version_string()
    return extract_version_tag(baseline) != stored_tag
185
+
186
+
88
187
  def check_for_updates() -> None:
89
188
  """
90
189
  Check if there's a newer version available on GitHub.
@@ -0,0 +1,320 @@
1
+ """
2
+ Watch Process Manager - Manages a linked watch process for automatic reindexing.
3
+
4
+ This module provides functionality to spawn and manage a watch process that runs
5
+ alongside the MCP server, automatically reindexing when files change.
6
+ """
7
+
8
+ import atexit
9
+ import logging
10
+ import os
11
+ import signal
12
+ import subprocess
13
+ import sys
14
+ import time
15
+ from pathlib import Path
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ class WatchProcessManager:
21
+ """
22
+ Manages a linked watch process for automatic reindexing.
23
+
24
+ The watch process is spawned as a child process and is automatically
25
+ terminated when the parent process exits.
26
+ """
27
+
28
+ def __init__(
29
+ self,
30
+ repo_path: str | Path,
31
+ tier: str = "regular",
32
+ debounce: float = 2.0,
33
+ register_atexit: bool = True,
34
+ ):
35
+ """
36
+ Initialize the watch process manager.
37
+
38
+ Args:
39
+ repo_path: Path to the repository to watch
40
+ tier: Indexing tier (fast, regular, or max)
41
+ debounce: Debounce interval in seconds
42
+ register_atexit: Whether to register atexit cleanup handler (disable for testing)
43
+ """
44
+ self.repo_path = Path(repo_path).resolve()
45
+ self.tier = tier
46
+ self.debounce = debounce
47
+ self.process: subprocess.Popen[bytes] | None = None
48
+ self._cleanup_registered = False
49
+ self._register_atexit = register_atexit
50
+
51
+ def start(self) -> bool:
52
+ """
53
+ Start the watch process.
54
+
55
+ Returns:
56
+ True if the process was started successfully, False otherwise
57
+ """
58
+ if self.process is not None:
59
+ logger.warning("Watch process is already running")
60
+ return False
61
+
62
+ try:
63
+ # Build the command to run cicada watch
64
+ cmd = [
65
+ sys.executable,
66
+ "-m",
67
+ "cicada.cli",
68
+ "watch",
69
+ str(self.repo_path),
70
+ "--debounce",
71
+ str(self.debounce),
72
+ ]
73
+
74
+ # Add tier flag
75
+ if self.tier == "fast":
76
+ cmd.append("--fast")
77
+ elif self.tier == "max":
78
+ cmd.append("--max")
79
+ else:
80
+ cmd.append("--regular")
81
+
82
+ # Log to stderr so it doesn't interfere with MCP protocol
83
+ print(
84
+ f"Starting watch process for {self.repo_path} (tier={self.tier}, debounce={self.debounce}s)...",
85
+ file=sys.stderr,
86
+ )
87
+
88
+ # Start the watch process
89
+ # Use stdout=sys.stderr to redirect watch output to stderr
90
+ # This prevents it from interfering with the MCP protocol on stdout
91
+ self.process = subprocess.Popen(
92
+ cmd,
93
+ stdout=sys.stderr,
94
+ stderr=sys.stderr,
95
+ # Create new process group so it doesn't receive signals from parent's terminal
96
+ start_new_session=True,
97
+ )
98
+
99
+ # Verify the process actually started and didn't crash immediately
100
+ time.sleep(0.1) # Brief delay to allow process to crash if it's going to
101
+ if self.process.poll() is not None:
102
+ print(
103
+ f"Watch process exited immediately with code {self.process.returncode}",
104
+ file=sys.stderr,
105
+ )
106
+ self.process = None
107
+ return False
108
+
109
+ # Register cleanup handler (unless disabled for testing)
110
+ if self._register_atexit and not self._cleanup_registered:
111
+ atexit.register(self._cleanup)
112
+ self._cleanup_registered = True
113
+
114
+ print(f"Watch process started (PID: {self.process.pid})", file=sys.stderr)
115
+ return True
116
+
117
+ except (FileNotFoundError, PermissionError, OSError) as e:
118
+ # Expected failures - bad config, permissions, or OS-level issues
119
+ logger.error(f"Cannot start watch process: {e}")
120
+ print(f"Error: Cannot start watch process: {e}", file=sys.stderr)
121
+ print("\nPossible causes:", file=sys.stderr)
122
+ print(" - Python interpreter not found", file=sys.stderr)
123
+ print(" - No execute permission", file=sys.stderr)
124
+ print(" - Repository path invalid", file=sys.stderr)
125
+ print(" - System resource issues", file=sys.stderr)
126
+ return False
127
+
128
+ except (ImportError, ModuleNotFoundError) as e:
129
+ # Module missing - installation problem
130
+ logger.error(f"Cicada module import failed: {e}")
131
+ print(f"Error: Cicada installation appears corrupted: {e}", file=sys.stderr)
132
+ print("Try reinstalling: uv tool install --force cicada-mcp", file=sys.stderr)
133
+ raise RuntimeError(f"Corrupted installation: {e}") from e
134
+
135
+ except (MemoryError, SystemError) as e:
136
+ # System-level failures - cannot recover
137
+ logger.critical(f"System error starting watch process: {e}")
138
+ print(f"CRITICAL: System error prevents watch process: {e}", file=sys.stderr)
139
+ raise RuntimeError(f"System failure: {e}") from e
140
+
141
+ except Exception as e:
142
+ # Unknown errors - log details and fail loudly
143
+ logger.exception("Unexpected error starting watch process")
144
+ print(f"ERROR: Unexpected failure starting watch process: {e}", file=sys.stderr)
145
+ raise RuntimeError(f"Unexpected error: {e}") from e
146
+
147
+ def stop(self) -> None:
148
+ """Stop the watch process gracefully."""
149
+ if self.process is None:
150
+ return
151
+
152
+ pid = self.process.pid # Save PID before any cleanup
153
+ try:
154
+ print(f"Stopping watch process (PID: {pid})...", file=sys.stderr)
155
+
156
+ # Try graceful termination first (SIGTERM)
157
+ if self._terminate_process(signal.SIGTERM):
158
+ print("Watch process stopped gracefully", file=sys.stderr)
159
+ else:
160
+ # Force kill if graceful termination timed out (SIGKILL)
161
+ print("Watch process didn't stop gracefully, forcing...", file=sys.stderr)
162
+ self._terminate_process(signal.SIGKILL, force=True)
163
+ print("Watch process killed", file=sys.stderr)
164
+
165
+ except (ProcessLookupError, PermissionError) as e:
166
+ # Expected errors when process is already gone
167
+ logger.warning(f"Process {pid} already terminated: {e}")
168
+ print(f"Warning: Watch process {pid} already terminated", file=sys.stderr)
169
+ # Clear process reference since process is gone
170
+ self.process = None
171
+ except Exception as e:
172
+ # Unexpected errors - log with full context and warn user
173
+ logger.exception(f"Unexpected error stopping watch process {pid}")
174
+ print(f"ERROR: Failed to stop watch process {pid}: {e}", file=sys.stderr)
175
+ print(
176
+ "Warning: Process may still be running. Manual cleanup may be needed.",
177
+ file=sys.stderr,
178
+ )
179
+ raise # Re-raise to propagate error
180
+ else:
181
+ # Only clear process reference if we successfully stopped it (no exceptions)
182
+ self.process = None
183
+
184
+ def _terminate_process(self, sig: signal.Signals, force: bool = False) -> bool:
185
+ """
186
+ Terminate the process using the specified signal.
187
+
188
+ This method handles platform differences (Unix vs Windows) and gracefully
189
+ falls back if process group operations aren't available or fail.
190
+
191
+ Args:
192
+ sig: Signal to send (SIGTERM for graceful, SIGKILL for force)
193
+ force: If True, waits indefinitely; if False, times out after 5 seconds
194
+
195
+ Returns:
196
+ True if process terminated successfully within timeout, False otherwise
197
+
198
+ Raises:
199
+ ValueError: If process is None
200
+ """
201
+ if self.process is None:
202
+ raise ValueError("Cannot terminate a None process")
203
+
204
+ # Send termination signal
205
+ if not self._send_termination_signal(sig):
206
+ return True # Process already gone
207
+
208
+ # Wait for process to exit
209
+ if force:
210
+ # Force kill - wait without timeout
211
+ self.process.wait()
212
+ return True
213
+
214
+ # Graceful termination with timeout
215
+ try:
216
+ self.process.wait(timeout=5)
217
+ return True
218
+ except subprocess.TimeoutExpired:
219
+ return False
220
+
221
+ def _send_termination_signal(self, sig: signal.Signals) -> bool:
222
+ """Send termination signal to process.
223
+
224
+ Returns:
225
+ True if signal was sent, False if process already gone
226
+ """
227
+ assert self.process is not None, "Process should not be None"
228
+ try:
229
+ # Try process group termination on Unix-like systems
230
+ if hasattr(os, "killpg") and hasattr(os, "getpgid"):
231
+ try:
232
+ os.killpg(os.getpgid(self.process.pid), sig)
233
+ except (ProcessLookupError, PermissionError, AttributeError):
234
+ # Fall back to direct process termination
235
+ self._send_direct_signal(sig)
236
+ else:
237
+ # Windows or platforms without killpg - use direct termination
238
+ self._send_direct_signal(sig)
239
+ return True
240
+ except (ProcessLookupError, PermissionError) as e:
241
+ # Process already gone or cannot signal - consider success
242
+ logger.info(f"Process {self.process.pid} termination: {e}")
243
+ return False
244
+
245
+ def _send_direct_signal(self, sig: signal.Signals) -> None:
246
+ """Send signal directly to process."""
247
+ assert self.process is not None, "Process should not be None"
248
+ if sig == signal.SIGTERM:
249
+ self.process.terminate()
250
+ else:
251
+ self.process.kill()
252
+
253
+ def _cleanup(self) -> None:
254
+ """Cleanup handler registered with atexit."""
255
+ try:
256
+ self.stop()
257
+ except Exception as e:
258
+ # Don't re-raise during atexit - just log the error and let process exit
259
+ logger.exception("Error during atexit cleanup")
260
+ print(f"Warning: Error stopping watch process during cleanup: {e}", file=sys.stderr)
261
+ # Don't re-raise - let process exit cleanly
262
+
263
+ def is_running(self) -> bool:
264
+ """
265
+ Check if the watch process is running.
266
+
267
+ Returns:
268
+ True if the process is running, False otherwise
269
+ """
270
+ if self.process is None:
271
+ return False
272
+
273
+ # Check if process is still running
274
+ return self.process.poll() is None
275
+
276
+
277
# Module-wide slot for the active watch manager; None until one is registered
# through set_watch_manager().
_watch_manager: WatchProcessManager | None = None


def get_watch_manager() -> WatchProcessManager | None:
    """Return the currently registered watch manager, or None if there is none."""
    return _watch_manager
284
+
285
+
286
def set_watch_manager(manager: WatchProcessManager | None) -> None:
    """Replace the module-wide watch manager; pass None to clear it."""
    global _watch_manager
    _watch_manager = manager
290
+
291
+
292
def start_watch_process(
    repo_path: str | Path, tier: str = "regular", debounce: float = 2.0
) -> bool:
    """
    Create a WatchProcessManager for the repository, start it, and register it.

    The new manager becomes the global watch manager only when its process
    started successfully.

    Args:
        repo_path: Path to the repository to watch
        tier: Indexing tier (fast, regular, or max)
        debounce: Debounce interval in seconds

    Returns:
        True if started successfully, False otherwise
    """
    manager = WatchProcessManager(repo_path, tier, debounce)
    if not manager.start():
        return False

    set_watch_manager(manager)
    return True
313
+
314
+
315
def stop_watch_process() -> None:
    """Stop the globally registered watch process, if any, and clear the registration."""
    manager = get_watch_manager()
    if manager is None:
        return

    manager.stop()
    set_watch_manager(None)