@embedder/embedder 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (100) hide show
  1. package/LICENSE +36 -0
  2. package/bundle/embedder.js +600 -0
  3. package/bundle/gdb-debugger-python/gdb_bridge.py +392 -0
  4. package/bundle/gdb-debugger-python/requirements.txt +1 -0
  5. package/bundle/postinstall-for-users.js +497 -0
  6. package/bundle/prebuilt/darwin-arm64/node-pty.node +0 -0
  7. package/bundle/prebuilt/darwin-arm64/serialport.node +0 -0
  8. package/bundle/prebuilt/darwin-x64/node-pty.node +0 -0
  9. package/bundle/prebuilt/darwin-x64/serialport.node +0 -0
  10. package/bundle/prebuilt/js/LICENSE +21 -0
  11. package/bundle/prebuilt/js/README.md +16 -0
  12. package/bundle/prebuilt/js/dist/index.d.ts +180 -0
  13. package/bundle/prebuilt/js/dist/index.js +380 -0
  14. package/bundle/prebuilt/js/package.json +30 -0
  15. package/bundle/prebuilt/linux-x64/node-pty.node +0 -0
  16. package/bundle/prebuilt/linux-x64/serialport.node +0 -0
  17. package/bundle/prebuilt/win32-x64/node-pty.node +0 -0
  18. package/bundle/prebuilt/win32-x64/serialport.node +0 -0
  19. package/bundle/repomap-bridge.js +6 -0
  20. package/bundle/repomap-python/.repomap.tags.cache.v1/16/f1/46475231336389d911f729227da4.val +0 -0
  21. package/bundle/repomap-python/.repomap.tags.cache.v1/4b/ed/71b2bc3ff2b4ae3127312ffb93b6.val +0 -0
  22. package/bundle/repomap-python/.repomap.tags.cache.v1/9a/a5/4cd70a20713e3b8fb1e15ada7795.val +0 -0
  23. package/bundle/repomap-python/.repomap.tags.cache.v1/a2/bd/43da7881d5016e770db1c6facb21.val +0 -0
  24. package/bundle/repomap-python/.repomap.tags.cache.v1/a9/9a/8d9d8580960d3db4249ad5534c93.val +0 -0
  25. package/bundle/repomap-python/.repomap.tags.cache.v1/c9/b3/539c4fa477faa91028d0911cbd93.val +0 -0
  26. package/bundle/repomap-python/.repomap.tags.cache.v1/cache.db +0 -0
  27. package/bundle/repomap-python/.repomap.tags.cache.v1/d2/7f/23d90301a6beae01ee51643cbdec.val +0 -0
  28. package/bundle/repomap-python/.repomap.tags.cache.v1/d4/03/91f221322e309efe044a99fd3b12.val +0 -0
  29. package/bundle/repomap-python/__pycache__/importance.cpython-310.pyc +0 -0
  30. package/bundle/repomap-python/__pycache__/repomap_class.cpython-310.pyc +0 -0
  31. package/bundle/repomap-python/__pycache__/scm.cpython-310.pyc +0 -0
  32. package/bundle/repomap-python/__pycache__/utils.cpython-310.pyc +0 -0
  33. package/bundle/repomap-python/importance.py +58 -0
  34. package/bundle/repomap-python/queries/repomap_server.py +577 -0
  35. package/bundle/repomap-python/queries/tree-sitter-language-pack/README.md +9 -0
  36. package/bundle/repomap-python/queries/tree-sitter-language-pack/arduino-tags.scm +5 -0
  37. package/bundle/repomap-python/queries/tree-sitter-language-pack/c-tags.scm +9 -0
  38. package/bundle/repomap-python/queries/tree-sitter-language-pack/chatito-tags.scm +16 -0
  39. package/bundle/repomap-python/queries/tree-sitter-language-pack/commonlisp-tags.scm +122 -0
  40. package/bundle/repomap-python/queries/tree-sitter-language-pack/cpp-tags.scm +15 -0
  41. package/bundle/repomap-python/queries/tree-sitter-language-pack/csharp-tags.scm +26 -0
  42. package/bundle/repomap-python/queries/tree-sitter-language-pack/d-tags.scm +26 -0
  43. package/bundle/repomap-python/queries/tree-sitter-language-pack/dart-tags.scm +92 -0
  44. package/bundle/repomap-python/queries/tree-sitter-language-pack/elisp-tags.scm +5 -0
  45. package/bundle/repomap-python/queries/tree-sitter-language-pack/elixir-tags.scm +54 -0
  46. package/bundle/repomap-python/queries/tree-sitter-language-pack/elm-tags.scm +19 -0
  47. package/bundle/repomap-python/queries/tree-sitter-language-pack/gleam-tags.scm +41 -0
  48. package/bundle/repomap-python/queries/tree-sitter-language-pack/go-tags.scm +42 -0
  49. package/bundle/repomap-python/queries/tree-sitter-language-pack/java-tags.scm +20 -0
  50. package/bundle/repomap-python/queries/tree-sitter-language-pack/javascript-tags.scm +88 -0
  51. package/bundle/repomap-python/queries/tree-sitter-language-pack/lua-tags.scm +34 -0
  52. package/bundle/repomap-python/queries/tree-sitter-language-pack/ocaml-tags.scm +115 -0
  53. package/bundle/repomap-python/queries/tree-sitter-language-pack/ocaml_interface-tags.scm +98 -0
  54. package/bundle/repomap-python/queries/tree-sitter-language-pack/pony-tags.scm +39 -0
  55. package/bundle/repomap-python/queries/tree-sitter-language-pack/properties-tags.scm +5 -0
  56. package/bundle/repomap-python/queries/tree-sitter-language-pack/python-tags.scm +14 -0
  57. package/bundle/repomap-python/queries/tree-sitter-language-pack/r-tags.scm +21 -0
  58. package/bundle/repomap-python/queries/tree-sitter-language-pack/racket-tags.scm +12 -0
  59. package/bundle/repomap-python/queries/tree-sitter-language-pack/ruby-tags.scm +64 -0
  60. package/bundle/repomap-python/queries/tree-sitter-language-pack/rust-tags.scm +60 -0
  61. package/bundle/repomap-python/queries/tree-sitter-language-pack/solidity-tags.scm +43 -0
  62. package/bundle/repomap-python/queries/tree-sitter-language-pack/swift-tags.scm +51 -0
  63. package/bundle/repomap-python/queries/tree-sitter-language-pack/udev-tags.scm +20 -0
  64. package/bundle/repomap-python/queries/tree-sitter-languages/README.md +24 -0
  65. package/bundle/repomap-python/queries/tree-sitter-languages/c-tags.scm +9 -0
  66. package/bundle/repomap-python/queries/tree-sitter-languages/c_sharp-tags.scm +46 -0
  67. package/bundle/repomap-python/queries/tree-sitter-languages/cpp-tags.scm +15 -0
  68. package/bundle/repomap-python/queries/tree-sitter-languages/dart-tags.scm +91 -0
  69. package/bundle/repomap-python/queries/tree-sitter-languages/elisp-tags.scm +8 -0
  70. package/bundle/repomap-python/queries/tree-sitter-languages/elixir-tags.scm +54 -0
  71. package/bundle/repomap-python/queries/tree-sitter-languages/elm-tags.scm +19 -0
  72. package/bundle/repomap-python/queries/tree-sitter-languages/go-tags.scm +30 -0
  73. package/bundle/repomap-python/queries/tree-sitter-languages/hcl-tags.scm +77 -0
  74. package/bundle/repomap-python/queries/tree-sitter-languages/java-tags.scm +20 -0
  75. package/bundle/repomap-python/queries/tree-sitter-languages/javascript-tags.scm +88 -0
  76. package/bundle/repomap-python/queries/tree-sitter-languages/kotlin-tags.scm +27 -0
  77. package/bundle/repomap-python/queries/tree-sitter-languages/ocaml-tags.scm +115 -0
  78. package/bundle/repomap-python/queries/tree-sitter-languages/ocaml_interface-tags.scm +98 -0
  79. package/bundle/repomap-python/queries/tree-sitter-languages/php-tags.scm +26 -0
  80. package/bundle/repomap-python/queries/tree-sitter-languages/python-tags.scm +12 -0
  81. package/bundle/repomap-python/queries/tree-sitter-languages/ql-tags.scm +26 -0
  82. package/bundle/repomap-python/queries/tree-sitter-languages/ruby-tags.scm +64 -0
  83. package/bundle/repomap-python/queries/tree-sitter-languages/rust-tags.scm +60 -0
  84. package/bundle/repomap-python/queries/tree-sitter-languages/scala-tags.scm +65 -0
  85. package/bundle/repomap-python/queries/tree-sitter-languages/typescript-tags.scm +41 -0
  86. package/bundle/repomap-python/repomap.py +229 -0
  87. package/bundle/repomap-python/repomap_bridge.py +234 -0
  88. package/bundle/repomap-python/repomap_class.py +637 -0
  89. package/bundle/repomap-python/repomap_server.py +585 -0
  90. package/bundle/repomap-python/requirements.txt +7 -0
  91. package/bundle/repomap-python/scm.py +59 -0
  92. package/bundle/repomap-python/utils.py +58 -0
  93. package/bundle/sandbox-macos-permissive-closed.sb +26 -0
  94. package/bundle/sandbox-macos-permissive-open.sb +19 -0
  95. package/bundle/sandbox-macos-permissive-proxied.sb +31 -0
  96. package/bundle/sandbox-macos-restrictive-closed.sb +87 -0
  97. package/bundle/sandbox-macos-restrictive-open.sb +90 -0
  98. package/bundle/sandbox-macos-restrictive-proxied.sb +92 -0
  99. package/package.json +97 -0
  100. package/postinstall.js +42 -0
@@ -0,0 +1,58 @@
1
"""
Important file filtering for RepoMap.
"""

import os
from typing import List

# Basenames that mark project-defining files: docs, build manifests,
# dependency lockfiles, CI/lint config, and licensing. Membership checks are
# case-sensitive, hence the duplicate README spellings.
IMPORTANT_FILENAMES = {
    "README.md", "README.txt", "readme.md", "README.rst", "README",
    "requirements.txt", "Pipfile", "pyproject.toml", "setup.py", "setup.cfg",
    "package.json", "yarn.lock", "package-lock.json", "npm-shrinkwrap.json",
    "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
    ".gitignore", ".gitattributes", ".dockerignore",
    "Makefile", "makefile", "CMakeLists.txt",
    "LICENSE", "LICENSE.txt", "LICENSE.md", "COPYING",
    "CHANGELOG.md", "CHANGELOG.txt", "HISTORY.md",
    "CONTRIBUTING.md", "CODE_OF_CONDUCT.md",
    ".env", ".env.example", ".env.local",
    "tox.ini", "pytest.ini", ".pytest.ini",
    ".flake8", ".pylintrc", "mypy.ini",
    "go.mod", "go.sum", "Cargo.toml", "Cargo.lock",
    "pom.xml", "build.gradle", "build.gradle.kts",
    "composer.json", "composer.lock",
    "Gemfile", "Gemfile.lock",
}

# Directory-scoped rules: maps a normalized relative directory to a predicate
# over a bare filename. In is_important() only files whose immediate parent
# equals the key are matched (subdirectories are not covered).
IMPORTANT_DIR_PATTERNS = {
    os.path.normpath(".github/workflows"): lambda fname: fname.endswith((".yml", ".yaml")),
    os.path.normpath(".github"): lambda fname: fname.endswith((".md", ".yml", ".yaml")),
    os.path.normpath("docs"): lambda fname: fname.endswith((".md", ".rst", ".txt")),
}
32
+
33
+
34
def is_important(rel_file_path: str) -> bool:
    """Return True when *rel_file_path* names a project-defining file.

    A path qualifies if it sits directly inside a directory listed in
    IMPORTANT_DIR_PATTERNS and satisfies that directory's filename predicate,
    or if its normalized relative path or bare filename appears in
    IMPORTANT_FILENAMES.
    """
    path = os.path.normpath(rel_file_path)
    base = os.path.basename(path)
    parent = os.path.dirname(path)

    # Directory-scoped rules first (e.g. workflow YAML under .github/workflows).
    if any(parent == rule_dir and matches(base)
           for rule_dir, matches in IMPORTANT_DIR_PATTERNS.items()):
        return True

    # Either the whole relative path or just the filename may be listed.
    return path in IMPORTANT_FILENAMES or base in IMPORTANT_FILENAMES
54
+
55
+
56
def filter_important_files(file_paths: List[str]) -> List[str]:
    """Return only the paths that is_important() classifies as important."""
    return list(filter(is_important, file_paths))
@@ -0,0 +1,577 @@
1
import asyncio
import json
import os
import sys
import logging
from pathlib import Path
from typing import List, Optional, Dict, Any, Set
import dataclasses

# Add startup logging before any imports that might fail
startup_log = logging.getLogger('startup')

# Check if startup logs should be suppressed (build-time configuration).
# Suppressed by default: only when SUPPRESS_STARTUP_LOGS is set to something
# other than 'true' do step-by-step startup messages appear.
suppress_startup_logs = os.environ.get('SUPPRESS_STARTUP_LOGS', 'true').lower() == 'true'

if not suppress_startup_logs:
    # Verbose mode: emit every startup step to stderr so stdout stays free
    # for the JSON protocol used by interactive_mode().
    startup_handler = logging.StreamHandler(sys.stderr)
    startup_handler.setLevel(logging.INFO)
    startup_formatter = logging.Formatter('🐍 STARTUP: %(message)s')
    startup_handler.setFormatter(startup_formatter)
    startup_log.addHandler(startup_handler)
    startup_log.setLevel(logging.INFO)
else:
    # Create a no-op logger in suppressed mode - only log errors
    startup_log.addHandler(logging.NullHandler())
    startup_log.setLevel(logging.ERROR)

startup_log.info("Starting RepoMapper server initialization...")
startup_log.info(f"Python version: {sys.version}")
startup_log.info(f"Working directory: {os.getcwd()}")
startup_log.info(f"Script path: {__file__}")
startup_log.info(f"Python path: {sys.path[:3]}...")  # Show first few entries

try:
    startup_log.info("Importing FastMCP...")
    from fastmcp import FastMCP, settings
    startup_log.info("✅ FastMCP imported successfully")
except ImportError as e:
    # FastMCP is a hard requirement; exit immediately with an actionable hint.
    startup_log.error(f"❌ Failed to import FastMCP: {e}")
    startup_log.error("💡 Try: pip install fastmcp")
    sys.exit(1)

try:
    startup_log.info("Importing RepoMapper dependencies...")
    from repomap_class import RepoMap
    from utils import count_tokens, read_text
    from scm import get_scm_fname
    from importance import filter_important_files
    startup_log.info("✅ All RepoMapper dependencies imported successfully")
except ImportError as e:
    # These are sibling modules shipped next to this script; a failure usually
    # means a wrong working directory or sys.path, so dump context for triage.
    startup_log.error(f"❌ Failed to import RepoMapper dependencies: {e}")
    startup_log.error(f"💡 Missing dependency: {str(e).split('No module named')[-1] if 'No module named' in str(e) else 'unknown'}")
    startup_log.error(f"Current working directory: {os.getcwd()}")
    startup_log.error(f"Files in current directory: {list(os.listdir('.'))[:10]}")
    sys.exit(1)
56
+
57
+
58
# Directory-walking helper shared by the map/search/warm-up tools.
def find_src_files(directory: str) -> List[str]:
    """Recursively collect non-hidden files under *directory*.

    If *directory* is itself a plain file, it is returned as a one-element
    list; if it names nothing on disk, an empty list is returned. Hidden
    entries (dot-prefixed) and common vendored/virtualenv directories are
    skipped entirely.
    """
    if not os.path.isdir(directory):
        return [directory] if os.path.isfile(directory) else []

    skip_dirs = {'node_modules', '__pycache__', 'venv', 'env'}
    collected: List[str] = []
    for root, subdirs, filenames in os.walk(directory):
        # Prune in place so os.walk never descends into skipped directories.
        subdirs[:] = [name for name in subdirs
                      if not name.startswith('.') and name not in skip_dirs]
        collected.extend(os.path.join(root, name)
                         for name in filenames
                         if not name.startswith('.'))
    return collected
69
+
70
# Configure logging - show info and above
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# Create console handler for info and above (explicitly use stderr).
# stderr keeps diagnostics out of stdout, which carries the JSON responses
# in interactive mode.
console_handler = logging.StreamHandler(sys.stderr)
console_handler.setLevel(logging.INFO)
console_formatter = logging.Formatter('%(levelname)-5s %(asctime)-15s %(name)s:%(funcName)s:%(lineno)d - %(message)s')
console_handler.setFormatter(console_formatter)
root_logger.addHandler(console_handler)

# Suppress FastMCP logs
fastmcp_logger = logging.getLogger('fastmcp')
fastmcp_logger.setLevel(logging.ERROR)
# Suppress server startup message
server_logger = logging.getLogger('fastmcp.server')
server_logger.setLevel(logging.ERROR)
# Suppress all mcp-related loggers
mcp_logger = logging.getLogger('mcp')
mcp_logger.setLevel(logging.ERROR)
# Suppress root FastMCP server messages
logging.getLogger('server').setLevel(logging.ERROR)
# Additional suppression for common MCP logger patterns
for logger_name in ['fastmcp.transport', 'fastmcp.stdio', 'mcp.server', 'mcp.transport']:
    logging.getLogger(logger_name).setLevel(logging.ERROR)

# Module logger used by the request-handling functions below.
log = logging.getLogger(__name__)

# Set global stateless_http setting
settings.stateless_http = True

# Create MCP server; the @mcp.tool() decorators below register handlers on it.
mcp = FastMCP("RepoMapServer")
103
+
104
async def _repo_map_impl(
    project_root: str,
    chat_files: Optional[List[str]] = None,
    other_files: Optional[List[str]] = None,
    token_limit: Any = 8192,  # Accept any type to handle empty strings
    exclude_unranked: bool = False,
    force_refresh: bool = False,
    mentioned_files: Optional[List[str]] = None,
    mentioned_idents: Optional[List[str]] = None,
    verbose: bool = False,
    max_context_window: Optional[int] = None,
) -> Dict[str, Any]:
    """Generate a repository map for the specified files, providing a list of function prototypes and variables for files as well as relevant related
    files. Provide filenames relative to the project_root. In addition to the files provided, relevant related files will also be included with a
    very small ranking boost.

    :param project_root: Root directory of the project to search. (must be an absolute path!)
    :param chat_files: A list of file paths that are currently in the chat context. These files will receive the highest ranking.
    :param other_files: A list of other relevant file paths in the repository to consider for the map. They receive a lower ranking boost than mentioned_files and chat_files.
    :param token_limit: The maximum number of tokens the generated repository map should occupy. Defaults to 8192.
    :param exclude_unranked: If True, files with a PageRank of 0.0 will be excluded from the map. Defaults to False.
    :param force_refresh: If True, forces a refresh of the repository map cache. Defaults to False.
    :param mentioned_files: Optional list of file paths explicitly mentioned in the conversation and receive a mid-level ranking boost.
    :param mentioned_idents: Optional list of identifiers explicitly mentioned in the conversation, to boost their ranking.
    :param verbose: If True, enables verbose logging for the RepoMap generation process. Defaults to False.
    :param max_context_window: Optional maximum context window size for token calculation, used to adjust map token limit when no chat files are provided.
    :returns: A dictionary containing:
        - 'map': the generated repository map string
        - 'report': a dictionary with file processing details including:
            - 'excluded': dictionary of excluded files with reasons
            - 'definition_matches': count of matched definitions
            - 'reference_matches': count of matched references
            - 'total_files_considered': total files processed
        Or an 'error' key if an error occurred.
    """
    if not os.path.isdir(project_root):
        return {"error": f"Project root directory not found: {project_root}"}

    # 1. Handle and validate parameters
    # Convert token_limit to integer with fallback; empty string/None/0 all
    # fall through to the 8192 default.
    try:
        token_limit = int(token_limit) if token_limit else 8192
    except (TypeError, ValueError):
        token_limit = 8192

    # Ensure token_limit is positive
    if token_limit <= 0:
        token_limit = 8192

    chat_files_list = chat_files or []
    mentioned_fnames_set = set(mentioned_files) if mentioned_files else None
    mentioned_idents_set = set(mentioned_idents) if mentioned_idents else None

    # 2. If a specific list of other_files isn't provided, scan the whole root directory.
    # This should happen regardless of whether chat_files are present.
    effective_other_files = []
    if other_files:
        effective_other_files = other_files
    else:
        log.info("No other_files provided, scanning root directory for context...")
        effective_other_files = find_src_files(project_root)

    # Debug visibility into what the tool is working with.
    log.debug(f"Chat files: {chat_files_list}")
    log.debug(f"Effective other_files count: {len(effective_other_files)}")

    # If after all that we have no files, we can exit early.
    if not chat_files_list and not effective_other_files:
        log.info("No files to process.")
        return {"map": "No files found to generate a map."}

    # 3. Resolve paths relative to project root
    root_path = Path(project_root).resolve()
    abs_chat_files = [str(root_path / f) for f in chat_files_list]
    abs_other_files = [str(root_path / f) for f in effective_other_files]

    # Remove any chat files from the other_files list to avoid duplication
    abs_chat_files_set = set(abs_chat_files)
    abs_other_files = [f for f in abs_other_files if f not in abs_chat_files_set]

    # 4. Instantiate and run RepoMap
    try:
        repo_mapper = RepoMap(
            map_tokens=token_limit,
            root=str(root_path),
            token_counter_func=lambda text: count_tokens(text, "gpt-4"),
            file_reader_func=read_text,
            output_handler_funcs={'info': log.info, 'warning': log.warning, 'error': log.error},
            verbose=verbose,
            exclude_unranked=exclude_unranked,
            max_context_window=max_context_window
        )
    except Exception as e:
        log.exception(f"Failed to initialize RepoMap for project '{project_root}': {e}")
        return {"error": f"Failed to initialize RepoMap: {str(e)}"}

    try:
        # get_repo_map is synchronous/CPU-bound, so run it off the event loop.
        map_content, file_report = await asyncio.to_thread(
            repo_mapper.get_repo_map,
            chat_files=abs_chat_files,
            other_files=abs_other_files,
            mentioned_fnames=mentioned_fnames_set,
            mentioned_idents=mentioned_idents_set,
            force_refresh=force_refresh
        )

        # Convert FileReport to dictionary for JSON serialization
        report_dict = {
            "excluded": file_report.excluded,
            "definition_matches": file_report.definition_matches,
            "reference_matches": file_report.reference_matches,
            "total_files_considered": file_report.total_files_considered
        }

        return {
            "map": map_content or "No repository map could be generated.",
            "report": report_dict
        }
    except Exception as e:
        log.exception(f"Error generating repository map for project '{project_root}': {e}")
        return {"error": f"Error generating repository map: {str(e)}"}
226
+
227
@mcp.tool()
async def repo_map(
    project_root: str,
    chat_files: Optional[List[str]] = None,
    other_files: Optional[List[str]] = None,
    token_limit: Any = 8192,  # Accept any type to handle empty strings
    exclude_unranked: bool = False,
    force_refresh: bool = False,
    mentioned_files: Optional[List[str]] = None,
    mentioned_idents: Optional[List[str]] = None,
    verbose: bool = False,
    max_context_window: Optional[int] = None,
) -> Dict[str, Any]:
    """Generate a repository map for a given project root.

    Returns:
        Dictionary containing map content and generation report
    """
    # Thin MCP shim: forward everything by keyword to the undecorated
    # implementation so the forwarding is robust to parameter reordering.
    return await _repo_map_impl(
        project_root,
        chat_files=chat_files,
        other_files=other_files,
        token_limit=token_limit,
        exclude_unranked=exclude_unranked,
        force_refresh=force_refresh,
        mentioned_files=mentioned_files,
        mentioned_idents=mentioned_idents,
        verbose=verbose,
        max_context_window=max_context_window,
    )
246
+
247
async def _search_identifiers_impl(
    project_root: str,
    query: str,
    max_results: int = 50,
    context_lines: int = 2,
    include_definitions: bool = True,
    include_references: bool = True
) -> Dict[str, Any]:
    """Implementation of search_identifiers without MCP decoration for direct calling.

    Args:
        project_root: Root directory of the project to search. (must be an absolute path!)
        query: Search query (identifier name)
        max_results: Maximum number of results to return
        context_lines: Number of lines of context to show
        include_definitions: Whether to include definition occurrences
        include_references: Whether to include reference occurrences

    Returns:
        Dictionary containing search results or error message
    """
    if not os.path.isdir(project_root):
        return {"error": f"Project root directory not found: {project_root}"}

    try:
        # Initialize RepoMap with search-specific settings
        # NOTE(review): this local name shadows the module-level `repo_map`
        # MCP tool inside this function's scope.
        repo_map = RepoMap(
            root=project_root,
            token_counter_func=lambda text: count_tokens(text, "gpt-4"),
            file_reader_func=read_text,
            output_handler_funcs={'info': log.info, 'warning': log.warning, 'error': log.error},
            verbose=False,
            exclude_unranked=True
        )

        # Find all source files in the project
        all_files = find_src_files(project_root)

        # Get all tags (definitions and references) for all files
        all_tags = []
        for file_path in all_files:
            rel_path = str(Path(file_path).relative_to(project_root))
            tags = repo_map.get_tags(file_path, rel_path)
            all_tags.extend(tags)

        # Filter tags based on search query and options.
        # Matching is a case-insensitive substring test on the identifier.
        matching_tags = []
        query_lower = query.lower()

        for tag in all_tags:
            if query_lower in tag.name.lower():
                if (tag.kind == "def" and include_definitions) or \
                   (tag.kind == "ref" and include_references):
                    matching_tags.append(tag)

        # Sort by relevance (definitions first, then references);
        # secondary key prefers names where the query appears earlier.
        matching_tags.sort(key=lambda x: (x.kind != "def", x.name.lower().find(query_lower)))

        # Limit results
        matching_tags = matching_tags[:max_results]

        # Format results with context
        results = []
        for tag in matching_tags:
            file_path = str(Path(project_root) / tag.rel_fname)

            # Calculate context range based on context_lines parameter
            start_line = max(1, tag.line - context_lines)
            end_line = tag.line + context_lines
            context_range = list(range(start_line, end_line + 1))

            context = repo_map.render_tree(
                file_path,
                tag.rel_fname,
                context_range
            )

            # Matches whose context cannot be rendered are silently dropped.
            if context:
                results.append({
                    "file": tag.rel_fname,
                    "line": tag.line,
                    "name": tag.name,
                    "kind": tag.kind,
                    "context": context
                })

        return {"results": results}

    except Exception as e:
        log.exception(f"Error searching identifiers in project '{project_root}': {e}")
        return {"error": f"Error searching identifiers: {str(e)}"}
338
+
339
@mcp.tool()
async def search_identifiers(
    project_root: str,
    query: str,
    max_results: int = 50,
    context_lines: int = 2,
    include_definitions: bool = True,
    include_references: bool = True
) -> Dict[str, Any]:
    """Search for identifiers in code files. Get back a list of matching identifiers with their file, line number, and context.
    When searching, just use the identifier name without any special characters, prefixes or suffixes. The search is
    case-insensitive.

    Args:
        project_root: Root directory of the project to search. (must be an absolute path!)
        query: Search query (identifier name)
        max_results: Maximum number of results to return
        context_lines: Number of lines of context to show
        include_definitions: Whether to include definition occurrences
        include_references: Whether to include reference occurrences

    Returns:
        Dictionary containing search results or error message
    """
    # Thin MCP shim: forward by keyword to the undecorated implementation.
    return await _search_identifiers_impl(
        project_root,
        query,
        max_results=max_results,
        context_lines=context_lines,
        include_definitions=include_definitions,
        include_references=include_references,
    )
364
+
365
async def _warm_up_cache_impl(project_root: str) -> Dict[str, Any]:
    """Implementation of warm_up_cache without MCP decoration for direct calling.

    Performs background indexing of the project to populate the cache.
    This should be called when the application starts to avoid delays
    when the user first runs search queries.

    Args:
        project_root: Root directory of the project to index

    Returns:
        Dictionary containing status or error message
    """
    if not os.path.isdir(project_root):
        return {"error": f"Project root directory not found: {project_root}"}

    try:
        log.info(f"Starting background cache warm-up for project: {project_root}")

        # Initialize RepoMap with indexing-specific settings
        repo_map = RepoMap(
            root=project_root,
            token_counter_func=lambda text: count_tokens(text, "gpt-4"),
            file_reader_func=read_text,
            output_handler_funcs={'info': log.info, 'warning': log.warning, 'error': log.error},
            verbose=False,
            exclude_unranked=True
        )

        # Find all source files in the project
        all_files = find_src_files(project_root)
        log.info(f"Found {len(all_files)} files to index")

        # Process files in batches to avoid overwhelming the system
        batch_size = 50
        indexed_files = 0

        for i in range(0, len(all_files), batch_size):
            batch_files = all_files[i:i + batch_size]

            for file_path in batch_files:
                try:
                    rel_path = str(Path(file_path).relative_to(project_root))
                    # get_tags is invoked purely for its cache-populating side
                    # effect; the returned tags are intentionally discarded.
                    repo_map.get_tags(file_path, rel_path)
                    indexed_files += 1

                    # Log progress every 100 files
                    if indexed_files % 100 == 0:
                        log.info(f"Cache warm-up progress: {indexed_files}/{len(all_files)} files indexed")

                except Exception as file_error:
                    # Log individual file errors but continue processing
                    log.warning(f"Failed to index file {file_path}: {file_error}")

            # Small delay between batches to be nice to the system
            await asyncio.sleep(0.01)

        log.info(f"Cache warm-up completed. Indexed {indexed_files} files.")

        return {
            "status": "completed",
            "files_indexed": indexed_files,
            "total_files": len(all_files)
        }

    except Exception as e:
        log.exception(f"Error during cache warm-up for project '{project_root}': {e}")
        return {"error": f"Error during cache warm-up: {str(e)}"}
434
+
435
@mcp.tool()
async def warm_up_cache(project_root: str) -> Dict[str, Any]:
    """Warm up the cache by indexing all files in the project.

    This is a background operation that should be called when the application
    starts to ensure that subsequent search queries are fast.

    Args:
        project_root: Root directory of the project to index (must be an absolute path!)

    Returns:
        Dictionary containing status information or error message
    """
    # Thin MCP shim over the undecorated implementation.
    outcome = await _warm_up_cache_impl(project_root)
    return outcome
449
+
450
def handle_interactive_query(query_data: Dict[str, Any]) -> Dict[str, Any]:
    """Handle interactive queries from the TypeScript client"""
    try:
        # Guard clause: a request without a tool name is unanswerable.
        if 'tool' not in query_data:
            return {"error": "Missing 'tool' field in query"}

        requested = query_data['tool']
        tool_args = query_data.get('args', {})

        # Resolve the tool lazily (no eager dispatch table) so an unknown
        # name is reported cleanly without touching the implementations.
        if requested == 'repo_map':
            return asyncio.run(_repo_map_impl(**tool_args))
        if requested == 'search_identifiers':
            return asyncio.run(_search_identifiers_impl(**tool_args))
        if requested == 'warm_up_cache':
            return asyncio.run(_warm_up_cache_impl(**tool_args))
        return {"error": f"Unknown tool: {requested}"}
    except Exception as e:
        log.exception(f"Error handling interactive query: {e}")
        return {"error": str(e)}
470
+
471
def interactive_mode():
    """Run in interactive mode for direct TypeScript communication.

    Protocol: one JSON object per stdin line, shaped as
    {"id": <str>, "query": {"tool": <name>, "args": {...}}}. Each request
    receives exactly one JSON response line on stdout carrying the same id.
    Diagnostics go to the logger (stderr), never stdout.
    """
    log.info("Starting RepoMapper in terminal interactive mode...")

    try:
        while True:
            # Read line from stdin; blank lines are skipped.
            line = input()
            if not line.strip():
                continue

            try:
                # Parse the JSON query
                query_data = json.loads(line)
                query_id = query_data.get('id', 'unknown')
                query = query_data.get('query', {})

                # Handle the query
                result = handle_interactive_query(query)

                # Send response back
                response = {
                    "id": query_id,
                    "result": result
                }
                print(json.dumps(response), flush=True)

            except json.JSONDecodeError as e:
                # Malformed request: no id was parsed, so answer 'unknown'.
                error_response = {
                    "id": "unknown",
                    "error": f"Invalid JSON: {str(e)}"
                }
                print(json.dumps(error_response), flush=True)
            except EOFError:
                # Client disconnected
                # NOTE(review): input() above sits outside this inner try, so
                # an EOF raised there would reach the outer handlers instead
                # of this branch — confirm the intended placement.
                break
            except Exception as e:
                # The failure may occur before query_data is bound, hence the
                # locals() guard when echoing the request id back.
                error_response = {
                    "id": query_data.get('id', 'unknown') if 'query_data' in locals() else 'unknown',
                    "error": str(e)
                }
                print(json.dumps(error_response), flush=True)

    except KeyboardInterrupt:
        log.debug("Received interrupt signal, shutting down...")
    except Exception as e:
        log.exception(f"Fatal error in interactive mode: {e}")
518
+
519
# --- Main Entry Point ---
def main():
    """Parse CLI flags and launch the server in the chosen mode.

    Modes:
      --interactive : line-based JSON over stdin/stdout (TypeScript client).
      --mcp-server  : FastMCP server mode.
      neither       : auto-detect; both TTY and non-TTY stdin currently
                      resolve to interactive mode, so MCP must be requested
                      explicitly.
    """
    # `sys` is already imported at module level; the former function-local
    # `import sys` merely shadowed it and has been removed.
    import argparse

    startup_log.info("Parsing command line arguments...")
    parser = argparse.ArgumentParser(description='RepoMapper Server')
    parser.add_argument('--interactive', action='store_true',
                        help='Force interactive mode for direct stdin/stdout JSON communication')
    parser.add_argument('--mcp-server', action='store_true',
                        help='Force MCP server mode')

    args = parser.parse_args()
    startup_log.info(f"Arguments: {args}")

    # Determine which mode to run in
    if args.interactive:
        # Explicitly requested interactive mode
        startup_log.info("🔄 Running in interactive mode...")
        interactive_mode()
    elif args.mcp_server:
        # Explicitly requested MCP server mode
        startup_log.info("🚀 Running in MCP server mode...")
        sys.stderr.flush()  # Force immediate output to stderr

        try:
            startup_log.info("🔧 Initializing FastMCP server...")
            mcp.run()
        except Exception as e:
            startup_log.error(f"❌ FastMCP server failed to start: {e}")
            startup_log.error(f"Error type: {type(e).__name__}")
            import traceback
            startup_log.error(f"Traceback: {traceback.format_exc()}")
            sys.exit(1)
    else:
        # Auto-detect based on stdin
        startup_log.info("🔍 Auto-detecting mode based on stdin...")
        if sys.stdin.isatty():
            # Running from terminal - use interactive mode for direct communication
            startup_log.info("📺 TTY detected, using interactive mode")
            interactive_mode()
        else:
            # Running as subprocess - default to interactive mode for TypeScript tools
            # (This fixes the issue where piped stdin was incorrectly starting MCP server)
            startup_log.info("🔗 Non-TTY detected, using interactive mode for subprocess")
            interactive_mode()
565
+
566
if __name__ == "__main__":
    # Entry guard: run main() and translate interrupts/crashes into
    # conventional exit codes (0 for Ctrl-C, 1 for any other failure).
    try:
        startup_log.info("🎬 Starting main function...")
        main()
    except KeyboardInterrupt:
        startup_log.info("🛑 Interrupted by user")
        sys.exit(0)
    except Exception as e:
        startup_log.error(f"💥 Fatal error in main: {e}")
        import traceback
        startup_log.error(f"Traceback: {traceback.format_exc()}")
        sys.exit(1)
@@ -0,0 +1,9 @@
1
+ These scm files are all adapted from the github repositories listed here:
2
+
3
+ https://github.com/Goldziher/tree-sitter-language-pack/blob/main/sources/language_definitions.json
4
+
5
+ See this URL for information on the licenses of each repo:
6
+
7
+ https://github.com/Goldziher/tree-sitter-language-pack/
8
+
9
+ (These query files were originally adapted from the Aider project by RepoMapper.)
@@ -0,0 +1,5 @@
1
; Tag free-function definitions: the identifier inside a function_declarator
; is captured as the function's name.
(function_declarator
  declarator: (identifier) @name.definition.function) @definition.function

; Tag direct calls: a call_expression whose callee is a bare identifier is
; recorded as a call reference.
(call_expression
  function: (identifier) @name.reference.call) @reference.call
@@ -0,0 +1,9 @@
1
; Struct definitions: require a body (body:(_)) so forward declarations
; are not tagged as definitions.
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class

; Union definitions, matched through the enclosing declaration node.
(declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class

; Function definitions/prototypes: the declarator identifier is the name.
(function_declarator declarator: (identifier) @name.definition.function) @definition.function

; typedef aliases.
(type_definition declarator: (type_identifier) @name.definition.type) @definition.type

; Named enums.
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type