@embedder/embedder 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +36 -0
- package/bundle/embedder.js +600 -0
- package/bundle/gdb-debugger-python/gdb_bridge.py +392 -0
- package/bundle/gdb-debugger-python/requirements.txt +1 -0
- package/bundle/postinstall-for-users.js +497 -0
- package/bundle/prebuilt/darwin-arm64/node-pty.node +0 -0
- package/bundle/prebuilt/darwin-arm64/serialport.node +0 -0
- package/bundle/prebuilt/darwin-x64/node-pty.node +0 -0
- package/bundle/prebuilt/darwin-x64/serialport.node +0 -0
- package/bundle/prebuilt/js/LICENSE +21 -0
- package/bundle/prebuilt/js/README.md +16 -0
- package/bundle/prebuilt/js/dist/index.d.ts +180 -0
- package/bundle/prebuilt/js/dist/index.js +380 -0
- package/bundle/prebuilt/js/package.json +30 -0
- package/bundle/prebuilt/linux-x64/node-pty.node +0 -0
- package/bundle/prebuilt/linux-x64/serialport.node +0 -0
- package/bundle/prebuilt/win32-x64/node-pty.node +0 -0
- package/bundle/prebuilt/win32-x64/serialport.node +0 -0
- package/bundle/repomap-bridge.js +6 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/16/f1/46475231336389d911f729227da4.val +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/4b/ed/71b2bc3ff2b4ae3127312ffb93b6.val +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/9a/a5/4cd70a20713e3b8fb1e15ada7795.val +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/a2/bd/43da7881d5016e770db1c6facb21.val +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/a9/9a/8d9d8580960d3db4249ad5534c93.val +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/c9/b3/539c4fa477faa91028d0911cbd93.val +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/cache.db +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/d2/7f/23d90301a6beae01ee51643cbdec.val +0 -0
- package/bundle/repomap-python/.repomap.tags.cache.v1/d4/03/91f221322e309efe044a99fd3b12.val +0 -0
- package/bundle/repomap-python/__pycache__/importance.cpython-310.pyc +0 -0
- package/bundle/repomap-python/__pycache__/repomap_class.cpython-310.pyc +0 -0
- package/bundle/repomap-python/__pycache__/scm.cpython-310.pyc +0 -0
- package/bundle/repomap-python/__pycache__/utils.cpython-310.pyc +0 -0
- package/bundle/repomap-python/importance.py +58 -0
- package/bundle/repomap-python/queries/repomap_server.py +577 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/README.md +9 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/arduino-tags.scm +5 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/c-tags.scm +9 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/chatito-tags.scm +16 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/commonlisp-tags.scm +122 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/cpp-tags.scm +15 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/csharp-tags.scm +26 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/d-tags.scm +26 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/dart-tags.scm +92 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/elisp-tags.scm +5 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/elixir-tags.scm +54 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/elm-tags.scm +19 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/gleam-tags.scm +41 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/go-tags.scm +42 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/java-tags.scm +20 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/javascript-tags.scm +88 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/lua-tags.scm +34 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/ocaml-tags.scm +115 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/ocaml_interface-tags.scm +98 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/pony-tags.scm +39 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/properties-tags.scm +5 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/python-tags.scm +14 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/r-tags.scm +21 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/racket-tags.scm +12 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/ruby-tags.scm +64 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/rust-tags.scm +60 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/solidity-tags.scm +43 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/swift-tags.scm +51 -0
- package/bundle/repomap-python/queries/tree-sitter-language-pack/udev-tags.scm +20 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/README.md +24 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/c-tags.scm +9 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/c_sharp-tags.scm +46 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/cpp-tags.scm +15 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/dart-tags.scm +91 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/elisp-tags.scm +8 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/elixir-tags.scm +54 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/elm-tags.scm +19 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/go-tags.scm +30 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/hcl-tags.scm +77 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/java-tags.scm +20 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/javascript-tags.scm +88 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/kotlin-tags.scm +27 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/ocaml-tags.scm +115 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/ocaml_interface-tags.scm +98 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/php-tags.scm +26 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/python-tags.scm +12 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/ql-tags.scm +26 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/ruby-tags.scm +64 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/rust-tags.scm +60 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/scala-tags.scm +65 -0
- package/bundle/repomap-python/queries/tree-sitter-languages/typescript-tags.scm +41 -0
- package/bundle/repomap-python/repomap.py +229 -0
- package/bundle/repomap-python/repomap_bridge.py +234 -0
- package/bundle/repomap-python/repomap_class.py +637 -0
- package/bundle/repomap-python/repomap_server.py +585 -0
- package/bundle/repomap-python/requirements.txt +7 -0
- package/bundle/repomap-python/scm.py +59 -0
- package/bundle/repomap-python/utils.py +58 -0
- package/bundle/sandbox-macos-permissive-closed.sb +26 -0
- package/bundle/sandbox-macos-permissive-open.sb +19 -0
- package/bundle/sandbox-macos-permissive-proxied.sb +31 -0
- package/bundle/sandbox-macos-restrictive-closed.sb +87 -0
- package/bundle/sandbox-macos-restrictive-open.sb +90 -0
- package/bundle/sandbox-macos-restrictive-proxied.sb +92 -0
- package/package.json +97 -0
- package/postinstall.js +42 -0
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Important file filtering for RepoMap.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
from typing import List
|
|
7
|
+
|
|
8
|
+
# Well-known project files that should always be surfaced in a repo map,
# grouped by ecosystem.  Membership is checked against both the full
# relative path and the bare basename.
IMPORTANT_FILENAMES = {
    # Documentation
    "README.md", "README.txt", "readme.md", "README.rst", "README",
    "CHANGELOG.md", "CHANGELOG.txt", "HISTORY.md",
    "CONTRIBUTING.md", "CODE_OF_CONDUCT.md",
    # Licensing
    "LICENSE", "LICENSE.txt", "LICENSE.md", "COPYING",
    # Python packaging / tooling
    "requirements.txt", "Pipfile", "pyproject.toml", "setup.py", "setup.cfg",
    "tox.ini", "pytest.ini", ".pytest.ini",
    ".flake8", ".pylintrc", "mypy.ini",
    # JavaScript / Node
    "package.json", "yarn.lock", "package-lock.json", "npm-shrinkwrap.json",
    # Containers
    "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
    # VCS / ignore files
    ".gitignore", ".gitattributes", ".dockerignore",
    # Build systems
    "Makefile", "makefile", "CMakeLists.txt",
    # Environment configuration
    ".env", ".env.example", ".env.local",
    # Go / Rust
    "go.mod", "go.sum", "Cargo.toml", "Cargo.lock",
    # JVM
    "pom.xml", "build.gradle", "build.gradle.kts",
    # PHP
    "composer.json", "composer.lock",
    # Ruby
    "Gemfile", "Gemfile.lock",
}

# Directory-specific rules: a file sitting directly inside one of these
# directories is important when its name satisfies the associated predicate.
# Keys are normalized with os.path.normpath so lookups work on every platform.
IMPORTANT_DIR_PATTERNS = {
    os.path.normpath(".github/workflows"): lambda fname: fname.endswith((".yml", ".yaml")),
    os.path.normpath(".github"): lambda fname: fname.endswith((".md", ".yml", ".yaml")),
    os.path.normpath("docs"): lambda fname: fname.endswith((".md", ".rst", ".txt")),
}
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def is_important(rel_file_path: str) -> bool:
    """Return True if *rel_file_path* points to a conventionally important file.

    A file counts as important when it sits directly inside one of the
    special directories (with a matching extension), or when its full
    relative path or bare basename is a well-known project file such as
    README.md or package.json.
    """
    norm = os.path.normpath(rel_file_path)
    base = os.path.basename(norm)
    parent = os.path.dirname(norm)

    # Directory-specific rules (e.g. *.yml directly under .github/workflows).
    checker = IMPORTANT_DIR_PATTERNS.get(parent)
    if checker is not None and checker(base):
        return True

    # Well-known names, matched against the whole relative path or just the
    # basename (so a nested README.md is also flagged).
    return norm in IMPORTANT_FILENAMES or base in IMPORTANT_FILENAMES
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def filter_important_files(file_paths: List[str]) -> List[str]:
    """Return only the paths from *file_paths* that is_important accepts."""
    return list(filter(is_important, file_paths))
|
|
@@ -0,0 +1,577 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import json
|
|
3
|
+
import os
|
|
4
|
+
import sys
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import List, Optional, Dict, Any, Set
|
|
8
|
+
import dataclasses
|
|
9
|
+
|
|
10
|
+
# Add startup logging before any imports that might fail
|
|
11
|
+
startup_log = logging.getLogger('startup')
|
|
12
|
+
|
|
13
|
+
# Check if startup logs should be suppressed (build-time configuration)
|
|
14
|
+
suppress_startup_logs = os.environ.get('SUPPRESS_STARTUP_LOGS', 'true').lower() == 'true'
|
|
15
|
+
|
|
16
|
+
if not suppress_startup_logs:
|
|
17
|
+
startup_handler = logging.StreamHandler(sys.stderr)
|
|
18
|
+
startup_handler.setLevel(logging.INFO)
|
|
19
|
+
startup_formatter = logging.Formatter('🐍 STARTUP: %(message)s')
|
|
20
|
+
startup_handler.setFormatter(startup_formatter)
|
|
21
|
+
startup_log.addHandler(startup_handler)
|
|
22
|
+
startup_log.setLevel(logging.INFO)
|
|
23
|
+
else:
|
|
24
|
+
# Create a no-op logger in suppressed mode - only log errors
|
|
25
|
+
startup_log.addHandler(logging.NullHandler())
|
|
26
|
+
startup_log.setLevel(logging.ERROR)
|
|
27
|
+
|
|
28
|
+
startup_log.info("Starting RepoMapper server initialization...")
|
|
29
|
+
startup_log.info(f"Python version: {sys.version}")
|
|
30
|
+
startup_log.info(f"Working directory: {os.getcwd()}")
|
|
31
|
+
startup_log.info(f"Script path: {__file__}")
|
|
32
|
+
startup_log.info(f"Python path: {sys.path[:3]}...") # Show first few entries
|
|
33
|
+
|
|
34
|
+
try:
|
|
35
|
+
startup_log.info("Importing FastMCP...")
|
|
36
|
+
from fastmcp import FastMCP, settings
|
|
37
|
+
startup_log.info("✅ FastMCP imported successfully")
|
|
38
|
+
except ImportError as e:
|
|
39
|
+
startup_log.error(f"❌ Failed to import FastMCP: {e}")
|
|
40
|
+
startup_log.error("💡 Try: pip install fastmcp")
|
|
41
|
+
sys.exit(1)
|
|
42
|
+
|
|
43
|
+
try:
|
|
44
|
+
startup_log.info("Importing RepoMapper dependencies...")
|
|
45
|
+
from repomap_class import RepoMap
|
|
46
|
+
from utils import count_tokens, read_text
|
|
47
|
+
from scm import get_scm_fname
|
|
48
|
+
from importance import filter_important_files
|
|
49
|
+
startup_log.info("✅ All RepoMapper dependencies imported successfully")
|
|
50
|
+
except ImportError as e:
|
|
51
|
+
startup_log.error(f"❌ Failed to import RepoMapper dependencies: {e}")
|
|
52
|
+
startup_log.error(f"💡 Missing dependency: {str(e).split('No module named')[-1] if 'No module named' in str(e) else 'unknown'}")
|
|
53
|
+
startup_log.error(f"Current working directory: {os.getcwd()}")
|
|
54
|
+
startup_log.error(f"Files in current directory: {list(os.listdir('.'))[:10]}")
|
|
55
|
+
sys.exit(1)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
# Helper function from your CLI, useful to have here
|
|
59
|
+
def find_src_files(directory: str) -> List[str]:
    """Collect candidate source files under *directory*.

    If *directory* is actually a file, it is returned as the only entry; if
    it is neither a file nor a directory, the result is empty.  Hidden files,
    hidden directories and common dependency/cache directories
    (node_modules, __pycache__, venv, env) are skipped.
    """
    if not os.path.isdir(directory):
        # A plain file maps to itself; anything else yields nothing.
        return [directory] if os.path.isfile(directory) else []

    skip_dirs = {'node_modules', '__pycache__', 'venv', 'env'}
    collected: List[str] = []
    for root, subdirs, filenames in os.walk(directory):
        # Prune in place so os.walk never descends into skipped trees.
        subdirs[:] = [
            name for name in subdirs
            if not name.startswith('.') and name not in skip_dirs
        ]
        collected.extend(
            os.path.join(root, name)
            for name in filenames
            if not name.startswith('.')
        )
    return collected
|
|
69
|
+
|
|
70
|
+
# ---------------------------------------------------------------------------
# Logging configuration: emit INFO and above to stderr, and silence the noisy
# FastMCP/MCP internals so only genuine errors from them surface.
# ---------------------------------------------------------------------------
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)

# Explicitly target stderr: stdout is reserved for protocol traffic.
console_handler = logging.StreamHandler(sys.stderr)
console_handler.setLevel(logging.INFO)
console_handler.setFormatter(
    logging.Formatter('%(levelname)-5s %(asctime)-15s %(name)s:%(funcName)s:%(lineno)d - %(message)s')
)
root_logger.addHandler(console_handler)

# Raise every known FastMCP/MCP logger to ERROR (startup banners, transport
# chatter, server lifecycle messages).
for _noisy_logger in (
    'fastmcp', 'fastmcp.server', 'mcp', 'server',
    'fastmcp.transport', 'fastmcp.stdio', 'mcp.server', 'mcp.transport',
):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)

log = logging.getLogger(__name__)

# FastMCP must not keep per-session state between HTTP requests.
settings.stateless_http = True

# The MCP server instance all tools below register against.
mcp = FastMCP("RepoMapServer")
|
|
103
|
+
|
|
104
|
+
async def _repo_map_impl(
|
|
105
|
+
project_root: str,
|
|
106
|
+
chat_files: Optional[List[str]] = None,
|
|
107
|
+
other_files: Optional[List[str]] = None,
|
|
108
|
+
token_limit: Any = 8192, # Accept any type to handle empty strings
|
|
109
|
+
exclude_unranked: bool = False,
|
|
110
|
+
force_refresh: bool = False,
|
|
111
|
+
mentioned_files: Optional[List[str]] = None,
|
|
112
|
+
mentioned_idents: Optional[List[str]] = None,
|
|
113
|
+
verbose: bool = False,
|
|
114
|
+
max_context_window: Optional[int] = None,
|
|
115
|
+
) -> Dict[str, Any]:
|
|
116
|
+
"""Generate a repository map for the specified files, providing a list of function prototypes and variables for files as well as relevant related
|
|
117
|
+
files. Provide filenames relative to the project_root. In addition to the files provided, relevant related files will also be included with a
|
|
118
|
+
very small ranking boost.
|
|
119
|
+
|
|
120
|
+
:param project_root: Root directory of the project to search. (must be an absolute path!)
|
|
121
|
+
:param chat_files: A list of file paths that are currently in the chat context. These files will receive the highest ranking.
|
|
122
|
+
:param other_files: A list of other relevant file paths in the repository to consider for the map. They receive a lower ranking boost than mentioned_files and chat_files.
|
|
123
|
+
:param token_limit: The maximum number of tokens the generated repository map should occupy. Defaults to 8192.
|
|
124
|
+
:param exclude_unranked: If True, files with a PageRank of 0.0 will be excluded from the map. Defaults to False.
|
|
125
|
+
:param force_refresh: If True, forces a refresh of the repository map cache. Defaults to False.
|
|
126
|
+
:param mentioned_files: Optional list of file paths explicitly mentioned in the conversation and receive a mid-level ranking boost.
|
|
127
|
+
:param mentioned_idents: Optional list of identifiers explicitly mentioned in the conversation, to boost their ranking.
|
|
128
|
+
:param verbose: If True, enables verbose logging for the RepoMap generation process. Defaults to False.
|
|
129
|
+
:param max_context_window: Optional maximum context window size for token calculation, used to adjust map token limit when no chat files are provided.
|
|
130
|
+
:returns: A dictionary containing:
|
|
131
|
+
- 'map': the generated repository map string
|
|
132
|
+
- 'report': a dictionary with file processing details including:
|
|
133
|
+
- 'included': list of processed files
|
|
134
|
+
- 'excluded': dictionary of excluded files with reasons
|
|
135
|
+
- 'definition_matches': count of matched definitions
|
|
136
|
+
- 'reference_matches': count of matched references
|
|
137
|
+
- 'total_files_considered': total files processed
|
|
138
|
+
Or an 'error' key if an error occurred.
|
|
139
|
+
"""
|
|
140
|
+
if not os.path.isdir(project_root):
|
|
141
|
+
return {"error": f"Project root directory not found: {project_root}"}
|
|
142
|
+
|
|
143
|
+
# 1. Handle and validate parameters
|
|
144
|
+
# Convert token_limit to integer with fallback
|
|
145
|
+
try:
|
|
146
|
+
token_limit = int(token_limit) if token_limit else 8192
|
|
147
|
+
except (TypeError, ValueError):
|
|
148
|
+
token_limit = 8192
|
|
149
|
+
|
|
150
|
+
# Ensure token_limit is positive
|
|
151
|
+
if token_limit <= 0:
|
|
152
|
+
token_limit = 8192
|
|
153
|
+
|
|
154
|
+
chat_files_list = chat_files or []
|
|
155
|
+
mentioned_fnames_set = set(mentioned_files) if mentioned_files else None
|
|
156
|
+
mentioned_idents_set = set(mentioned_idents) if mentioned_idents else None
|
|
157
|
+
|
|
158
|
+
# 2. If a specific list of other_files isn't provided, scan the whole root directory.
|
|
159
|
+
# This should happen regardless of whether chat_files are present.
|
|
160
|
+
effective_other_files = []
|
|
161
|
+
if other_files:
|
|
162
|
+
effective_other_files = other_files
|
|
163
|
+
else:
|
|
164
|
+
log.info("No other_files provided, scanning root directory for context...")
|
|
165
|
+
effective_other_files = find_src_files(project_root)
|
|
166
|
+
|
|
167
|
+
# Add a print statement for debugging so you can see what the tool is working with.
|
|
168
|
+
log.debug(f"Chat files: {chat_files_list}")
|
|
169
|
+
log.debug(f"Effective other_files count: {len(effective_other_files)}")
|
|
170
|
+
|
|
171
|
+
# If after all that we have no files, we can exit early.
|
|
172
|
+
if not chat_files_list and not effective_other_files:
|
|
173
|
+
log.info("No files to process.")
|
|
174
|
+
return {"map": "No files found to generate a map."}
|
|
175
|
+
|
|
176
|
+
# 3. Resolve paths relative to project root
|
|
177
|
+
root_path = Path(project_root).resolve()
|
|
178
|
+
abs_chat_files = [str(root_path / f) for f in chat_files_list]
|
|
179
|
+
abs_other_files = [str(root_path / f) for f in effective_other_files]
|
|
180
|
+
|
|
181
|
+
# Remove any chat files from the other_files list to avoid duplication
|
|
182
|
+
abs_chat_files_set = set(abs_chat_files)
|
|
183
|
+
abs_other_files = [f for f in abs_other_files if f not in abs_chat_files_set]
|
|
184
|
+
|
|
185
|
+
# 4. Instantiate and run RepoMap
|
|
186
|
+
try:
|
|
187
|
+
repo_mapper = RepoMap(
|
|
188
|
+
map_tokens=token_limit,
|
|
189
|
+
root=str(root_path),
|
|
190
|
+
token_counter_func=lambda text: count_tokens(text, "gpt-4"),
|
|
191
|
+
file_reader_func=read_text,
|
|
192
|
+
output_handler_funcs={'info': log.info, 'warning': log.warning, 'error': log.error},
|
|
193
|
+
verbose=verbose,
|
|
194
|
+
exclude_unranked=exclude_unranked,
|
|
195
|
+
max_context_window=max_context_window
|
|
196
|
+
)
|
|
197
|
+
except Exception as e:
|
|
198
|
+
log.exception(f"Failed to initialize RepoMap for project '{project_root}': {e}")
|
|
199
|
+
return {"error": f"Failed to initialize RepoMap: {str(e)}"}
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
map_content, file_report = await asyncio.to_thread(
|
|
203
|
+
repo_mapper.get_repo_map,
|
|
204
|
+
chat_files=abs_chat_files,
|
|
205
|
+
other_files=abs_other_files,
|
|
206
|
+
mentioned_fnames=mentioned_fnames_set,
|
|
207
|
+
mentioned_idents=mentioned_idents_set,
|
|
208
|
+
force_refresh=force_refresh
|
|
209
|
+
)
|
|
210
|
+
|
|
211
|
+
# Convert FileReport to dictionary for JSON serialization
|
|
212
|
+
report_dict = {
|
|
213
|
+
"excluded": file_report.excluded,
|
|
214
|
+
"definition_matches": file_report.definition_matches,
|
|
215
|
+
"reference_matches": file_report.reference_matches,
|
|
216
|
+
"total_files_considered": file_report.total_files_considered
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
return {
|
|
220
|
+
"map": map_content or "No repository map could be generated.",
|
|
221
|
+
"report": report_dict
|
|
222
|
+
}
|
|
223
|
+
except Exception as e:
|
|
224
|
+
log.exception(f"Error generating repository map for project '{project_root}': {e}")
|
|
225
|
+
return {"error": f"Error generating repository map: {str(e)}"}
|
|
226
|
+
|
|
227
|
+
@mcp.tool()
async def repo_map(
    project_root: str,
    chat_files: Optional[List[str]] = None,
    other_files: Optional[List[str]] = None,
    token_limit: Any = 8192,  # Accept any type to handle empty strings
    exclude_unranked: bool = False,
    force_refresh: bool = False,
    mentioned_files: Optional[List[str]] = None,
    mentioned_idents: Optional[List[str]] = None,
    verbose: bool = False,
    max_context_window: Optional[int] = None,
) -> Dict[str, Any]:
    """Generate a repository map for a given project root.

    Thin MCP wrapper: all of the work happens in _repo_map_impl.

    Returns:
        Dictionary containing map content and generation report
    """
    return await _repo_map_impl(
        project_root,
        chat_files=chat_files,
        other_files=other_files,
        token_limit=token_limit,
        exclude_unranked=exclude_unranked,
        force_refresh=force_refresh,
        mentioned_files=mentioned_files,
        mentioned_idents=mentioned_idents,
        verbose=verbose,
        max_context_window=max_context_window,
    )
|
|
246
|
+
|
|
247
|
+
async def _search_identifiers_impl(
    project_root: str,
    query: str,
    max_results: int = 50,
    context_lines: int = 2,
    include_definitions: bool = True,
    include_references: bool = True
) -> Dict[str, Any]:
    """Implementation of search_identifiers without MCP decoration for direct calling.

    The scan itself is synchronous and can be slow on large trees, so it is
    executed in a worker thread via asyncio.to_thread to keep the event loop
    responsive — consistent with how _repo_map_impl offloads get_repo_map.

    Args:
        project_root: Root directory of the project to search. (must be an absolute path!)
        query: Search query (identifier name)
        max_results: Maximum number of results to return
        context_lines: Number of lines of context to show
        include_definitions: Whether to include definition occurrences
        include_references: Whether to include reference occurrences

    Returns:
        Dictionary containing search results or error message
    """
    if not os.path.isdir(project_root):
        return {"error": f"Project root directory not found: {project_root}"}

    try:
        return await asyncio.to_thread(
            _search_identifiers_sync,
            project_root, query, max_results, context_lines,
            include_definitions, include_references,
        )
    except Exception as e:
        log.exception(f"Error searching identifiers in project '{project_root}': {e}")
        return {"error": f"Error searching identifiers: {str(e)}"}


def _search_identifiers_sync(
    project_root: str,
    query: str,
    max_results: int,
    context_lines: int,
    include_definitions: bool,
    include_references: bool,
) -> Dict[str, Any]:
    """Blocking body of _search_identifiers_impl; see its docstring."""
    # Initialize RepoMap with search-specific settings.  Named 'mapper' to
    # avoid shadowing the module-level repo_map tool function.
    mapper = RepoMap(
        root=project_root,
        token_counter_func=lambda text: count_tokens(text, "gpt-4"),
        file_reader_func=read_text,
        output_handler_funcs={'info': log.info, 'warning': log.warning, 'error': log.error},
        verbose=False,
        exclude_unranked=True
    )

    # Gather tags (definitions and references) across every source file.
    all_tags = []
    for file_path in find_src_files(project_root):
        rel_path = str(Path(file_path).relative_to(project_root))
        all_tags.extend(mapper.get_tags(file_path, rel_path))

    # Keep tags whose name contains the query (case-insensitive), honoring
    # the definition/reference inclusion flags.
    query_lower = query.lower()
    wanted_kinds = set()
    if include_definitions:
        wanted_kinds.add("def")
    if include_references:
        wanted_kinds.add("ref")
    matching_tags = [
        tag for tag in all_tags
        if query_lower in tag.name.lower() and tag.kind in wanted_kinds
    ]

    # Definitions first, then by earliest occurrence of the query in the name.
    matching_tags.sort(key=lambda t: (t.kind != "def", t.name.lower().find(query_lower)))
    matching_tags = matching_tags[:max_results]

    # Render each hit with context_lines of context on either side.
    results = []
    for tag in matching_tags:
        file_path = str(Path(project_root) / tag.rel_fname)
        first_line = max(1, tag.line - context_lines)
        last_line = tag.line + context_lines
        context = mapper.render_tree(
            file_path,
            tag.rel_fname,
            list(range(first_line, last_line + 1))
        )
        if context:
            results.append({
                "file": tag.rel_fname,
                "line": tag.line,
                "name": tag.name,
                "kind": tag.kind,
                "context": context
            })

    return {"results": results}
|
|
338
|
+
|
|
339
|
+
@mcp.tool()
async def search_identifiers(
    project_root: str,
    query: str,
    max_results: int = 50,
    context_lines: int = 2,
    include_definitions: bool = True,
    include_references: bool = True
) -> Dict[str, Any]:
    """Search for identifiers in code files. Get back a list of matching identifiers
    with their file, line number, and context.

    When searching, just use the identifier name without any special characters,
    prefixes or suffixes. The search is case-insensitive.

    Args:
        project_root: Root directory of the project to search. (must be an absolute path!)
        query: Search query (identifier name)
        max_results: Maximum number of results to return
        context_lines: Number of lines of context to show
        include_definitions: Whether to include definition occurrences
        include_references: Whether to include reference occurrences

    Returns:
        Dictionary containing search results or error message
    """
    # Thin MCP wrapper: delegate to the undecorated implementation.
    return await _search_identifiers_impl(
        project_root,
        query,
        max_results=max_results,
        context_lines=context_lines,
        include_definitions=include_definitions,
        include_references=include_references,
    )
|
|
364
|
+
|
|
365
|
+
async def _warm_up_cache_impl(project_root: str) -> Dict[str, Any]:
    """Implementation of warm_up_cache without MCP decoration for direct calling.

    Performs background indexing of the project to populate the tags cache,
    so the first real search query does not pay the full indexing cost.
    Files are indexed in batches; each batch runs in a worker thread (via
    asyncio.to_thread) so the event loop stays responsive throughout,
    consistent with _repo_map_impl's offloading of blocking work.

    Args:
        project_root: Root directory of the project to index

    Returns:
        Dictionary containing status or error message
    """
    if not os.path.isdir(project_root):
        return {"error": f"Project root directory not found: {project_root}"}

    try:
        log.info(f"Starting background cache warm-up for project: {project_root}")

        # Named 'mapper' to avoid shadowing the module-level repo_map tool.
        mapper = RepoMap(
            root=project_root,
            token_counter_func=lambda text: count_tokens(text, "gpt-4"),
            file_reader_func=read_text,
            output_handler_funcs={'info': log.info, 'warning': log.warning, 'error': log.error},
            verbose=False,
            exclude_unranked=True
        )

        all_files = find_src_files(project_root)
        log.info(f"Found {len(all_files)} files to index")

        batch_size = 50
        indexed_files = 0

        def _index_batch(batch: List[str], already_indexed: int) -> int:
            """Populate the tags cache for one batch of files (blocking);
            returns the updated count of successfully indexed files."""
            done = already_indexed
            for file_path in batch:
                try:
                    rel_path = str(Path(file_path).relative_to(project_root))
                    # get_tags populates the cache for this file as a side effect.
                    mapper.get_tags(file_path, rel_path)
                    done += 1
                    # Log progress every 100 files.
                    if done % 100 == 0:
                        log.info(f"Cache warm-up progress: {done}/{len(all_files)} files indexed")
                except Exception as file_error:
                    # Keep going: one unreadable file must not abort the warm-up.
                    log.warning(f"Failed to index file {file_path}: {file_error}")
            return done

        for start in range(0, len(all_files), batch_size):
            batch = all_files[start:start + batch_size]
            # Run the blocking batch off the event loop.
            indexed_files = await asyncio.to_thread(_index_batch, batch, indexed_files)
            # Small delay between batches to be nice to the system.
            await asyncio.sleep(0.01)

        log.info(f"Cache warm-up completed. Indexed {indexed_files} files.")

        return {
            "status": "completed",
            "files_indexed": indexed_files,
            "total_files": len(all_files)
        }

    except Exception as e:
        log.exception(f"Error during cache warm-up for project '{project_root}': {e}")
        return {"error": f"Error during cache warm-up: {str(e)}"}
|
|
434
|
+
|
|
435
|
+
@mcp.tool()
async def warm_up_cache(project_root: str) -> Dict[str, Any]:
    """Warm up the cache by indexing all files in the project.

    This is a background operation that should be called when the application
    starts, so that subsequent search queries are fast.

    Args:
        project_root: Root directory of the project to index (must be an absolute path!)

    Returns:
        Dictionary containing status information or error message
    """
    # Thin MCP wrapper: delegate to the undecorated implementation.
    return await _warm_up_cache_impl(project_root)
|
|
449
|
+
|
|
450
|
+
def handle_interactive_query(query_data: Dict[str, Any]) -> Dict[str, Any]:
    """Route one interactive query from the TypeScript client to a tool impl."""
    try:
        if 'tool' not in query_data:
            return {"error": "Missing 'tool' field in query"}

        requested = query_data['tool']
        tool_args = query_data.get('args', {})

        # Map each tool name to a thunk so the coroutine is only created
        # (and its implementation only looked up) for the selected tool.
        runners = {
            'repo_map': lambda: asyncio.run(_repo_map_impl(**tool_args)),
            'search_identifiers': lambda: asyncio.run(_search_identifiers_impl(**tool_args)),
            'warm_up_cache': lambda: asyncio.run(_warm_up_cache_impl(**tool_args)),
        }
        runner = runners.get(requested)
        if runner is None:
            return {"error": f"Unknown tool: {requested}"}
        return runner()
    except Exception as e:
        log.exception(f"Error handling interactive query: {e}")
        return {"error": str(e)}
|
|
470
|
+
|
|
471
|
+
def interactive_mode():
    """Run in interactive mode for direct TypeScript communication.

    Protocol: one JSON query per stdin line, one JSON response per stdout
    line. The loop ends cleanly on EOF (client disconnect) or Ctrl-C.
    """
    log.info("Starting RepoMapper in terminal interactive mode...")

    try:
        while True:
            # Read line from stdin. EOFError must be handled *here*:
            # previously input() sat outside any EOF handler, so a client
            # disconnect bubbled up and was logged as a fatal error.
            try:
                line = input()
            except EOFError:
                # Client disconnected
                break
            if not line.strip():
                continue

            # Reset per message so the error handler below never sees a
            # stale value from a previous iteration.
            query_data = None
            try:
                # Parse the JSON query
                query_data = json.loads(line)
                query_id = query_data.get('id', 'unknown')
                query = query_data.get('query', {})

                # Handle the query
                result = handle_interactive_query(query)

                # Send response back
                response = {
                    "id": query_id,
                    "result": result
                }
                print(json.dumps(response), flush=True)

            except json.JSONDecodeError as e:
                error_response = {
                    "id": "unknown",
                    "error": f"Invalid JSON: {str(e)}"
                }
                print(json.dumps(error_response), flush=True)
            except Exception as e:
                # isinstance guard: query_data may be None (parse not
                # reached) or a non-dict JSON value such as a list; calling
                # .get() on it here used to raise a second AttributeError
                # that escaped to the outer handler and killed the loop.
                error_id = query_data.get('id', 'unknown') if isinstance(query_data, dict) else 'unknown'
                error_response = {
                    "id": error_id,
                    "error": str(e)
                }
                print(json.dumps(error_response), flush=True)

    except KeyboardInterrupt:
        log.debug("Received interrupt signal, shutting down...")
    except Exception as e:
        log.exception(f"Fatal error in interactive mode: {e}")
|
|
518
|
+
|
|
519
|
+
# --- Main Entry Point ---
def main():
    """Parse CLI flags and start the server in the requested mode."""
    import argparse
    import sys

    startup_log.info("Parsing command line arguments...")
    parser = argparse.ArgumentParser(description='RepoMapper Server')
    parser.add_argument('--interactive', action='store_true',
                        help='Force interactive mode for direct stdin/stdout JSON communication')
    parser.add_argument('--mcp-server', action='store_true',
                        help='Force MCP server mode')

    args = parser.parse_args()
    startup_log.info(f"Arguments: {args}")

    # Mode selection: explicit flags win; otherwise inspect stdin.
    if args.interactive:
        # Explicitly requested interactive mode
        startup_log.info("🔄 Running in interactive mode...")
        interactive_mode()
        return

    if args.mcp_server:
        # Explicitly requested MCP server mode
        startup_log.info("🚀 Running in MCP server mode...")
        sys.stderr.flush()  # Force immediate output to stderr

        try:
            startup_log.info("🔧 Initializing FastMCP server...")
            mcp.run()
        except Exception as e:
            startup_log.error(f"❌ FastMCP server failed to start: {e}")
            startup_log.error(f"Error type: {type(e).__name__}")
            import traceback
            startup_log.error(f"Traceback: {traceback.format_exc()}")
            sys.exit(1)
        return

    # No explicit flag: auto-detect. Both branches deliberately end in
    # interactive mode — piped stdin used to start the MCP server by
    # mistake, so subprocess callers also get the JSON protocol.
    startup_log.info("🔍 Auto-detecting mode based on stdin...")
    if sys.stdin.isatty():
        startup_log.info("📺 TTY detected, using interactive mode")
    else:
        startup_log.info("🔗 Non-TTY detected, using interactive mode for subprocess")
    interactive_mode()
|
|
565
|
+
|
|
566
|
+
if __name__ == "__main__":
    # NOTE(review): main() imports sys as a *function local*, so the
    # handlers below would raise NameError unless the (unseen) file top
    # also imports sys. Import it here defensively; harmless if redundant.
    import sys

    try:
        startup_log.info("🎬 Starting main function...")
        main()
    except KeyboardInterrupt:
        startup_log.info("🛑 Interrupted by user")
        sys.exit(0)
    except Exception as e:
        startup_log.error(f"💥 Fatal error in main: {e}")
        import traceback
        startup_log.error(f"Traceback: {traceback.format_exc()}")
        sys.exit(1)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
These .scm files are all adapted from the GitHub repositories listed here:
|
|
2
|
+
|
|
3
|
+
https://github.com/Goldziher/tree-sitter-language-pack/blob/main/sources/language_definitions.json
|
|
4
|
+
|
|
5
|
+
See this URL for information on the licenses of each repo:
|
|
6
|
+
|
|
7
|
+
https://github.com/Goldziher/tree-sitter-language-pack/
|
|
8
|
+
|
|
9
|
+
(RepoMapper adapted these from Aider.)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
; Tree-sitter tag queries capturing named definitions for the repo map.
; Node names (struct_specifier, union_specifier, function_declarator, ...)
; match the tree-sitter C grammar — presumably this is the C query file.

; Struct definitions (only those with a body) are tagged as classes.
(struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class

; Union declarations are tagged as classes as well.
(declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class

; The declarator identifier of a function gives the function definition tag.
(function_declarator declarator: (identifier) @name.definition.function) @definition.function

; typedef names are tagged as types.
(type_definition declarator: (type_identifier) @name.definition.type) @definition.type

; Enum definitions are tagged as types.
(enum_specifier name: (type_identifier) @name.definition.type) @definition.type
|