jarvis-ai-assistant 0.1.131__py3-none-any.whl → 0.1.132__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +48 -29
- jarvis/jarvis_agent/patch.py +61 -43
- jarvis/jarvis_agent/shell_input_handler.py +1 -1
- jarvis/jarvis_code_agent/code_agent.py +87 -86
- jarvis/jarvis_dev/main.py +335 -626
- jarvis/jarvis_git_squash/main.py +10 -31
- jarvis/jarvis_multi_agent/__init__.py +19 -28
- jarvis/jarvis_platform/ai8.py +7 -32
- jarvis/jarvis_platform/base.py +2 -7
- jarvis/jarvis_platform/kimi.py +3 -144
- jarvis/jarvis_platform/ollama.py +54 -68
- jarvis/jarvis_platform/openai.py +0 -4
- jarvis/jarvis_platform/oyi.py +0 -75
- jarvis/jarvis_platform/yuanbao.py +264 -0
- jarvis/jarvis_rag/file_processors.py +138 -0
- jarvis/jarvis_rag/main.py +1305 -425
- jarvis/jarvis_tools/ask_codebase.py +205 -39
- jarvis/jarvis_tools/code_review.py +125 -99
- jarvis/jarvis_tools/execute_python_script.py +58 -0
- jarvis/jarvis_tools/execute_shell.py +13 -26
- jarvis/jarvis_tools/execute_shell_script.py +1 -1
- jarvis/jarvis_tools/file_analyzer.py +271 -0
- jarvis/jarvis_tools/file_operation.py +1 -1
- jarvis/jarvis_tools/find_caller.py +213 -0
- jarvis/jarvis_tools/find_symbol.py +211 -0
- jarvis/jarvis_tools/function_analyzer.py +248 -0
- jarvis/jarvis_tools/git_commiter.py +4 -4
- jarvis/jarvis_tools/methodology.py +89 -48
- jarvis/jarvis_tools/project_analyzer.py +220 -0
- jarvis/jarvis_tools/read_code.py +23 -2
- jarvis/jarvis_tools/read_webpage.py +195 -81
- jarvis/jarvis_tools/registry.py +132 -11
- jarvis/jarvis_tools/search_web.py +55 -10
- jarvis/jarvis_tools/tool_generator.py +6 -8
- jarvis/jarvis_utils/__init__.py +1 -0
- jarvis/jarvis_utils/config.py +67 -3
- jarvis/jarvis_utils/embedding.py +344 -45
- jarvis/jarvis_utils/git_utils.py +9 -1
- jarvis/jarvis_utils/input.py +7 -6
- jarvis/jarvis_utils/methodology.py +379 -7
- jarvis/jarvis_utils/output.py +5 -3
- jarvis/jarvis_utils/utils.py +59 -7
- {jarvis_ai_assistant-0.1.131.dist-info → jarvis_ai_assistant-0.1.132.dist-info}/METADATA +3 -2
- jarvis_ai_assistant-0.1.132.dist-info/RECORD +82 -0
- {jarvis_ai_assistant-0.1.131.dist-info → jarvis_ai_assistant-0.1.132.dist-info}/entry_points.txt +2 -0
- jarvis/jarvis_codebase/__init__.py +0 -0
- jarvis/jarvis_codebase/main.py +0 -1011
- jarvis/jarvis_tools/treesitter_analyzer.py +0 -331
- jarvis/jarvis_treesitter/README.md +0 -104
- jarvis/jarvis_treesitter/__init__.py +0 -20
- jarvis/jarvis_treesitter/database.py +0 -258
- jarvis/jarvis_treesitter/example.py +0 -115
- jarvis/jarvis_treesitter/grammar_builder.py +0 -182
- jarvis/jarvis_treesitter/language.py +0 -117
- jarvis/jarvis_treesitter/symbol.py +0 -31
- jarvis/jarvis_treesitter/tools_usage.md +0 -121
- jarvis_ai_assistant-0.1.131.dist-info/RECORD +0 -85
- {jarvis_ai_assistant-0.1.131.dist-info → jarvis_ai_assistant-0.1.132.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.131.dist-info → jarvis_ai_assistant-0.1.132.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.131.dist-info → jarvis_ai_assistant-0.1.132.dist-info}/top_level.txt +0 -0
|
@@ -1,258 +0,0 @@
|
|
|
1
|
-
"""Tree-sitter based code database implementation."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import logging
|
|
5
|
-
from typing import Dict, List, Optional, Set, Tuple
|
|
6
|
-
from tree_sitter import Language, Node, Parser, Tree
|
|
7
|
-
from .symbol import Symbol, SymbolType, SymbolLocation
|
|
8
|
-
from .language import LanguageType, detect_language, get_language_config
|
|
9
|
-
from .grammar_builder import GrammarBuilder, setup_default_grammars, DEFAULT_GRAMMAR_DIR
|
|
10
|
-
|
|
11
|
-
# Setup logging
|
|
12
|
-
logger = logging.getLogger(__name__)
|
|
13
|
-
|
|
14
|
-
class CodeDatabase:
    """A database for storing and querying code symbols using tree-sitter.

    Files are parsed with the tree-sitter grammar matching their extension.
    Definitions found in the syntax tree are stored by name in ``symbols``;
    the parsed trees are kept in ``file_trees`` so that references and call
    sites can be searched later.
    """

    def __init__(self, grammar_dir: Optional[str] = None, auto_download: bool = True):
        """Initialize the code database.

        Args:
            grammar_dir: Directory containing tree-sitter grammar files.
                If None, uses the default directory (~/.jarvis/treesitter).
            auto_download: Whether to automatically download missing grammar files.
        """
        self.parser = Parser()
        self.languages: Dict[LanguageType, Language] = {}
        self.file_languages: Dict[str, LanguageType] = {}
        # Remembered so index_file() honours the caller's choice as well.
        self.auto_download = auto_download

        # Use default grammar directory if not provided
        if grammar_dir is None:
            grammar_dir = DEFAULT_GRAMMAR_DIR
            if auto_download:
                grammar_dir = setup_default_grammars()

        # Create grammar builder
        self.grammar_builder = GrammarBuilder(grammar_dir)

        # Load all supported language grammars. A failure for one language is
        # logged and skipped so the remaining languages stay usable.
        for lang_type in LanguageType:
            try:
                if auto_download:
                    # Ensure grammar exists (download if needed)
                    grammar_path = self.grammar_builder.ensure_grammar(lang_type)
                else:
                    # Just check if grammar file exists
                    config = get_language_config(lang_type)
                    grammar_path = os.path.join(grammar_dir, config.grammar_file)
                    if not os.path.exists(grammar_path):
                        logger.warning(f"Grammar file for {lang_type.value} not found: {grammar_path}")
                        continue

                # Load the language
                config = get_language_config(lang_type)
                self.languages[lang_type] = Language(grammar_path, config.name)
                logger.info(f"Loaded language grammar for {lang_type.value}")
            except Exception as e:
                logger.error(f"Failed to load grammar for {lang_type.value}: {str(e)}")

        # Symbol storage
        self.symbols: Dict[str, List[Symbol]] = {}
        self.file_trees: Dict[str, Tuple[Tree, LanguageType]] = {}

    def index_file(self, file_path: str) -> None:
        """Index a source code file.

        Indexing the same path again replaces the symbols recorded for it
        instead of appending duplicates.

        Args:
            file_path: Path to the source code file

        Raises:
            ValueError: If the language cannot be detected, is not loaded and
                may not be downloaded, or its grammar fails to build.
            OSError: If the file cannot be read.
        """
        # Detect language
        lang_type = detect_language(file_path)
        if not lang_type:
            raise ValueError(f"Could not detect language for file: {file_path}")

        # Check if language is supported
        if lang_type not in self.languages:
            # Building on demand clones a repository, so it is only allowed
            # when the database was created with auto_download enabled.
            may_build = getattr(self, 'auto_download', True) and hasattr(self, 'grammar_builder')
            if not may_build:
                raise ValueError(f"Unsupported language for file: {file_path}")

            # Try to build the grammar on-demand
            try:
                grammar_path = self.grammar_builder.ensure_grammar(lang_type)
                config = get_language_config(lang_type)
                self.languages[lang_type] = Language(grammar_path, config.name)
                logger.info(f"Built and loaded language grammar for {lang_type.value}")
            except Exception as e:
                raise ValueError(f"Failed to build grammar for {lang_type.value}: {str(e)}") from e

        # Set language for parsing
        self.parser.set_language(self.languages[lang_type])

        # Parse file
        with open(file_path, 'rb') as f:
            source_code = f.read()

        tree = self.parser.parse(source_code)

        # Drop results of any earlier indexing of this path before storing
        # the new ones.
        self._forget_file(file_path)
        self.file_languages[file_path] = lang_type
        self.file_trees[file_path] = (tree, lang_type)

        # Extract symbols from the tree
        self._extract_symbols(tree, file_path, lang_type)

    def _forget_file(self, file_path: str) -> None:
        """Remove every stored symbol whose location is in ``file_path``."""
        for name in list(self.symbols):
            remaining = [
                symbol for symbol in self.symbols[name]
                if symbol.location.file_path != file_path
            ]
            if remaining:
                self.symbols[name] = remaining
            else:
                del self.symbols[name]

    def _find_name_node(self, node: Node, lang_type: LanguageType) -> Optional[Node]:
        """Return the identifier node naming ``node``, or None if there is none."""
        name_node = None

        # Get the name node based on language-specific rules
        if lang_type == LanguageType.PYTHON:
            name_node = node.child_by_field_name('name')
        elif lang_type in (LanguageType.C, LanguageType.CPP):
            if node.type == 'function_definition':
                # The 'declarator' field is a function_declarator (possibly
                # wrapped in pointer declarators); the identifier sits at the
                # end of the nested 'declarator' chain.
                name_node = node.child_by_field_name('declarator')
                while name_node is not None and name_node.type != 'identifier':
                    name_node = name_node.child_by_field_name('declarator')
            elif node.type in ('struct_specifier', 'class_specifier'):
                name_node = node.child_by_field_name('name')
        elif lang_type == LanguageType.GO:
            if node.type in ('function_declaration', 'method_declaration'):
                name_node = node.child_by_field_name('name')
        elif lang_type == LanguageType.RUST:
            if node.type in ('function_item', 'struct_item', 'enum_item', 'trait_item'):
                name_node = node.child_by_field_name('name')

        if name_node is not None and name_node.type == 'identifier':
            return name_node
        return None

    def _extract_symbols(self, tree: Tree, file_path: str, lang_type: LanguageType) -> None:
        """Extract symbols from a tree-sitter tree.

        Args:
            tree: The tree-sitter tree
            file_path: Path to the source file
            lang_type: The language type
        """
        config = get_language_config(lang_type)

        # Explicit stack instead of recursion so deeply nested sources cannot
        # exhaust the interpreter's recursion limit. Children are pushed in
        # reverse so nodes are still visited in document (pre-)order.
        pending = [tree.root_node]
        while pending:
            node = pending.pop()
            if not node:
                continue

            # Extract symbols based on language-specific patterns
            for symbol_type, patterns in config.symbol_patterns.items():
                if node.type not in patterns:
                    continue
                name_node = self._find_name_node(node, lang_type)
                if name_node is None:
                    continue
                self._add_symbol(Symbol(
                    name=name_node.text.decode(),
                    type=SymbolType(symbol_type),
                    # tree-sitter points are 0-based (row, column) pairs;
                    # stored locations are 1-based.
                    location=SymbolLocation(
                        file_path=file_path,
                        start_line=node.start_point[0] + 1,
                        start_column=node.start_point[1] + 1,
                        end_line=node.end_point[0] + 1,
                        end_column=node.end_point[1] + 1
                    )
                ))

            pending.extend(reversed(node.children))

    def _add_symbol(self, symbol: Symbol) -> None:
        """Add a symbol to the database.

        Args:
            symbol: The symbol to add
        """
        self.symbols.setdefault(symbol.name, []).append(symbol)

    def find_symbol(self, name: str) -> List[Symbol]:
        """Find all occurrences of a symbol by name.

        Args:
            name: The symbol name to search for

        Returns:
            List of matching symbols
        """
        return self.symbols.get(name, [])

    def find_references(self, symbol: Symbol) -> List[Symbol]:
        """Find all references to a symbol.

        Every ``identifier`` node in every indexed file whose text equals the
        symbol's name is reported; the match is purely by name.

        Args:
            symbol: The symbol to find references for

        Returns:
            List of reference symbols
        """
        references = []
        for file_path, (tree, _lang_type) in self.file_trees.items():
            pending = [tree.root_node]
            while pending:
                node = pending.pop()
                if not node:
                    continue

                if node.type == 'identifier' and node.text.decode() == symbol.name:
                    references.append(Symbol(
                        name=node.text.decode(),
                        type=SymbolType.REFERENCE,
                        location=SymbolLocation(
                            file_path=file_path,
                            start_line=node.start_point[0] + 1,
                            start_column=node.start_point[1] + 1,
                            end_line=node.end_point[0] + 1,
                            end_column=node.end_point[1] + 1
                        )
                    ))

                pending.extend(reversed(node.children))

        return references

    def find_callers(self, function_symbol: Symbol) -> List[Symbol]:
        """Find all callers of a function.

        Only direct calls whose callee is a plain identifier equal to the
        function's name are reported; each result is located at the call
        expression.

        Args:
            function_symbol: The function symbol to find callers for

        Returns:
            List of caller symbols
        """
        # Language-specific call patterns: (call node type, callee field name).
        # Built once per query rather than once per visited node.
        call_patterns = {
            LanguageType.PYTHON: ('call', 'function'),
            LanguageType.C: ('call_expression', 'function'),
            LanguageType.CPP: ('call_expression', 'function'),
            LanguageType.GO: ('call_expression', 'function'),
            LanguageType.RUST: ('call_expression', 'function'),
        }

        callers = []
        for file_path, (tree, lang_type) in self.file_trees.items():
            call_type, callee_field = call_patterns[lang_type]
            pending = [tree.root_node]
            while pending:
                node = pending.pop()
                if not node:
                    continue

                if node.type == call_type:
                    func_node = node.child_by_field_name(callee_field)
                    if (func_node and func_node.type == 'identifier'
                            and func_node.text.decode() == function_symbol.name):
                        callers.append(Symbol(
                            name=func_node.text.decode(),
                            type=SymbolType.FUNCTION_CALL,
                            location=SymbolLocation(
                                file_path=file_path,
                                start_line=node.start_point[0] + 1,
                                start_column=node.start_point[1] + 1,
                                end_line=node.end_point[0] + 1,
                                end_column=node.end_point[1] + 1
                            )
                        ))

                pending.extend(reversed(node.children))

        return callers
|
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Example script demonstrating the use of the tree-sitter code database."""
|
|
3
|
-
|
|
4
|
-
import os
|
|
5
|
-
import sys
|
|
6
|
-
import logging
|
|
7
|
-
import argparse
|
|
8
|
-
from pathlib import Path
|
|
9
|
-
from typing import List, Optional, Set
|
|
10
|
-
|
|
11
|
-
from jarvis.jarvis_treesitter import (
|
|
12
|
-
CodeDatabase,
|
|
13
|
-
SymbolType,
|
|
14
|
-
setup_default_grammars,
|
|
15
|
-
DEFAULT_GRAMMAR_DIR
|
|
16
|
-
)
|
|
17
|
-
|
|
18
|
-
# Setup logging
|
|
19
|
-
logging.basicConfig(
|
|
20
|
-
level=logging.INFO,
|
|
21
|
-
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
22
|
-
)
|
|
23
|
-
logger = logging.getLogger(__name__)
|
|
24
|
-
|
|
25
|
-
def index_directory(db: CodeDatabase, directory: str, extensions: Optional[Set[str]] = None) -> int:
    """Walk ``directory`` recursively and index each matching file.

    Files that fail to index are logged as warnings and skipped, so a single
    bad file does not abort the walk.

    Args:
        db: The code database that receives the files
        directory: Root directory of the walk
        extensions: Optional set of file name suffixes to keep
            (e.g., {'.py', '.c'}); when empty or None every file is tried

    Returns:
        Number of files that were indexed successfully
    """
    # str.endswith accepts a tuple of suffixes, which replaces the any() scan.
    suffixes = tuple(extensions) if extensions else ()
    indexed = 0

    for dirpath, _dirnames, filenames in os.walk(directory):
        for filename in filenames:
            if suffixes and not filename.endswith(suffixes):
                continue

            file_path = os.path.join(dirpath, filename)
            try:
                db.index_file(file_path)
                indexed += 1
                logger.info(f"Indexed file: {file_path}")
            except Exception as e:
                logger.warning(f"Failed to index file {file_path}: {str(e)}")

    return indexed
|
|
51
|
-
|
|
52
|
-
def find_symbol(db: CodeDatabase, symbol_name: str) -> None:
    """Print every definition of ``symbol_name`` with its references and callers.

    For each match the definition location is printed, followed by the number
    of references and up to five of them. Matches whose type is
    ``SymbolType.FUNCTION`` also get a callers listing in the same format.

    Args:
        db: The code database to query
        symbol_name: Symbol name to search for
    """
    def show_first_five(entries) -> None:
        # Print up to five locations, then a summary of how many were omitted.
        for position, entry in enumerate(entries[:5], start=1):
            print(f" [{position}] {entry.location.file_path}:{entry.location.start_line}:{entry.location.start_column}")
        if len(entries) > 5:
            print(f" ... and {len(entries) - 5} more")

    matches = db.find_symbol(symbol_name)
    if not matches:
        print(f"No symbols found with name: {symbol_name}")
        return

    print(f"Found {len(matches)} symbols with name: {symbol_name}")
    for number, match in enumerate(matches, start=1):
        print(f"\n[{number}] {match.type.value}: {match.name}")
        print(f" Location: {match.location.file_path}:{match.location.start_line}:{match.location.start_column}")

        # References to this symbol
        refs = db.find_references(match)
        print(f" References: {len(refs)}")
        show_first_five(refs)

        # Call sites are only looked up for functions
        if match.type == SymbolType.FUNCTION:
            callers = db.find_callers(match)
            print(f" Callers: {len(callers)}")
            show_first_five(callers)
|
|
88
|
-
|
|
89
|
-
def main() -> None:
    """Parse the command line, index a directory and optionally look up a symbol."""
    arg_parser = argparse.ArgumentParser(description="Tree-sitter code database example")
    arg_parser.add_argument("--dir", "-d", type=str, default=".", help="Directory to index")
    arg_parser.add_argument("--ext", "-e", type=str, nargs="*", help="File extensions to index (e.g., .py .c)")
    arg_parser.add_argument("--symbol", "-s", type=str, help="Symbol name to search for")
    arg_parser.add_argument("--grammar-dir", "-g", type=str, default=DEFAULT_GRAMMAR_DIR,
                            help=f"Directory containing grammar files (default: {DEFAULT_GRAMMAR_DIR})")
    arg_parser.add_argument("--no-download", action="store_true", help="Don't download missing grammars")
    options = arg_parser.parse_args()

    # Build the database; --no-download turns automatic grammar download off.
    database = CodeDatabase(grammar_dir=options.grammar_dir, auto_download=not options.no_download)

    # No --ext (or an empty list) means "index every file".
    if options.ext:
        wanted_extensions = set(options.ext)
    else:
        wanted_extensions = None

    indexed = index_directory(database, options.dir, wanted_extensions)
    print(f"Indexed {indexed} files in {options.dir}")

    # The symbol lookup is optional.
    if options.symbol:
        find_symbol(database, options.symbol)


if __name__ == "__main__":
    main()
|
|
@@ -1,182 +0,0 @@
|
|
|
1
|
-
"""Functionality for downloading and building tree-sitter grammars."""
|
|
2
|
-
|
|
3
|
-
import os
|
|
4
|
-
import subprocess
|
|
5
|
-
import tempfile
|
|
6
|
-
import shutil
|
|
7
|
-
from typing import Dict, List, Optional
|
|
8
|
-
import logging
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
|
|
11
|
-
from .language import LanguageType, get_language_config
|
|
12
|
-
|
|
13
|
-
# Setup logging
|
|
14
|
-
logger = logging.getLogger(__name__)
|
|
15
|
-
|
|
16
|
-
# Default grammar directory
|
|
17
|
-
DEFAULT_GRAMMAR_DIR = os.path.expanduser("~/.jarvis/treesitter")
|
|
18
|
-
|
|
19
|
-
# Tree-sitter grammar repositories
|
|
20
|
-
GRAMMAR_REPOS = {
|
|
21
|
-
LanguageType.PYTHON: "https://github.com/tree-sitter/tree-sitter-python",
|
|
22
|
-
LanguageType.C: "https://github.com/tree-sitter/tree-sitter-c",
|
|
23
|
-
LanguageType.CPP: "https://github.com/tree-sitter/tree-sitter-cpp",
|
|
24
|
-
LanguageType.GO: "https://github.com/tree-sitter/tree-sitter-go",
|
|
25
|
-
LanguageType.RUST: "https://github.com/tree-sitter/tree-sitter-rust",
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
class GrammarBuilder:
    """Handles downloading and building tree-sitter grammar files.

    Grammars are cloned from the repositories in ``GRAMMAR_REPOS`` and
    compiled into shared libraries stored in ``grammar_dir``.
    """

    def __init__(self, grammar_dir: str = DEFAULT_GRAMMAR_DIR):
        """Initialize the grammar builder.

        Args:
            grammar_dir: Directory to store built grammar files.
                Defaults to ~/.jarvis/treesitter
        """
        self.grammar_dir = grammar_dir
        os.makedirs(grammar_dir, exist_ok=True)

    def ensure_grammar(self, lang_type: LanguageType) -> str:
        """Ensure the grammar file for a language exists, downloading and building if necessary.

        Args:
            lang_type: The language type

        Returns:
            Path to the grammar file

        Raises:
            ValueError: If no repository is configured for the language
            RuntimeError: If cloning or building the grammar fails
        """
        config = get_language_config(lang_type)
        grammar_path = os.path.join(self.grammar_dir, config.grammar_file)

        # Check if grammar file already exists
        if os.path.exists(grammar_path):
            logger.info(f"Grammar file for {lang_type.value} already exists at {grammar_path}")
            return grammar_path

        # Download and build the grammar
        logger.info(f"Building grammar for {lang_type.value}")
        return self._build_grammar(lang_type)

    def ensure_all_grammars(self) -> Dict[LanguageType, str]:
        """Ensure grammar files for all supported languages exist.

        A failure for one language is logged and that language is left out of
        the result; the remaining languages are still processed.

        Returns:
            Dictionary mapping language types to grammar file paths
        """
        result = {}
        for lang_type in LanguageType:
            try:
                result[lang_type] = self.ensure_grammar(lang_type)
            except Exception as e:
                logger.error(f"Failed to build grammar for {lang_type.value}: {str(e)}")

        return result

    def _build_grammar(self, lang_type: LanguageType) -> str:
        """Download and build the grammar for a language.

        Args:
            lang_type: The language type

        Returns:
            Path to the built grammar file

        Raises:
            ValueError: If no repository URL is defined for the language
            RuntimeError: If grammar building fails
        """
        # Imported here because this module does not import sys at file level.
        import sys

        config = get_language_config(lang_type)
        repo_url = GRAMMAR_REPOS.get(lang_type)

        if not repo_url:
            raise ValueError(f"No repository URL defined for language {lang_type.value}")

        # Create temporary directory
        with tempfile.TemporaryDirectory() as temp_dir:
            # Clone the repository
            logger.info(f"Cloning {repo_url}")
            try:
                result = subprocess.run(
                    ["git", "clone", "--depth", "1", repo_url, temp_dir],
                    check=False,
                    capture_output=True,
                    text=True
                )
            except OSError as e:
                # e.g. git is not installed; report it as the documented error.
                raise RuntimeError(f"Failed to clone repository {repo_url}: {e}") from e

            if result.returncode != 0:
                raise RuntimeError(f"Failed to clone repository {repo_url}: {result.stderr}")

            # Build the grammar
            grammar_path = os.path.join(self.grammar_dir, config.grammar_file)

            # Create build script
            build_script = self._create_build_script(temp_dir, lang_type.value, grammar_path)

            # Execute build script with the interpreter running this process,
            # so the build sees the same installed tree_sitter package and
            # does not depend on a "python" executable being on PATH.
            logger.info(f"Building grammar for {lang_type.value}")
            try:
                result = subprocess.run(
                    [sys.executable or "python", build_script],
                    check=False,
                    capture_output=True,
                    text=True
                )
            except OSError as e:
                raise RuntimeError(f"Failed to build grammar for {lang_type.value}: {e}") from e

            if result.returncode != 0:
                raise RuntimeError(f"Failed to build grammar for {lang_type.value}: {result.stderr}")

            # Verify file exists
            if not os.path.exists(grammar_path):
                raise RuntimeError(f"Grammar file {grammar_path} was not created")

            logger.info(f"Successfully built grammar for {lang_type.value}: {grammar_path}")
            return grammar_path

    def _create_build_script(self, repo_dir: str, lang_name: str, output_path: str) -> str:
        """Create a Python script to build the grammar.

        Paths are embedded with ``repr()`` so that backslashes, quotes or
        braces in them still produce a valid script.

        Args:
            repo_dir: Path to the cloned repository
            lang_name: Language name (not used by the generated script)
            output_path: Output path for the built grammar

        Returns:
            Path to the build script
        """
        script_path = os.path.join(repo_dir, "build_grammar.py")

        script = (
            "import os\n"
            "from tree_sitter import Language\n"
            "\n"
            "# Ensure output directory exists\n"
            f"os.makedirs(os.path.dirname({output_path!r}), exist_ok=True)\n"
            "\n"
            "# Build the language\n"
            "Language.build_library(\n"
            f"    {output_path!r},\n"
            "    [\n"
            f"        {repo_dir!r}\n"
            "    ]\n"
            ")\n"
            "\n"
            f"print(\"Built grammar:\", {output_path!r})\n"
        )

        with open(script_path, "w", encoding="utf-8") as f:
            f.write(script)

        return script_path
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def setup_default_grammars() -> str:
    """Create the default grammar directory and build every supported grammar in it.

    Per-language build failures are handled inside
    ``GrammarBuilder.ensure_all_grammars`` (logged, not raised), so the
    directory path is returned even when some grammars could not be built.

    Returns:
        Path to the grammar directory (``DEFAULT_GRAMMAR_DIR``)
    """
    os.makedirs(DEFAULT_GRAMMAR_DIR, exist_ok=True)
    GrammarBuilder(DEFAULT_GRAMMAR_DIR).ensure_all_grammars()
    return DEFAULT_GRAMMAR_DIR
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
"""Language-specific configurations for tree-sitter."""
|
|
2
|
-
|
|
3
|
-
from enum import Enum
|
|
4
|
-
from typing import Dict, List, Optional, Tuple
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
|
|
7
|
-
class LanguageType(Enum):
    """Programming languages with a configured tree-sitter grammar.

    Each member's value is the lowercase language name used in log messages
    and as the language name passed to the grammar build script.
    """

    PYTHON = "python"
    C = "c"
    CPP = "cpp"
    GO = "go"
    RUST = "rust"
|
|
14
|
-
|
|
15
|
-
@dataclass
class LanguageConfig:
    """Per-language settings used when loading grammars and extracting symbols."""

    # Language name handed to tree-sitter when the grammar is loaded.
    name: str
    # File name of the compiled grammar inside the grammar directory.
    grammar_file: str
    # Extensions, including the leading dot, that select this language.
    file_extensions: List[str]
    # Maps symbol types to the tree-sitter node types that define them.
    symbol_patterns: Dict[str, List[str]]
|
|
22
|
-
|
|
23
|
-
# Language-specific configurations
|
|
24
|
-
# Registry of per-language settings, listed in LanguageType declaration order.
# detect_language() scans it in this order and returns the first language
# whose extension list contains the file's extension.
LANGUAGE_CONFIGS: Dict[LanguageType, LanguageConfig] = dict((
    (
        LanguageType.PYTHON,
        LanguageConfig(
            name="python",
            grammar_file="python.so",
            file_extensions=[".py"],
            symbol_patterns={
                "function": ["function_definition"],
                "class": ["class_definition"],
                "variable": ["assignment"],
                "import": ["import_statement", "import_from_statement"],
                # Same node type as "function": the grammar does not
                # distinguish methods from functions.
                "method": ["function_definition"],
            },
        ),
    ),
    (
        LanguageType.C,
        LanguageConfig(
            name="c",
            grammar_file="c.so",
            file_extensions=[".c", ".h"],
            symbol_patterns={
                "function": ["function_definition"],
                "struct": ["struct_specifier"],
                "enum": ["enum_specifier"],
                "typedef": ["type_definition"],
                "macro": ["preproc_def"],
                "variable": ["declaration"],
            },
        ),
    ),
    (
        LanguageType.CPP,
        LanguageConfig(
            name="cpp",
            grammar_file="cpp.so",
            file_extensions=[".cpp", ".hpp", ".cc", ".hh"],
            symbol_patterns={
                "function": ["function_definition", "method_definition"],
                "class": ["class_specifier"],
                "struct": ["struct_specifier"],
                "enum": ["enum_specifier"],
                "namespace": ["namespace_definition"],
                "template": ["template_declaration"],
                "variable": ["declaration"],
            },
        ),
    ),
    (
        LanguageType.GO,
        LanguageConfig(
            name="go",
            grammar_file="go.so",
            file_extensions=[".go"],
            symbol_patterns={
                "function": ["function_declaration", "method_declaration"],
                "struct": ["type_declaration"],
                "interface": ["type_declaration"],
                "package": ["package_clause"],
                "import": ["import_declaration"],
                "variable": ["var_declaration", "short_var_declaration"],
            },
        ),
    ),
    (
        LanguageType.RUST,
        LanguageConfig(
            name="rust",
            grammar_file="rust.so",
            file_extensions=[".rs"],
            symbol_patterns={
                "function": ["function_item"],
                "struct": ["struct_item"],
                "enum": ["enum_item"],
                "trait": ["trait_item"],
                "impl": ["impl_item"],
                "module": ["mod_item"],
                "variable": ["let_declaration", "const_item", "static_item"],
            },
        ),
    ),
))
|
|
92
|
-
|
|
93
|
-
def detect_language(file_path: str) -> Optional[LanguageType]:
    """Detect the programming language of a file based on its extension.

    The extension is compared case-insensitively. A path whose final
    component has no extension (for example a file literally named ``go`` or
    ``c``) is reported as unsupported rather than being matched by its name.

    Args:
        file_path: Path to the source file

    Returns:
        The detected language type or None if not supported
    """
    # Imported here because this module does not import os at file level.
    import os

    # splitext only looks at the last path component and returns '' when it
    # has no extension, unlike splitting the whole path on '.'.
    extension = os.path.splitext(file_path)[1].lower()
    if not extension:
        return None

    for lang_type, config in LANGUAGE_CONFIGS.items():
        if extension in config.file_extensions:
            return lang_type
    return None
|
|
107
|
-
|
|
108
|
-
def get_language_config(lang_type: LanguageType) -> LanguageConfig:
    """Look up the settings registered for ``lang_type``.

    Args:
        lang_type: The language whose settings are wanted

    Returns:
        The matching entry of ``LANGUAGE_CONFIGS``

    Raises:
        KeyError: If ``lang_type`` has no entry in ``LANGUAGE_CONFIGS``
    """
    config = LANGUAGE_CONFIGS[lang_type]
    return config
|