jarvis-ai-assistant 0.1.129__py3-none-any.whl → 0.1.131__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic. Click here for more details.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +41 -27
- jarvis/jarvis_agent/builtin_input_handler.py +73 -0
- jarvis/{jarvis_code_agent → jarvis_agent}/file_input_handler.py +1 -1
- jarvis/jarvis_agent/main.py +1 -1
- jarvis/jarvis_agent/patch.py +461 -0
- jarvis/{jarvis_code_agent → jarvis_agent}/shell_input_handler.py +0 -1
- jarvis/jarvis_code_agent/code_agent.py +94 -89
- jarvis/jarvis_codebase/main.py +5 -5
- jarvis/jarvis_dev/main.py +833 -741
- jarvis/jarvis_git_squash/main.py +1 -1
- jarvis/jarvis_lsp/base.py +2 -26
- jarvis/jarvis_lsp/cpp.py +2 -14
- jarvis/jarvis_lsp/go.py +0 -13
- jarvis/jarvis_lsp/python.py +1 -30
- jarvis/jarvis_lsp/registry.py +10 -14
- jarvis/jarvis_lsp/rust.py +0 -12
- jarvis/jarvis_multi_agent/__init__.py +63 -53
- jarvis/jarvis_platform/registry.py +1 -2
- jarvis/jarvis_platform_manager/main.py +3 -3
- jarvis/jarvis_rag/main.py +1 -1
- jarvis/jarvis_tools/ask_codebase.py +40 -20
- jarvis/jarvis_tools/code_review.py +180 -143
- jarvis/jarvis_tools/create_code_agent.py +76 -72
- jarvis/jarvis_tools/create_sub_agent.py +31 -21
- jarvis/jarvis_tools/execute_shell.py +2 -2
- jarvis/jarvis_tools/execute_shell_script.py +1 -1
- jarvis/jarvis_tools/file_operation.py +2 -2
- jarvis/jarvis_tools/git_commiter.py +88 -68
- jarvis/jarvis_tools/lsp_find_definition.py +83 -67
- jarvis/jarvis_tools/lsp_find_references.py +62 -46
- jarvis/jarvis_tools/lsp_get_diagnostics.py +90 -74
- jarvis/jarvis_tools/methodology.py +3 -3
- jarvis/jarvis_tools/read_code.py +2 -2
- jarvis/jarvis_tools/search_web.py +18 -20
- jarvis/jarvis_tools/tool_generator.py +1 -1
- jarvis/jarvis_tools/treesitter_analyzer.py +331 -0
- jarvis/jarvis_treesitter/README.md +104 -0
- jarvis/jarvis_treesitter/__init__.py +20 -0
- jarvis/jarvis_treesitter/database.py +258 -0
- jarvis/jarvis_treesitter/example.py +115 -0
- jarvis/jarvis_treesitter/grammar_builder.py +182 -0
- jarvis/jarvis_treesitter/language.py +117 -0
- jarvis/jarvis_treesitter/symbol.py +31 -0
- jarvis/jarvis_treesitter/tools_usage.md +121 -0
- jarvis/jarvis_utils/git_utils.py +10 -2
- jarvis/jarvis_utils/input.py +3 -1
- jarvis/jarvis_utils/methodology.py +1 -1
- jarvis/jarvis_utils/output.py +2 -2
- jarvis/jarvis_utils/utils.py +3 -3
- {jarvis_ai_assistant-0.1.129.dist-info → jarvis_ai_assistant-0.1.131.dist-info}/METADATA +2 -4
- jarvis_ai_assistant-0.1.131.dist-info/RECORD +85 -0
- jarvis/jarvis_code_agent/builtin_input_handler.py +0 -43
- jarvis/jarvis_code_agent/patch.py +0 -276
- jarvis/jarvis_tools/lsp_get_document_symbols.py +0 -87
- jarvis/jarvis_tools/lsp_prepare_rename.py +0 -130
- jarvis_ai_assistant-0.1.129.dist-info/RECORD +0 -78
- {jarvis_ai_assistant-0.1.129.dist-info → jarvis_ai_assistant-0.1.131.dist-info}/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.129.dist-info → jarvis_ai_assistant-0.1.131.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.129.dist-info → jarvis_ai_assistant-0.1.131.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.1.129.dist-info → jarvis_ai_assistant-0.1.131.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
# Tree-sitter 代码数据库
|
|
2
|
+
|
|
3
|
+
基于 tree-sitter 的代码分析工具,支持快速查询符号的定义位置、声明位置、引用位置和调用关系。
|
|
4
|
+
|
|
5
|
+
## 功能特点
|
|
6
|
+
|
|
7
|
+
- 支持多种编程语言:Python、C、C++、Go、Rust
|
|
8
|
+
- 自动下载和编译语言语法文件
|
|
9
|
+
- 查找符号定义
|
|
10
|
+
- 查找符号引用
|
|
11
|
+
- 查找函数调用者
|
|
12
|
+
|
|
13
|
+
## 安装
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
pip install -r requirements.txt
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## 使用方法
|
|
20
|
+
|
|
21
|
+
### 基本用法
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
from jarvis.jarvis_treesitter import CodeDatabase
|
|
25
|
+
|
|
26
|
+
# 初始化代码数据库(自动下载所需的语法文件)
|
|
27
|
+
db = CodeDatabase() # 语法文件将保存到 ~/.jarvis/treesitter 目录
|
|
28
|
+
|
|
29
|
+
# 索引源文件
|
|
30
|
+
db.index_file("path/to/file.py")
|
|
31
|
+
|
|
32
|
+
# 查找符号
|
|
33
|
+
symbols = db.find_symbol("function_name")
|
|
34
|
+
|
|
35
|
+
# 查找符号引用
|
|
36
|
+
references = db.find_references(symbols[0])
|
|
37
|
+
|
|
38
|
+
# 查找函数调用者
|
|
39
|
+
callers = db.find_callers(symbols[0])
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
### 自定义语法文件位置
|
|
43
|
+
|
|
44
|
+
虽然默认会使用 `~/.jarvis/treesitter` 目录,但您仍然可以指定自定义目录:
|
|
45
|
+
|
|
46
|
+
```python
|
|
47
|
+
from jarvis.jarvis_treesitter import CodeDatabase
|
|
48
|
+
|
|
49
|
+
# 使用自定义语法文件目录
|
|
50
|
+
db = CodeDatabase(grammar_dir="/path/to/grammars")
|
|
51
|
+
|
|
52
|
+
# 不自动下载缺失的语法文件
|
|
53
|
+
db = CodeDatabase(auto_download=False)
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 手动下载语法文件
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from jarvis.jarvis_treesitter import setup_default_grammars, GrammarBuilder, LanguageType, DEFAULT_GRAMMAR_DIR
|
|
60
|
+
|
|
61
|
+
# 下载所有支持的语言的语法文件到默认目录 (~/.jarvis/treesitter)
|
|
62
|
+
setup_default_grammars()
|
|
63
|
+
|
|
64
|
+
# 或者使用自定义目录
|
|
65
|
+
grammar_dir = "/path/to/grammars"
|
|
66
|
+
builder = GrammarBuilder(grammar_dir)
|
|
67
|
+
builder.ensure_all_grammars() # 下载所有语言
|
|
68
|
+
builder.ensure_grammar(LanguageType.PYTHON) # 只下载特定语言
|
|
69
|
+
|
|
70
|
+
# 查看默认语法文件目录
|
|
71
|
+
print(DEFAULT_GRAMMAR_DIR)  # 输出展开后的绝对路径,例如 /home/<用户名>/.jarvis/treesitter
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## 命令行工具
|
|
75
|
+
|
|
76
|
+
提供了一个示例脚本 `example.py` 演示基本用法:
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# 索引当前目录并查找名为 "main" 的符号
|
|
80
|
+
python -m jarvis.jarvis_treesitter.example --dir . --symbol main
|
|
81
|
+
|
|
82
|
+
# 只索引Python文件
|
|
83
|
+
python -m jarvis.jarvis_treesitter.example --dir . --ext .py --symbol main
|
|
84
|
+
|
|
85
|
+
# 使用自定义语法文件目录(默认是 ~/.jarvis/treesitter)
|
|
86
|
+
python -m jarvis.jarvis_treesitter.example --dir . --grammar-dir /path/to/grammars --symbol main
|
|
87
|
+
|
|
88
|
+
# 不自动下载语法文件
|
|
89
|
+
python -m jarvis.jarvis_treesitter.example --dir . --no-download --symbol main
|
|
90
|
+
```
|
|
91
|
+
|
|
92
|
+
## 语法文件位置
|
|
93
|
+
|
|
94
|
+
默认情况下,所有tree-sitter语法文件将保存在 `~/.jarvis/treesitter` 目录中。这些文件只需要下载和编译一次,后续使用时会自动加载。
|
|
95
|
+
|
|
96
|
+
## 支持的语言
|
|
97
|
+
|
|
98
|
+
| 语言 | 文件扩展名 | 支持的符号类型 |
|
|
99
|
+
|--------|--------------------------|--------------------------------------------------|
|
|
100
|
+
| Python | .py | 函数、类、变量、导入、方法 |
|
|
101
|
+
| C | .c, .h | 函数、结构体、枚举、类型定义、宏、变量 |
|
|
102
|
+
| C++ | .cpp, .hpp, .cc, .hh | 函数、类、结构体、枚举、命名空间、模板、变量 |
|
|
103
|
+
| Go | .go | 函数、结构体、接口、包、导入、变量 |
|
|
104
|
+
| Rust | .rs | 函数、结构体、枚举、特征、实现、模块、变量 |
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
"""Tree-sitter based code database for fast symbol lookup."""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
|
|
5
|
+
from .database import CodeDatabase
|
|
6
|
+
from .symbol import Symbol, SymbolType, SymbolLocation
|
|
7
|
+
from .language import LanguageType, LanguageConfig
|
|
8
|
+
from .grammar_builder import GrammarBuilder, setup_default_grammars, DEFAULT_GRAMMAR_DIR
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"CodeDatabase",
|
|
12
|
+
"Symbol",
|
|
13
|
+
"SymbolType",
|
|
14
|
+
"SymbolLocation",
|
|
15
|
+
"LanguageType",
|
|
16
|
+
"LanguageConfig",
|
|
17
|
+
"GrammarBuilder",
|
|
18
|
+
"setup_default_grammars",
|
|
19
|
+
"DEFAULT_GRAMMAR_DIR",
|
|
20
|
+
]
|
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""Tree-sitter based code database implementation."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import logging
|
|
5
|
+
from typing import Dict, List, Optional, Set, Tuple
|
|
6
|
+
from tree_sitter import Language, Node, Parser, Tree
|
|
7
|
+
from .symbol import Symbol, SymbolType, SymbolLocation
|
|
8
|
+
from .language import LanguageType, detect_language, get_language_config
|
|
9
|
+
from .grammar_builder import GrammarBuilder, setup_default_grammars, DEFAULT_GRAMMAR_DIR
|
|
10
|
+
|
|
11
|
+
# Setup logging
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
class CodeDatabase:
    """In-memory index of code symbols built from tree-sitter parse trees.

    Files are parsed with the grammar matching their detected language; the
    resulting trees are kept so that references and call sites can be looked
    up later by symbol name.
    """

    # Node types that can carry the name of a definition. Besides plain
    # ``identifier`` the grammars name types with ``type_identifier`` (C/C++
    # structs and classes, Rust structs/enums/traits) and members with
    # ``field_identifier`` (Go methods, C++ in-class methods).
    _NAME_NODE_TYPES = ('identifier', 'type_identifier', 'field_identifier')

    # Per language: (call node type, field holding the callee expression).
    _CALL_PATTERNS = {
        LanguageType.PYTHON: ('call', 'function'),
        LanguageType.C: ('call_expression', 'function'),
        LanguageType.CPP: ('call_expression', 'function'),
        LanguageType.GO: ('call_expression', 'function'),
        LanguageType.RUST: ('call_expression', 'function'),
    }

    def __init__(self, grammar_dir: Optional[str] = None, auto_download: bool = True):
        """Initialize the code database.

        Args:
            grammar_dir: Directory containing tree-sitter grammar files.
                If None, uses the default directory (~/.jarvis/treesitter).
            auto_download: Whether to automatically download missing grammar files.
                When False, only grammar files already present in
                ``grammar_dir`` are loaded, both here and in ``index_file``.
        """
        self.parser = Parser()
        self.languages: Dict[LanguageType, Language] = {}
        self.file_languages: Dict[str, LanguageType] = {}
        # Remembered so that index_file() honours the flag as well.
        self.auto_download = auto_download

        # Symbol storage
        self.symbols: Dict[str, List[Symbol]] = {}
        self.file_trees: Dict[str, Tuple[Tree, LanguageType]] = {}

        # Use default grammar directory if not provided. The loop below
        # already ensures every grammar, so no separate bulk setup pass is
        # needed (it would only retry each failed download a second time).
        if grammar_dir is None:
            grammar_dir = DEFAULT_GRAMMAR_DIR

        # Create grammar builder
        self.grammar_builder = GrammarBuilder(grammar_dir)

        # Load all supported language grammars; a failure for one language
        # is logged and must not prevent the others from loading.
        for lang_type in LanguageType:
            try:
                if auto_download:
                    # Ensure grammar exists (download if needed)
                    grammar_path = self.grammar_builder.ensure_grammar(lang_type)
                else:
                    # Just check if grammar file exists
                    grammar_path = self._grammar_file_path(lang_type)
                    if not os.path.exists(grammar_path):
                        logger.warning(f"Grammar file for {lang_type.value} not found: {grammar_path}")
                        continue

                self._load_language(lang_type, grammar_path)
                logger.info(f"Loaded language grammar for {lang_type.value}")
            except Exception as e:
                logger.error(f"Failed to load grammar for {lang_type.value}: {str(e)}")

    def _grammar_file_path(self, lang_type: LanguageType) -> str:
        """Return where the grammar file for ``lang_type`` is expected to live."""
        config = get_language_config(lang_type)
        return os.path.join(self.grammar_builder.grammar_dir, config.grammar_file)

    def _load_language(self, lang_type: LanguageType, grammar_path: str) -> None:
        """Load the grammar at ``grammar_path`` and register it for ``lang_type``."""
        config = get_language_config(lang_type)
        self.languages[lang_type] = Language(grammar_path, config.name)

    def _load_language_on_demand(self, lang_type: LanguageType, file_path: str) -> None:
        """Load a grammar that was not available when the database was created.

        Raises:
            ValueError: If the grammar cannot be built or loaded, or if it is
                missing and automatic downloads are disabled.
        """
        if not hasattr(self, 'grammar_builder'):
            raise ValueError(f"Unsupported language for file: {file_path}")

        try:
            if getattr(self, 'auto_download', True):
                grammar_path = self.grammar_builder.ensure_grammar(lang_type)
            else:
                # Downloads were explicitly disabled: only use a grammar
                # file that is already on disk.
                grammar_path = self._grammar_file_path(lang_type)
                if not os.path.exists(grammar_path):
                    raise FileNotFoundError(
                        f"Grammar file for {lang_type.value} not found: {grammar_path}"
                    )
            self._load_language(lang_type, grammar_path)
            logger.info(f"Built and loaded language grammar for {lang_type.value}")
        except Exception as e:
            raise ValueError(f"Failed to build grammar for {lang_type.value}: {str(e)}") from e

    def index_file(self, file_path: str) -> None:
        """Index a source code file.

        Indexing the same path again replaces its previous symbols instead
        of duplicating them.

        Args:
            file_path: Path to the source code file

        Raises:
            ValueError: If the language cannot be detected or its grammar
                cannot be loaded.
            OSError: If the file cannot be read.
        """
        # Detect language
        lang_type = detect_language(file_path)
        if not lang_type:
            raise ValueError(f"Could not detect language for file: {file_path}")

        # Check if language is supported
        if lang_type not in self.languages:
            self._load_language_on_demand(lang_type, file_path)

        # Read the file before touching any state so that an unreadable
        # file leaves the existing index untouched.
        with open(file_path, 'rb') as f:
            source_code = f.read()

        self.parser.set_language(self.languages[lang_type])
        tree = self.parser.parse(source_code)

        self._remove_file_symbols(file_path)
        self.file_languages[file_path] = lang_type
        self.file_trees[file_path] = (tree, lang_type)

        # Extract symbols from the tree
        self._extract_symbols(tree, file_path, lang_type)

    def _remove_file_symbols(self, file_path: str) -> None:
        """Drop every symbol previously recorded for ``file_path``."""
        if file_path not in self.file_trees:
            # Never indexed before, nothing to clean up.
            return
        for name in list(self.symbols):
            remaining = [s for s in self.symbols[name] if s.location.file_path != file_path]
            if remaining:
                self.symbols[name] = remaining
            else:
                del self.symbols[name]

    @staticmethod
    def _walk(root: Node):
        """Yield ``root`` and all its descendants in pre-order.

        Implemented with an explicit stack so deeply nested trees cannot
        exhaust Python's recursion limit.
        """
        stack = [root]
        while stack:
            node = stack.pop()
            yield node
            # Reversed so that children are visited in source order.
            stack.extend(reversed(node.children))

    @staticmethod
    def _location(file_path: str, node: Node) -> SymbolLocation:
        """Build a 1-based location from tree-sitter's 0-based points."""
        return SymbolLocation(
            file_path=file_path,
            start_line=node.start_point[0] + 1,
            start_column=node.start_point[1] + 1,
            end_line=node.end_point[0] + 1,
            end_column=node.end_point[1] + 1
        )

    @staticmethod
    def _resolve_name_node(node: Node, lang_type: LanguageType) -> Optional[Node]:
        """Return the node holding the name of definition ``node``, if any."""
        if lang_type == LanguageType.PYTHON:
            return node.child_by_field_name('name')

        if lang_type in (LanguageType.C, LanguageType.CPP):
            if node.type == 'function_definition':
                # The 'declarator' field is a function_declarator (possibly
                # wrapped in pointer declarators); the name sits at the
                # bottom of that chain.
                name_node = node.child_by_field_name('declarator')
                while name_node is not None:
                    inner = name_node.child_by_field_name('declarator')
                    if inner is None:
                        break
                    name_node = inner
                # C++ out-of-class definitions: A::B::foo -> foo
                while name_node is not None and name_node.type == 'qualified_identifier':
                    name_node = name_node.child_by_field_name('name')
                return name_node
            if node.type in ('struct_specifier', 'class_specifier'):
                return node.child_by_field_name('name')
            return None

        if lang_type == LanguageType.GO:
            if node.type in ('function_declaration', 'method_declaration'):
                return node.child_by_field_name('name')
            return None

        if lang_type == LanguageType.RUST:
            if node.type in ('function_item', 'struct_item', 'enum_item', 'trait_item'):
                return node.child_by_field_name('name')
            return None

        return None

    def _extract_symbols(self, tree: Tree, file_path: str, lang_type: LanguageType) -> None:
        """Extract symbols from a tree-sitter tree.

        Args:
            tree: The tree-sitter tree
            file_path: Path to the source file
            lang_type: The language type
        """
        config = get_language_config(lang_type)

        for node in self._walk(tree.root_node):
            # Extract symbols based on language-specific patterns
            for symbol_type, patterns in config.symbol_patterns.items():
                if node.type not in patterns:
                    continue

                name_node = self._resolve_name_node(node, lang_type)
                if name_node is None or name_node.type not in self._NAME_NODE_TYPES:
                    continue

                self._add_symbol(Symbol(
                    # Source files are not guaranteed to be valid UTF-8.
                    name=name_node.text.decode('utf-8', errors='replace'),
                    type=SymbolType(symbol_type),
                    location=self._location(file_path, node)
                ))

    def _add_symbol(self, symbol: Symbol) -> None:
        """Add a symbol to the database.

        Args:
            symbol: The symbol to add
        """
        self.symbols.setdefault(symbol.name, []).append(symbol)

    def find_symbol(self, name: str) -> List[Symbol]:
        """Find all occurrences of a symbol by name.

        Args:
            name: The symbol name to search for

        Returns:
            List of matching symbols (a copy; mutating it does not affect
            the database)
        """
        return list(self.symbols.get(name, ()))

    def find_references(self, symbol: Symbol) -> List[Symbol]:
        """Find all references to a symbol.

        Matching is purely by name: every identifier-like node in any
        indexed file whose text equals ``symbol.name`` is reported.

        Args:
            symbol: The symbol to find references for

        Returns:
            List of reference symbols
        """
        # Node text is bytes; encode once instead of decoding every node.
        target = symbol.name.encode('utf-8')
        references = []
        for file_path, (tree, _lang_type) in self.file_trees.items():
            for node in self._walk(tree.root_node):
                if node.type in self._NAME_NODE_TYPES and node.text == target:
                    references.append(Symbol(
                        name=symbol.name,
                        type=SymbolType.REFERENCE,
                        location=self._location(file_path, node)
                    ))

        return references

    def find_callers(self, function_symbol: Symbol) -> List[Symbol]:
        """Find all callers of a function.

        Only calls whose callee is a bare identifier are matched; calls
        through attribute or member access are not reported.

        Args:
            function_symbol: The function symbol to find callers for

        Returns:
            List of caller symbols, located at the call expressions
        """
        target = function_symbol.name.encode('utf-8')
        callers = []
        for file_path, (tree, lang_type) in self.file_trees.items():
            pattern = self._CALL_PATTERNS.get(lang_type)
            if pattern is None:
                # No call pattern known for this language.
                continue
            call_type, callee_field = pattern

            for node in self._walk(tree.root_node):
                if node.type != call_type:
                    continue
                func_node = node.child_by_field_name(callee_field)
                if func_node is not None and func_node.type == 'identifier' and func_node.text == target:
                    callers.append(Symbol(
                        name=function_symbol.name,
                        type=SymbolType.FUNCTION_CALL,
                        location=self._location(file_path, node)
                    ))

        return callers
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Example script demonstrating the use of the tree-sitter code database."""
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import logging
|
|
7
|
+
import argparse
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import List, Optional, Set
|
|
10
|
+
|
|
11
|
+
from jarvis.jarvis_treesitter import (
|
|
12
|
+
CodeDatabase,
|
|
13
|
+
SymbolType,
|
|
14
|
+
setup_default_grammars,
|
|
15
|
+
DEFAULT_GRAMMAR_DIR
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
# Setup logging
|
|
19
|
+
logging.basicConfig(
|
|
20
|
+
level=logging.INFO,
|
|
21
|
+
format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
|
22
|
+
)
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
def index_directory(db: CodeDatabase, directory: str, extensions: Optional[Set[str]] = None) -> int:
    """Walk ``directory`` recursively and index every matching file.

    A file that fails to index is logged as a warning and skipped; it does
    not abort the walk.

    Args:
        db: The code database that receives the files
        directory: Root directory of the walk
        extensions: Optional set of file name suffixes to keep
            (e.g., {'.py', '.c'}); when empty or None every file is tried

    Returns:
        Number of files that were indexed successfully
    """
    indexed = 0
    for dirpath, _subdirs, filenames in os.walk(directory):
        for filename in filenames:
            # str.endswith accepts a tuple of candidate suffixes.
            if extensions and not filename.endswith(tuple(extensions)):
                continue

            full_path = os.path.join(dirpath, filename)
            try:
                db.index_file(full_path)
                indexed += 1
                logger.info(f"Indexed file: {full_path}")
            except Exception as exc:
                logger.warning(f"Failed to index file {full_path}: {str(exc)}")

    return indexed
|
|
51
|
+
|
|
52
|
+
def find_symbol(db: CodeDatabase, symbol_name: str) -> None:
    """Print every symbol called ``symbol_name`` with its references and callers.

    For each match the first five references are listed; for symbols of type
    FUNCTION the first five call sites are listed as well.

    Args:
        db: The code database to query
        symbol_name: Name to look up
    """
    preview_limit = 5

    def print_preview(entries) -> None:
        # Show the head of the list followed by a count of what was left out.
        for position, entry in enumerate(entries[:preview_limit], start=1):
            where = entry.location
            print(f" [{position}] {where.file_path}:{where.start_line}:{where.start_column}")

        if len(entries) > preview_limit:
            print(f" ... and {len(entries) - preview_limit} more")

    matches = db.find_symbol(symbol_name)
    if not matches:
        print(f"No symbols found with name: {symbol_name}")
        return

    print(f"Found {len(matches)} symbols with name: {symbol_name}")
    for number, match in enumerate(matches, start=1):
        print(f"\n[{number}] {match.type.value}: {match.name}")
        print(f" Location: {match.location.file_path}:{match.location.start_line}:{match.location.start_column}")

        # References to this symbol
        references = db.find_references(match)
        print(f" References: {len(references)}")
        print_preview(references)

        # Call sites are only meaningful for functions
        if match.type != SymbolType.FUNCTION:
            continue
        call_sites = db.find_callers(match)
        print(f" Callers: {len(call_sites)}")
        print_preview(call_sites)
|
|
88
|
+
|
|
89
|
+
def main() -> None:
    """Parse the command line, index a directory and optionally look up a symbol."""
    cli = argparse.ArgumentParser(description="Tree-sitter code database example")
    cli.add_argument("--dir", "-d", type=str, default=".", help="Directory to index")
    cli.add_argument("--ext", "-e", type=str, nargs="*", help="File extensions to index (e.g., .py .c)")
    cli.add_argument("--symbol", "-s", type=str, help="Symbol name to search for")
    cli.add_argument(
        "--grammar-dir", "-g",
        type=str,
        default=DEFAULT_GRAMMAR_DIR,
        help=f"Directory containing grammar files (default: {DEFAULT_GRAMMAR_DIR})",
    )
    cli.add_argument("--no-download", action="store_true", help="Don't download missing grammars")
    options = cli.parse_args()

    # Build the database; downloads are on unless --no-download was given.
    database = CodeDatabase(
        grammar_dir=options.grammar_dir,
        auto_download=not options.no_download,
    )

    # An absent or empty --ext means "no extension filter".
    if options.ext:
        wanted_extensions = set(options.ext)
    else:
        wanted_extensions = None

    indexed = index_directory(database, options.dir, wanted_extensions)
    print(f"Indexed {indexed} files in {options.dir}")

    # The lookup is optional.
    if options.symbol:
        find_symbol(database, options.symbol)
|
|
113
|
+
|
|
114
|
+
if __name__ == "__main__":
|
|
115
|
+
main()
|
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""Functionality for downloading and building tree-sitter grammars."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
import tempfile
|
|
6
|
+
import shutil
|
|
7
|
+
from typing import Dict, List, Optional
|
|
8
|
+
import logging
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
from .language import LanguageType, get_language_config
|
|
12
|
+
|
|
13
|
+
# Setup logging
|
|
14
|
+
logger = logging.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
# Default grammar directory
|
|
17
|
+
DEFAULT_GRAMMAR_DIR = os.path.expanduser("~/.jarvis/treesitter")
|
|
18
|
+
|
|
19
|
+
# Tree-sitter grammar repositories
|
|
20
|
+
GRAMMAR_REPOS = {
|
|
21
|
+
LanguageType.PYTHON: "https://github.com/tree-sitter/tree-sitter-python",
|
|
22
|
+
LanguageType.C: "https://github.com/tree-sitter/tree-sitter-c",
|
|
23
|
+
LanguageType.CPP: "https://github.com/tree-sitter/tree-sitter-cpp",
|
|
24
|
+
LanguageType.GO: "https://github.com/tree-sitter/tree-sitter-go",
|
|
25
|
+
LanguageType.RUST: "https://github.com/tree-sitter/tree-sitter-rust",
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
class GrammarBuilder:
    """Handles downloading and building tree-sitter grammar files."""

    def __init__(self, grammar_dir: str = DEFAULT_GRAMMAR_DIR):
        """Initialize the grammar builder.

        The directory is created if it does not exist yet.

        Args:
            grammar_dir: Directory to store built grammar files.
                Defaults to ~/.jarvis/treesitter
        """
        self.grammar_dir = grammar_dir
        os.makedirs(grammar_dir, exist_ok=True)

    def ensure_grammar(self, lang_type: LanguageType) -> str:
        """Ensure the grammar file for a language exists, downloading and building if necessary.

        Args:
            lang_type: The language type

        Returns:
            Path to the grammar file

        Raises:
            ValueError: If no grammar repository is known for the language
            RuntimeError: If cloning or building the grammar fails
        """
        config = get_language_config(lang_type)
        grammar_path = os.path.join(self.grammar_dir, config.grammar_file)

        # Check if grammar file already exists
        if os.path.exists(grammar_path):
            logger.info(f"Grammar file for {lang_type.value} already exists at {grammar_path}")
            return grammar_path

        # Download and build the grammar
        logger.info(f"Building grammar for {lang_type.value}")
        return self._build_grammar(lang_type)

    def ensure_all_grammars(self) -> Dict[LanguageType, str]:
        """Ensure grammar files for all supported languages exist.

        A failure for one language is logged and does not stop the others.

        Returns:
            Dictionary mapping language types to grammar file paths; languages
            whose grammar could not be built are absent from the result
        """
        result = {}
        for lang_type in LanguageType:
            try:
                result[lang_type] = self.ensure_grammar(lang_type)
            except Exception as e:
                logger.error(f"Failed to build grammar for {lang_type.value}: {str(e)}")

        return result

    @staticmethod
    def _run(command: List[str], failure_message: str) -> None:
        """Run ``command`` and raise RuntimeError with its stderr on failure."""
        try:
            result = subprocess.run(
                command,
                check=False,
                capture_output=True,
                text=True
            )
        except OSError as e:
            # e.g. the executable (git) is not installed
            raise RuntimeError(f"{failure_message}: {e}") from e

        if result.returncode != 0:
            raise RuntimeError(f"{failure_message}: {result.stderr}")

    def _build_grammar(self, lang_type: LanguageType) -> str:
        """Download and build the grammar for a language.

        Args:
            lang_type: The language type

        Returns:
            Path to the built grammar file

        Raises:
            ValueError: If no repository URL is defined for the language
            RuntimeError: If grammar building fails
        """
        # Local import keeps this change self-contained within the class.
        import sys

        config = get_language_config(lang_type)
        repo_url = GRAMMAR_REPOS.get(lang_type)

        if not repo_url:
            raise ValueError(f"No repository URL defined for language {lang_type.value}")

        # The checkout is only needed for the duration of the build.
        with tempfile.TemporaryDirectory() as temp_dir:
            # Clone the repository
            logger.info(f"Cloning {repo_url}")
            self._run(
                ["git", "clone", "--depth", "1", repo_url, temp_dir],
                f"Failed to clone repository {repo_url}"
            )

            # Build the grammar
            grammar_path = os.path.join(self.grammar_dir, config.grammar_file)
            build_script = self._create_build_script(temp_dir, lang_type.value, grammar_path)

            # Run the script with the interpreter executing this process: a
            # bare "python" from PATH may be absent or lack tree_sitter.
            logger.info(f"Building grammar for {lang_type.value}")
            self._run(
                [sys.executable or "python", build_script],
                f"Failed to build grammar for {lang_type.value}"
            )

            # Verify file exists
            if not os.path.exists(grammar_path):
                raise RuntimeError(f"Grammar file {grammar_path} was not created")

            logger.info(f"Successfully built grammar for {lang_type.value}: {grammar_path}")
            return grammar_path

    def _create_build_script(self, repo_dir: str, lang_name: str, output_path: str) -> str:
        """Create a Python script to build the grammar.

        Args:
            repo_dir: Path to the cloned repository
            lang_name: Language name (currently not used by the script)
            output_path: Output path for the built grammar

        Returns:
            Path to the build script
        """
        script_path = os.path.join(repo_dir, "build_grammar.py")

        # Paths are embedded with repr() so backslashes and quotes (e.g. in
        # Windows paths) yield valid string literals in the generated code.
        script_lines = [
            "import os",
            "from tree_sitter import Language",
            "",
            f"output_path = {output_path!r}",
            f"repo_dir = {repo_dir!r}",
            "",
            "# Ensure output directory exists",
            "output_dir = os.path.dirname(output_path)",
            "if output_dir:",
            "    os.makedirs(output_dir, exist_ok=True)",
            "",
            "# Build the language",
            "Language.build_library(output_path, [repo_dir])",
            "",
            'print(f"Built grammar: {output_path}")',
            "",
        ]

        # Python reads source files as UTF-8 by default, so write it as such
        # regardless of the locale encoding.
        with open(script_path, "w", encoding="utf-8") as f:
            f.write("\n".join(script_lines))

        return script_path
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def setup_default_grammars() -> str:
    """Make sure every supported grammar is available in the default directory.

    Creates DEFAULT_GRAMMAR_DIR when needed and asks a GrammarBuilder rooted
    there to ensure all grammars.

    Returns:
        Path to the grammar directory
    """
    target_dir = DEFAULT_GRAMMAR_DIR
    os.makedirs(target_dir, exist_ok=True)
    GrammarBuilder(target_dir).ensure_all_grammars()
    return target_dir
|