nexus-dev 3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexus_dev/__init__.py +4 -0
- nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev/agents/__init__.py +20 -0
- nexus_dev/agents/agent_config.py +97 -0
- nexus_dev/agents/agent_executor.py +197 -0
- nexus_dev/agents/agent_manager.py +104 -0
- nexus_dev/agents/prompt_factory.py +91 -0
- nexus_dev/chunkers/__init__.py +168 -0
- nexus_dev/chunkers/base.py +202 -0
- nexus_dev/chunkers/docs_chunker.py +291 -0
- nexus_dev/chunkers/java_chunker.py +343 -0
- nexus_dev/chunkers/javascript_chunker.py +312 -0
- nexus_dev/chunkers/python_chunker.py +308 -0
- nexus_dev/cli.py +2017 -0
- nexus_dev/config.py +261 -0
- nexus_dev/database.py +569 -0
- nexus_dev/embeddings.py +703 -0
- nexus_dev/gateway/__init__.py +10 -0
- nexus_dev/gateway/connection_manager.py +348 -0
- nexus_dev/github_importer.py +247 -0
- nexus_dev/mcp_client.py +281 -0
- nexus_dev/mcp_config.py +184 -0
- nexus_dev/schemas/mcp_config_schema.json +166 -0
- nexus_dev/server.py +1866 -0
- nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.dist-info/METADATA +668 -0
- nexus_dev-3.3.1.dist-info/RECORD +48 -0
- nexus_dev-3.3.1.dist-info/WHEEL +4 -0
- nexus_dev-3.3.1.dist-info/entry_points.txt +14 -0
- nexus_dev-3.3.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,312 @@
|
|
|
1
|
+
"""JavaScript/TypeScript code chunker using tree-sitter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from tree_sitter_language_pack import get_parser
|
|
8
|
+
|
|
9
|
+
from .base import BaseChunker, ChunkType, CodeChunk
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JavaScriptChunker(BaseChunker):
    """Tree-sitter based chunker for JavaScript and TypeScript files.

    Extracts function declarations (including generators), classes, class
    methods, and arrow/function expressions bound to variables as semantic
    chunks. Subclasses reuse the tree walk with a different parser.
    """

    # Node types that declare a named, free-standing function.
    _FUNCTION_NODE_TYPES = ("function_declaration", "generator_function_declaration")
    # Node types that declare a class; the abstract form only occurs in TypeScript.
    _CLASS_NODE_TYPES = ("class_declaration", "abstract_class_declaration")
    # const/let (lexical) and var (variable) declarations.
    _VARIABLE_NODE_TYPES = ("lexical_declaration", "variable_declaration")
    # Initializer node types that make a variable declarator "a function".
    _FUNCTION_VALUE_TYPES = ("arrow_function", "function_expression", "generator_function")

    @property
    def supported_extensions(self) -> list[str]:
        """File extensions handled by this chunker."""
        return [".js", ".jsx", ".mjs", ".cjs"]

    def __init__(self) -> None:
        """Initialize the JavaScript parser."""
        self._parser = get_parser("javascript")

    def chunk_file(self, file_path: str, content: str) -> list[CodeChunk]:
        """Parse JavaScript file and extract semantic chunks.

        Args:
            file_path: Path to the JavaScript file.
            content: File content.

        Returns:
            List of code chunks. Empty for blank content; a single
            module-level chunk when parsing fails or nothing is extracted.
        """
        if not content.strip():
            return []

        try:
            tree = self._parser.parse(content.encode("utf-8"))
        except Exception:
            # Best effort: an unparsable file is still indexed as one chunk.
            return [self._create_module_chunk(file_path, content, "javascript")]

        chunks: list[CodeChunk] = []
        lines = content.split("\n")
        self._walk_tree(tree.root_node, lines, file_path, chunks, "javascript", parent=None)

        return chunks or [self._create_module_chunk(file_path, content, "javascript")]

    def _walk_tree(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        chunks: list[CodeChunk],
        language: str,
        parent: str | None,
    ) -> None:
        """Recursively walk the AST to find functions and classes.

        Recursion stops at every function, class, and variable declaration,
        so definitions nested inside them are not emitted separately.

        Args:
            node: Current AST node.
            lines: Source lines.
            file_path: File path.
            chunks: List to append chunks to.
            language: Language identifier.
            parent: Parent class name if inside a class.
        """
        node_type = node.type

        if node_type in self._FUNCTION_NODE_TYPES:
            chunk = self._extract_function(node, lines, file_path, language, parent)
            if chunk:
                chunks.append(chunk)

        elif node_type in self._CLASS_NODE_TYPES:
            class_name = self._get_class_name(node)
            chunk = self._extract_class(node, lines, file_path, language)
            if chunk:
                chunks.append(chunk)

            # Methods are emitted in addition to the whole-class chunk.
            for child in node.children:
                if child.type != "class_body":
                    continue
                for member in child.children:
                    if member.type != "method_definition":
                        continue
                    method_chunk = self._extract_method(
                        member, lines, file_path, language, class_name
                    )
                    if method_chunk:
                        chunks.append(method_chunk)

        elif node_type in self._VARIABLE_NODE_TYPES:
            # const/let/var bound to a function: const foo = () => {}
            chunks.extend(self._extract_arrow_functions(node, lines, file_path, language))

        else:
            # Covers export statements (to reach the exported declaration)
            # and every other container node.
            for child in node.children:
                self._walk_tree(child, lines, file_path, chunks, language, parent)

    @staticmethod
    def _declared_name(node: Any, fallback_types: tuple[str, ...], default: str) -> str:
        """Return the text of a declaration's name.

        The grammar's ``name`` field is preferred because the name node's
        type differs between grammars and declaration kinds; the first
        direct child of one of ``fallback_types`` is used otherwise.

        Args:
            node: Declaration AST node.
            fallback_types: Child node types accepted when no ``name`` field exists.
            default: Value returned when no name can be found.
        """
        lookup = getattr(node, "child_by_field_name", None)
        name_node = lookup("name") if callable(lookup) else None
        if name_node is None:
            name_node = next(
                (child for child in node.children if child.type in fallback_types), None
            )
        if name_node is None:
            return default
        return name_node.text.decode("utf-8")

    def _get_function_name(self, node: Any) -> str:
        """Get function name from declaration, or "anonymous"."""
        return self._declared_name(node, ("identifier",), "anonymous")

    def _get_class_name(self, node: Any) -> str:
        """Get class name from declaration, or "AnonymousClass".

        TypeScript names classes with a ``type_identifier`` node, so both
        node types are accepted.
        """
        return self._declared_name(node, ("identifier", "type_identifier"), "AnonymousClass")

    def _get_method_name(self, node: Any) -> str:
        """Get method name from definition, or "anonymous".

        Private (``#name``), computed, and string-literal names are returned
        as written in the source.
        """
        return self._declared_name(
            node, ("property_identifier", "private_property_identifier"), "anonymous"
        )

    @staticmethod
    def _node_span(node: Any, lines: list[str]) -> tuple[int, int, str, str]:
        """Return ``(first_row, last_row, text, signature)`` for ``node``.

        Rows are 0-based. ``text`` is the full source lines the node touches
        and ``signature`` is the stripped first line.
        """
        first_row = node.start_point[0]
        last_row = node.end_point[0]
        text = "\n".join(lines[first_row : last_row + 1])
        return first_row, last_row, text, lines[first_row].strip()

    def _extract_function(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        language: str,
        parent: str | None,
    ) -> CodeChunk | None:
        """Extract a function declaration as a chunk.

        Returns:
            The chunk, or None if extraction failed.
        """
        try:
            first_row, last_row, text, signature = self._node_span(node, lines)
            return CodeChunk(
                content=text,
                chunk_type=ChunkType.FUNCTION,
                name=self._get_function_name(node),
                start_line=first_row + 1,  # chunks use 1-based line numbers
                end_line=last_row + 1,
                language=language,
                file_path=file_path,
                parent=parent,
                signature=signature,
            )
        except Exception:
            # Best effort: one malformed node must not abort the whole file.
            return None

    def _extract_class(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        language: str,
    ) -> CodeChunk | None:
        """Extract a class as a chunk.

        Returns:
            The chunk, or None if extraction failed.
        """
        try:
            first_row, last_row, text, signature = self._node_span(node, lines)
            return CodeChunk(
                content=text,
                chunk_type=ChunkType.CLASS,
                name=self._get_class_name(node),
                start_line=first_row + 1,
                end_line=last_row + 1,
                language=language,
                file_path=file_path,
                signature=signature,
            )
        except Exception:
            return None

    def _extract_method(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        language: str,
        parent: str,
    ) -> CodeChunk | None:
        """Extract a class method as a chunk.

        Args:
            node: ``method_definition`` AST node.
            lines: Source lines.
            file_path: File path.
            language: Language identifier.
            parent: Name of the enclosing class.

        Returns:
            The chunk, or None if extraction failed.
        """
        try:
            first_row, last_row, text, signature = self._node_span(node, lines)
            return CodeChunk(
                content=text,
                chunk_type=ChunkType.METHOD,
                name=self._get_method_name(node),
                start_line=first_row + 1,
                end_line=last_row + 1,
                language=language,
                file_path=file_path,
                parent=parent,
                signature=signature,
            )
        except Exception:
            return None

    def _extract_arrow_functions(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        language: str,
    ) -> list[CodeChunk]:
        """Extract every function-valued declarator of a const/let/var declaration.

        A declaration with a single declarator is chunked as a whole so the
        ``const``/``let``/``var`` keyword stays in the content. With several
        declarators each function gets the span of its own declarator.

        Returns:
            Chunks in source order; empty when the declaration binds no function.
        """
        found: list[CodeChunk] = []
        try:
            declarators = [
                child for child in node.children if child.type == "variable_declarator"
            ]
            for declarator in declarators:
                name = None
                has_function = False
                for part in declarator.children:
                    if part.type == "identifier":
                        name = part.text.decode("utf-8")
                    elif part.type in self._FUNCTION_VALUE_TYPES:
                        has_function = True
                if not (name and has_function):
                    continue

                span_node = node if len(declarators) == 1 else declarator
                first_row, last_row, text, signature = self._node_span(span_node, lines)
                found.append(
                    CodeChunk(
                        content=text,
                        chunk_type=ChunkType.FUNCTION,
                        name=name,
                        start_line=first_row + 1,
                        end_line=last_row + 1,
                        language=language,
                        file_path=file_path,
                        signature=signature,
                    )
                )
        except Exception:
            # Best effort: keep whatever was extracted before the failure.
            pass
        return found

    def _extract_arrow_function(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        language: str,
    ) -> CodeChunk | None:
        """Extract an arrow function from const/let/var declaration.

        Returns:
            The first function-valued declarator as a chunk, or None.
        """
        found = self._extract_arrow_functions(node, lines, file_path, language)
        return found[0] if found else None

    def _create_module_chunk(self, file_path: str, content: str, language: str) -> CodeChunk:
        """Create a module-level chunk for the entire file.

        The chunk is named after the last path component; both ``/`` and
        ``\\`` are treated as separators so Windows paths yield the file name.
        """
        name = file_path.replace("\\", "/").rsplit("/", 1)[-1]
        return CodeChunk(
            content=content,
            chunk_type=ChunkType.MODULE,
            name=name,
            start_line=1,
            end_line=content.count("\n") + 1,
            language=language,
            file_path=file_path,
        )
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
class TypeScriptChunker(JavaScriptChunker):
    """Tree-sitter based chunker for TypeScript files.

    Inherits the tree walk from JavaScriptChunker and swaps in the
    TypeScript grammar, or the TSX grammar for ``.tsx`` files.
    """

    @property
    def supported_extensions(self) -> list[str]:
        """File extensions handled by this chunker."""
        return [".ts", ".tsx", ".mts", ".cts"]

    def __init__(self) -> None:
        """Initialize the TypeScript parser."""
        self._parser = get_parser("typescript")
        # Created on first use; most projects never need it.
        self._tsx_parser: Any = None

    def _parser_for(self, file_path: str) -> Any:
        """Return the parser matching ``file_path``'s extension.

        JSX syntax is only understood by the separate TSX grammar. If that
        grammar cannot be loaded the plain TypeScript parser is used.
        """
        if not file_path.lower().endswith(".tsx"):
            return self._parser
        if getattr(self, "_tsx_parser", None) is None:
            try:
                self._tsx_parser = get_parser("tsx")
            except Exception:
                return self._parser
        return self._tsx_parser

    def chunk_file(self, file_path: str, content: str) -> list[CodeChunk]:
        """Parse TypeScript file and extract semantic chunks.

        Args:
            file_path: Path to the TypeScript file.
            content: File content.

        Returns:
            List of code chunks. Empty for blank content; a single
            module-level chunk when parsing fails or nothing is extracted.
        """
        if not content.strip():
            return []

        try:
            tree = self._parser_for(file_path).parse(content.encode("utf-8"))
        except Exception:
            # Best effort: an unparsable file is still indexed as one chunk.
            return [self._create_module_chunk(file_path, content, "typescript")]

        chunks: list[CodeChunk] = []
        lines = content.split("\n")
        self._walk_tree(tree.root_node, lines, file_path, chunks, "typescript", parent=None)

        return chunks or [self._create_module_chunk(file_path, content, "typescript")]

    def _get_class_name(self, node: Any) -> str:
        """Get class name from declaration, or "AnonymousClass".

        The TypeScript grammar names classes with a ``type_identifier``
        node rather than ``identifier``, so both are accepted.
        """
        for child in node.children:
            if child.type in ("type_identifier", "identifier"):
                return child.text.decode("utf-8")
        return "AnonymousClass"
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
"""Python code chunker using tree-sitter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from tree_sitter_language_pack import get_parser
|
|
8
|
+
|
|
9
|
+
from .base import BaseChunker, ChunkType, CodeChunk
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class PythonChunker(BaseChunker):
    """Tree-sitter based chunker for Python files.

    Extracts functions, classes, and methods as semantic chunks.
    Preserves decorators, docstrings and signatures for better search quality.
    """

    # "async_function_definition" is kept for grammars that emit it; async
    # functions otherwise arrive as a plain "function_definition".
    _FUNCTION_NODE_TYPES = ("function_definition", "async_function_definition")

    @property
    def supported_extensions(self) -> list[str]:
        """File extensions handled by this chunker."""
        return [".py", ".pyw"]

    def __init__(self) -> None:
        """Initialize the Python parser."""
        self._parser = get_parser("python")

    def chunk_file(self, file_path: str, content: str) -> list[CodeChunk]:
        """Parse Python file and extract semantic chunks.

        Args:
            file_path: Path to the Python file.
            content: File content.

        Returns:
            List of code chunks (functions, classes, methods). Empty for
            blank content; a single module chunk when parsing fails or no
            definition is found.
        """
        if not content.strip():
            return []

        try:
            tree = self._parser.parse(content.encode("utf-8"))
        except Exception:
            # Fall back to returning whole file as single chunk
            return [self._create_module_chunk(file_path, content)]

        chunks: list[CodeChunk] = []
        lines = content.split("\n")

        # Module-level imports are attached to every chunk for context.
        imports = self._extract_imports(tree.root_node, lines)
        self._walk_tree(tree.root_node, lines, file_path, chunks, imports, parent=None)

        return chunks or [self._create_module_chunk(file_path, content)]

    def _unwrap_decorated(self, node: Any) -> tuple[Any, Any]:
        """Split a possibly decorated definition into ``(definition, outer)``.

        For a ``decorated_definition`` wrapping a function or class,
        ``definition`` is the wrapped node and ``outer`` is the wrapper
        (whose span includes the decorators). Any other node is returned
        as ``(node, node)``.
        """
        if node.type == "decorated_definition":
            wanted = (*self._FUNCTION_NODE_TYPES, "class_definition")
            for child in node.children:
                if child.type in wanted:
                    return child, node
        return node, node

    def _walk_tree(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        chunks: list[CodeChunk],
        imports: list[str],
        parent: str | None,
    ) -> None:
        """Recursively walk the AST to find functions and classes.

        Recursion stops at every function and class, so definitions nested
        inside function bodies or inner classes are not emitted separately.

        Args:
            node: Current AST node.
            lines: Source lines.
            file_path: File path.
            chunks: List to append chunks to.
            imports: List of import statements.
            parent: Parent class name if inside a class.
        """
        definition, outer = self._unwrap_decorated(node)

        if definition.type in self._FUNCTION_NODE_TYPES:
            chunk = self._extract_function(
                definition, lines, file_path, imports, parent, outer=outer
            )
            if chunk:
                chunks.append(chunk)

        elif definition.type == "class_definition":
            class_name = self._get_node_name(definition)

            # Extract the whole class as one chunk
            chunk = self._extract_class(definition, lines, file_path, imports, outer=outer)
            if chunk:
                chunks.append(chunk)

            # Also extract individual methods, including decorated ones
            # (@property, @staticmethod, ...), which the grammar wraps in a
            # decorated_definition node.
            for child in definition.children:
                if child.type != "block":
                    continue
                for member in child.children:
                    method, method_outer = self._unwrap_decorated(member)
                    if method.type not in self._FUNCTION_NODE_TYPES:
                        continue
                    method_chunk = self._extract_function(
                        method, lines, file_path, imports, class_name, outer=method_outer
                    )
                    if method_chunk:
                        chunks.append(method_chunk)

        else:
            # Recurse into other nodes
            for child in node.children:
                self._walk_tree(child, lines, file_path, chunks, imports, parent)

    def _extract_imports(self, root: Any, lines: list[str]) -> list[str]:
        """Extract import statements from the module.

        Only direct children of ``root`` are inspected, so imports nested in
        functions or conditionals are not collected.

        Args:
            root: Root AST node.
            lines: Source lines.

        Returns:
            List of import statements, each as the full source lines it spans.
        """
        return [
            "\n".join(lines[child.start_point[0] : child.end_point[0] + 1])
            for child in root.children
            if child.type in ("import_statement", "import_from_statement")
        ]

    def _get_node_name(self, node: Any) -> str:
        """Get the name from a function or class definition.

        Args:
            node: AST node.

        Returns:
            Text of the first ``identifier`` child, or "unknown".
        """
        for child in node.children:
            if child.type == "identifier":
                return child.text.decode("utf-8")
        return "unknown"

    @staticmethod
    def _strip_string_quotes(literal: str) -> str:
        """Remove an optional r/u prefix and the surrounding quotes from a literal."""
        text = literal.strip()
        prefix_len = 0
        while prefix_len < len(text) and text[prefix_len] in "rRuU":
            prefix_len += 1
        if prefix_len < len(text) and text[prefix_len] in "\"'":
            text = text[prefix_len:]
        # Triple quotes first so '"""x"""' is not mistaken for '"' + '""x""' + '"'.
        for quote in ('"""', "'''", '"', "'"):
            if len(text) >= 2 * len(quote) and text.startswith(quote) and text.endswith(quote):
                return text[len(quote) : -len(quote)].strip()
        return text

    def _get_docstring(self, node: Any, lines: list[str]) -> str | None:
        """Extract docstring from a function or class.

        Only a string literal that is the first statement of the body counts;
        comments before it are skipped.

        Args:
            node: AST node.
            lines: Source lines.

        Returns:
            Docstring text without quotes, or None.
        """
        body = next((child for child in node.children if child.type == "block"), None)
        if body is None:
            return None

        for statement in body.children:
            if statement.type == "comment":
                continue
            if statement.type != "expression_statement":
                return None
            literal = next(
                (child for child in statement.children if child.type == "string"), None
            )
            if literal is None:
                return None

            raw = getattr(literal, "text", None)
            if raw is not None:
                text = raw.decode("utf-8")
            else:
                # No node text available: fall back to the full source lines.
                text = "\n".join(lines[literal.start_point[0] : literal.end_point[0] + 1])
            return self._strip_string_quotes(text)
        return None

    def _signature_from_lines(self, node: Any, lines: list[str]) -> str:
        """Guess a signature from up to five source lines.

        Fallback for nodes without a ``:`` token: collects lines until one
        ends with a colon, then cuts after the last colon.
        """
        start_line = node.start_point[0]
        sig_parts = []
        for i in range(start_line, min(start_line + 5, len(lines))):
            line = lines[i]
            sig_parts.append(line)
            if line.rstrip().endswith(":"):
                break

        signature = " ".join(sig_parts)
        if ":" in signature:
            signature = signature[: signature.rfind(":") + 1]
        return signature.strip()

    def _get_signature(self, node: Any, lines: list[str]) -> str:
        """Extract function/method/class signature.

        The signature runs from the start of the definition to the ``:``
        token that introduces the body, so one-line definitions, trailing
        comments and long parameter lists cannot leak body text into it.

        Args:
            node: AST node.
            lines: Source lines.

        Returns:
            Signature string, with continuation lines joined by one space.
        """
        colon = None
        for child in node.children:
            if child.type == "block":
                break
            if child.type == ":":
                colon = child
        if colon is None:
            return self._signature_from_lines(node, lines)

        first_row = node.start_point[0]
        last_row, end_column = colon.end_point[0], colon.end_point[1]
        # Tree-sitter columns are byte offsets, so cut in UTF-8 bytes.
        tail = lines[last_row].encode("utf-8")[:end_column].decode("utf-8", errors="ignore")
        parts = [*lines[first_row:last_row], tail]
        return " ".join(part.strip() for part in parts).strip()

    def _extract_function(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        imports: list[str],
        parent: str | None,
        *,
        outer: Any | None = None,
    ) -> CodeChunk | None:
        """Extract a function as a chunk.

        Args:
            node: Function AST node.
            lines: Source lines.
            file_path: File path.
            imports: Import statements.
            parent: Parent class name if method.
            outer: Node whose span is used for content and line numbers,
                typically the enclosing ``decorated_definition``. Defaults
                to ``node``.

        Returns:
            CodeChunk or None if extraction failed.
        """
        try:
            span = node if outer is None else outer
            start_line = span.start_point[0]
            end_line = span.end_point[0]

            return CodeChunk(
                content="\n".join(lines[start_line : end_line + 1]),
                chunk_type=ChunkType.METHOD if parent else ChunkType.FUNCTION,
                name=self._get_node_name(node),
                start_line=start_line + 1,  # chunks use 1-based line numbers
                end_line=end_line + 1,
                language="python",
                file_path=file_path,
                parent=parent,
                docstring=self._get_docstring(node, lines),
                imports=imports[:5],  # Limit imports for context
                signature=self._get_signature(node, lines),
            )
        except Exception:
            # Best effort: one malformed node must not abort the whole file.
            return None

    def _extract_class(
        self,
        node: Any,
        lines: list[str],
        file_path: str,
        imports: list[str],
        *,
        outer: Any | None = None,
    ) -> CodeChunk | None:
        """Extract a class as a chunk.

        Args:
            node: Class AST node.
            lines: Source lines.
            file_path: File path.
            imports: Import statements.
            outer: Node whose span is used for content and line numbers,
                typically the enclosing ``decorated_definition``. Defaults
                to ``node``.

        Returns:
            CodeChunk or None if extraction failed.
        """
        try:
            span = node if outer is None else outer
            start_line = span.start_point[0]
            end_line = span.end_point[0]

            return CodeChunk(
                content="\n".join(lines[start_line : end_line + 1]),
                chunk_type=ChunkType.CLASS,
                name=self._get_node_name(node),
                start_line=start_line + 1,
                end_line=end_line + 1,
                language="python",
                file_path=file_path,
                docstring=self._get_docstring(node, lines),
                imports=imports[:5],
                signature=self._get_signature(node, lines),
            )
        except Exception:
            return None

    def _create_module_chunk(self, file_path: str, content: str) -> CodeChunk:
        """Create a module-level chunk for the entire file.

        The chunk is named after the last path component; both ``/`` and
        ``\\`` are treated as separators so Windows paths yield the file name.

        Args:
            file_path: File path.
            content: File content.

        Returns:
            Module chunk.
        """
        name = file_path.replace("\\", "/").rsplit("/", 1)[-1]
        return CodeChunk(
            content=content,
            chunk_type=ChunkType.MODULE,
            name=name,
            start_line=1,
            end_line=content.count("\n") + 1,
            language="python",
            file_path=file_path,
        )
|