nexus-dev 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nexus-dev might be problematic.
- nexus_dev/__init__.py +4 -0
- nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev/agents/__init__.py +20 -0
- nexus_dev/agents/agent_config.py +97 -0
- nexus_dev/agents/agent_executor.py +197 -0
- nexus_dev/agents/agent_manager.py +104 -0
- nexus_dev/agents/prompt_factory.py +91 -0
- nexus_dev/chunkers/__init__.py +168 -0
- nexus_dev/chunkers/base.py +202 -0
- nexus_dev/chunkers/docs_chunker.py +291 -0
- nexus_dev/chunkers/java_chunker.py +343 -0
- nexus_dev/chunkers/javascript_chunker.py +312 -0
- nexus_dev/chunkers/python_chunker.py +308 -0
- nexus_dev/cli.py +1673 -0
- nexus_dev/config.py +253 -0
- nexus_dev/database.py +558 -0
- nexus_dev/embeddings.py +585 -0
- nexus_dev/gateway/__init__.py +10 -0
- nexus_dev/gateway/connection_manager.py +348 -0
- nexus_dev/github_importer.py +247 -0
- nexus_dev/mcp_client.py +281 -0
- nexus_dev/mcp_config.py +184 -0
- nexus_dev/schemas/mcp_config_schema.json +166 -0
- nexus_dev/server.py +1866 -0
- nexus_dev/templates/pre-commit-hook +33 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev-3.2.0.data/data/nexus_dev/templates/pre-commit-hook +33 -0
- nexus_dev-3.2.0.dist-info/METADATA +636 -0
- nexus_dev-3.2.0.dist-info/RECORD +48 -0
- nexus_dev-3.2.0.dist-info/WHEEL +4 -0
- nexus_dev-3.2.0.dist-info/entry_points.txt +12 -0
- nexus_dev-3.2.0.dist-info/licenses/LICENSE +21 -0
nexus_dev/chunkers/docs_chunker.py
@@ -0,0 +1,291 @@
"""Documentation chunker for Markdown and RST files."""

from __future__ import annotations

import re

from .base import BaseChunker, ChunkType, CodeChunk


class DocumentationChunker(BaseChunker):
    """Chunker for documentation files (Markdown, RST, plain text).

    Splits documentation by headers/sections while keeping code blocks intact.
    """

    @property
    def supported_extensions(self) -> list[str]:
        return [".md", ".markdown", ".rst", ".txt"]

    def chunk_file(self, file_path: str, content: str) -> list[CodeChunk]:
        """Parse documentation file and extract sections as chunks.

        Args:
            file_path: Path to the documentation file.
            content: File content.

        Returns:
            List of documentation chunks split by headers.
        """
        if not content.strip():
            return []

        ext = file_path.split(".")[-1].lower()

        if ext in ("md", "markdown"):
            return self._chunk_markdown(file_path, content)
        elif ext == "rst":
            return self._chunk_rst(file_path, content)
        else:
            return self._chunk_plain_text(file_path, content)

    def _chunk_markdown(self, file_path: str, content: str) -> list[CodeChunk]:
        """Chunk Markdown content by headers.

        Args:
            file_path: File path.
            content: Markdown content.

        Returns:
            List of section chunks.
        """
        lines = content.split("\n")
        chunks: list[CodeChunk] = []

        # Pattern for Markdown headers (# through ######)
        header_pattern = re.compile(r"^(#{1,6})\s+(.+)$")

        current_section: list[str] = []
        current_header = ""
        current_start_line = 1
        parent_header: str | None = None

        for i, line in enumerate(lines):
            match = header_pattern.match(line)

            if match:
                # Save previous section if it has content
                if current_section:
                    section_content = "\n".join(current_section).strip()
                    if section_content:
                        chunks.append(
                            CodeChunk(
                                content=section_content,
                                chunk_type=ChunkType.SECTION,
                                name=current_header or file_path.split("/")[-1],
                                start_line=current_start_line,
                                end_line=i,
                                language="markdown",
                                file_path=file_path,
                                parent=parent_header,
                            )
                        )

                # Start new section
                level = len(match.group(1))
                header_text = match.group(2).strip()

                # Track parent for level 2+ headers
                if level == 1:
                    parent_header = None
                elif level == 2:
                    parent_header = current_header if current_header else None

                current_header = header_text
                current_section = [line]
                current_start_line = i + 1
            else:
                current_section.append(line)

        # Save final section
        if current_section:
            section_content = "\n".join(current_section).strip()
            if section_content:
                chunks.append(
                    CodeChunk(
                        content=section_content,
                        chunk_type=ChunkType.SECTION,
                        name=current_header or file_path.split("/")[-1],
                        start_line=current_start_line,
                        end_line=len(lines),
                        language="markdown",
                        file_path=file_path,
                        parent=parent_header,
                    )
                )

        # If no headers found, return whole file as one chunk
        if not chunks:
            chunks.append(
                CodeChunk(
                    content=content,
                    chunk_type=ChunkType.DOCUMENTATION,
                    name=file_path.split("/")[-1] if "/" in file_path else file_path,
                    start_line=1,
                    end_line=len(lines),
                    language="markdown",
                    file_path=file_path,
                )
            )

        return chunks

    def _chunk_rst(self, file_path: str, content: str) -> list[CodeChunk]:
        """Chunk RST content by title underlines.

        Args:
            file_path: File path.
            content: RST content.

        Returns:
            List of section chunks.
        """
        lines = content.split("\n")
        chunks: list[CodeChunk] = []

        # RST title underlines: =, -, ~, ^, ", etc.
        underline_pattern = re.compile(r"^[=\-~^\"\'`\*\+#]+$")

        current_section: list[str] = []
        current_header = ""
        current_start_line = 1

        i = 0
        while i < len(lines):
            # Check if next line is an underline (making current line a header)
            if (
                i + 1 < len(lines)
                and lines[i].strip()
                and underline_pattern.match(lines[i + 1].strip())
                and len(lines[i + 1].strip()) >= len(lines[i].strip())
            ):
                # Save previous section
                if current_section:
                    section_content = "\n".join(current_section).strip()
                    if section_content:
                        chunks.append(
                            CodeChunk(
                                content=section_content,
                                chunk_type=ChunkType.SECTION,
                                name=current_header or file_path.split("/")[-1],
                                start_line=current_start_line,
                                end_line=i,
                                language="rst",
                                file_path=file_path,
                            )
                        )

                # Start new section with header
                current_header = lines[i].strip()
                current_section = [lines[i], lines[i + 1]]
                current_start_line = i + 1
                i += 2
            else:
                current_section.append(lines[i])
                i += 1

        # Save final section
        if current_section:
            section_content = "\n".join(current_section).strip()
            if section_content:
                chunks.append(
                    CodeChunk(
                        content=section_content,
                        chunk_type=ChunkType.SECTION,
                        name=current_header or file_path.split("/")[-1],
                        start_line=current_start_line,
                        end_line=len(lines),
                        language="rst",
                        file_path=file_path,
                    )
                )

        if not chunks:
            chunks.append(
                CodeChunk(
                    content=content,
                    chunk_type=ChunkType.DOCUMENTATION,
                    name=file_path.split("/")[-1] if "/" in file_path else file_path,
                    start_line=1,
                    end_line=len(lines),
                    language="rst",
                    file_path=file_path,
                )
            )

        return chunks

    def _chunk_plain_text(self, file_path: str, content: str) -> list[CodeChunk]:
        """Chunk plain text by paragraph breaks or fixed size.

        Args:
            file_path: File path.
            content: Text content.

        Returns:
            List of text chunks.
        """
        # Split by double newlines (paragraphs)
        paragraphs = re.split(r"\n\s*\n", content)

        chunks: list[CodeChunk] = []
        current_chunk: list[str] = []
        current_length = 0
        chunk_index = 0
        max_chunk_size = 1500

        for para in paragraphs:
            para = para.strip()
            if not para:
                continue

            if current_length + len(para) > max_chunk_size and current_chunk:
                # Save current chunk
                chunk_content = "\n\n".join(current_chunk)
                chunks.append(
                    CodeChunk(
                        content=chunk_content,
                        chunk_type=ChunkType.DOCUMENTATION,
                        name=f"{file_path.split('/')[-1]}:section_{chunk_index}",
                        start_line=1,  # Not tracking exact lines for plain text
                        end_line=1,
                        language="text",
                        file_path=file_path,
                    )
                )
                current_chunk = []
                current_length = 0
                chunk_index += 1

            current_chunk.append(para)
            current_length += len(para)

        # Save final chunk
        if current_chunk:
            chunk_content = "\n\n".join(current_chunk)
            chunks.append(
                CodeChunk(
                    content=chunk_content,
                    chunk_type=ChunkType.DOCUMENTATION,
                    name=f"{file_path.split('/')[-1]}:section_{chunk_index}",
                    start_line=1,
                    end_line=1,
                    language="text",
                    file_path=file_path,
                )
            )

        if not chunks:
            chunks.append(
                CodeChunk(
                    content=content,
                    chunk_type=ChunkType.DOCUMENTATION,
                    name=file_path.split("/")[-1] if "/" in file_path else file_path,
                    start_line=1,
                    end_line=content.count("\n") + 1,
                    language="text",
                    file_path=file_path,
                )
            )

        return chunks
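For reference, a minimal sketch of how this chunker might be driven, assuming the package is importable, that DocumentationChunker needs no constructor arguments beyond what BaseChunker provides, and that CodeChunk exposes its fields as attributes (all assumptions, not confirmed by this diff):

from nexus_dev.chunkers.docs_chunker import DocumentationChunker

chunker = DocumentationChunker()
sample = "# Guide\n\nIntro paragraph.\n\n## Install\n\npip install nexus-dev\n"
chunks = chunker.chunk_file("docs/guide.md", sample)

# Each Markdown header starts a new SECTION chunk; a level-2 header records
# the preceding level-1 header as its parent.
for chunk in chunks:
    print(chunk.name, chunk.start_line, chunk.end_line, chunk.parent)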
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""Java code chunker using tree-sitter."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from tree_sitter_language_pack import get_parser
|
|
8
|
+
|
|
9
|
+
from .base import BaseChunker, ChunkType, CodeChunk
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class JavaChunker(BaseChunker):
|
|
13
|
+
"""Tree-sitter based chunker for Java files.
|
|
14
|
+
|
|
15
|
+
Extracts classes, interfaces, methods, and constructors as semantic chunks.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
@property
|
|
19
|
+
def supported_extensions(self) -> list[str]:
|
|
20
|
+
return [".java"]
|
|
21
|
+
|
|
22
|
+
def __init__(self) -> None:
|
|
23
|
+
"""Initialize the Java parser."""
|
|
24
|
+
self._parser = get_parser("java")
|
|
25
|
+
|
|
26
|
+
def chunk_file(self, file_path: str, content: str) -> list[CodeChunk]:
|
|
27
|
+
"""Parse Java file and extract semantic chunks.
|
|
28
|
+
|
|
29
|
+
Args:
|
|
30
|
+
file_path: Path to the Java file.
|
|
31
|
+
content: File content.
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
List of code chunks.
|
|
35
|
+
"""
|
|
36
|
+
if not content.strip():
|
|
37
|
+
return []
|
|
38
|
+
|
|
39
|
+
try:
|
|
40
|
+
tree = self._parser.parse(content.encode("utf-8"))
|
|
41
|
+
except Exception:
|
|
42
|
+
return [self._create_module_chunk(file_path, content)]
|
|
43
|
+
|
|
44
|
+
chunks: list[CodeChunk] = []
|
|
45
|
+
lines = content.split("\n")
|
|
46
|
+
|
|
47
|
+
# Extract package and imports for context
|
|
48
|
+
imports = self._extract_imports(tree.root_node, lines)
|
|
49
|
+
|
|
50
|
+
self._walk_tree(tree.root_node, lines, file_path, chunks, imports, parent=None)
|
|
51
|
+
|
|
52
|
+
if not chunks:
|
|
53
|
+
return [self._create_module_chunk(file_path, content)]
|
|
54
|
+
|
|
55
|
+
return chunks
|
|
56
|
+
|
|
57
|
+
def _walk_tree(
|
|
58
|
+
self,
|
|
59
|
+
node: Any,
|
|
60
|
+
lines: list[str],
|
|
61
|
+
file_path: str,
|
|
62
|
+
chunks: list[CodeChunk],
|
|
63
|
+
imports: list[str],
|
|
64
|
+
parent: str | None,
|
|
65
|
+
) -> None:
|
|
66
|
+
"""Recursively walk the AST to find classes and methods.
|
|
67
|
+
|
|
68
|
+
Args:
|
|
69
|
+
node: Current AST node.
|
|
70
|
+
lines: Source lines.
|
|
71
|
+
file_path: File path.
|
|
72
|
+
chunks: List to append chunks to.
|
|
73
|
+
imports: Package and import statements.
|
|
74
|
+
parent: Parent class name if inside a class.
|
|
75
|
+
"""
|
|
76
|
+
if node.type == "class_declaration":
|
|
77
|
+
class_name = self._get_identifier(node)
|
|
78
|
+
|
|
79
|
+
# Extract the whole class
|
|
80
|
+
chunk = self._extract_class(node, lines, file_path, imports)
|
|
81
|
+
if chunk:
|
|
82
|
+
chunks.append(chunk)
|
|
83
|
+
|
|
84
|
+
# Extract methods and constructors
|
|
85
|
+
for child in node.children:
|
|
86
|
+
if child.type == "class_body":
|
|
87
|
+
for member in child.children:
|
|
88
|
+
if member.type == "method_declaration":
|
|
89
|
+
method_chunk = self._extract_method(
|
|
90
|
+
member, lines, file_path, imports, class_name
|
|
91
|
+
)
|
|
92
|
+
if method_chunk:
|
|
93
|
+
chunks.append(method_chunk)
|
|
94
|
+
elif member.type == "constructor_declaration":
|
|
95
|
+
constructor_chunk = self._extract_constructor(
|
|
96
|
+
member, lines, file_path, imports, class_name
|
|
97
|
+
)
|
|
98
|
+
if constructor_chunk:
|
|
99
|
+
chunks.append(constructor_chunk)
|
|
100
|
+
# Recurse for inner classes
|
|
101
|
+
elif member.type == "class_declaration":
|
|
102
|
+
self._walk_tree(member, lines, file_path, chunks, imports, class_name)
|
|
103
|
+
|
|
104
|
+
elif node.type == "interface_declaration":
|
|
105
|
+
chunk = self._extract_interface(node, lines, file_path, imports)
|
|
106
|
+
if chunk:
|
|
107
|
+
chunks.append(chunk)
|
|
108
|
+
|
|
109
|
+
elif node.type == "enum_declaration":
|
|
110
|
+
chunk = self._extract_enum(node, lines, file_path, imports)
|
|
111
|
+
if chunk:
|
|
112
|
+
chunks.append(chunk)
|
|
113
|
+
|
|
114
|
+
else:
|
|
115
|
+
for child in node.children:
|
|
116
|
+
self._walk_tree(child, lines, file_path, chunks, imports, parent)
|
|
117
|
+
|
|
118
|
+
def _extract_imports(self, root: Any, lines: list[str]) -> list[str]:
|
|
119
|
+
"""Extract package and import statements.
|
|
120
|
+
|
|
121
|
+
Args:
|
|
122
|
+
root: Root AST node.
|
|
123
|
+
lines: Source lines.
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
List of package and import statements.
|
|
127
|
+
"""
|
|
128
|
+
imports = []
|
|
129
|
+
for child in root.children:
|
|
130
|
+
if child.type == "package_declaration" or child.type == "import_declaration":
|
|
131
|
+
start = child.start_point[0]
|
|
132
|
+
end = child.end_point[0]
|
|
133
|
+
imports.append("\n".join(lines[start : end + 1]))
|
|
134
|
+
return imports
|
|
135
|
+
|
|
136
|
+
def _get_identifier(self, node: Any) -> str:
|
|
137
|
+
"""Get identifier (name) from a declaration."""
|
|
138
|
+
for child in node.children:
|
|
139
|
+
if child.type == "identifier":
|
|
140
|
+
return child.text.decode("utf-8")
|
|
141
|
+
return "Unknown"
|
|
142
|
+
|
|
143
|
+
def _get_javadoc(self, node: Any, lines: list[str]) -> str | None:
|
|
144
|
+
"""Extract Javadoc comment if present before the node.
|
|
145
|
+
|
|
146
|
+
Args:
|
|
147
|
+
node: AST node.
|
|
148
|
+
lines: Source lines.
|
|
149
|
+
|
|
150
|
+
Returns:
|
|
151
|
+
Javadoc text or None.
|
|
152
|
+
"""
|
|
153
|
+
start_line = node.start_point[0]
|
|
154
|
+
if start_line == 0:
|
|
155
|
+
return None
|
|
156
|
+
|
|
157
|
+
# Look for block comment ending just before this node
|
|
158
|
+
for i in range(start_line - 1, max(-1, start_line - 20), -1):
|
|
159
|
+
line = lines[i].strip()
|
|
160
|
+
if line.endswith("*/"):
|
|
161
|
+
# Found end of comment, find start
|
|
162
|
+
doc_lines: list[str] = []
|
|
163
|
+
for j in range(i, max(-1, i - 50), -1):
|
|
164
|
+
doc_lines.insert(0, lines[j])
|
|
165
|
+
if lines[j].strip().startswith("/**"):
|
|
166
|
+
return "\n".join(doc_lines)
|
|
167
|
+
elif line and not line.startswith("*") and not line.startswith("@"):
|
|
168
|
+
break
|
|
169
|
+
|
|
170
|
+
return None
|
|
171
|
+
|
|
172
|
+
def _extract_class(
|
|
173
|
+
self,
|
|
174
|
+
node: Any,
|
|
175
|
+
lines: list[str],
|
|
176
|
+
file_path: str,
|
|
177
|
+
imports: list[str],
|
|
178
|
+
) -> CodeChunk | None:
|
|
179
|
+
"""Extract a class as a chunk."""
|
|
180
|
+
try:
|
|
181
|
+
name = self._get_identifier(node)
|
|
182
|
+
start_line = node.start_point[0]
|
|
183
|
+
end_line = node.end_point[0]
|
|
184
|
+
content = "\n".join(lines[start_line : end_line + 1])
|
|
185
|
+
docstring = self._get_javadoc(node, lines)
|
|
186
|
+
|
|
187
|
+
# Build signature from first line
|
|
188
|
+
signature = lines[start_line].strip()
|
|
189
|
+
|
|
190
|
+
return CodeChunk(
|
|
191
|
+
content=content,
|
|
192
|
+
chunk_type=ChunkType.CLASS,
|
|
193
|
+
name=name,
|
|
194
|
+
start_line=start_line + 1,
|
|
195
|
+
end_line=end_line + 1,
|
|
196
|
+
language="java",
|
|
197
|
+
file_path=file_path,
|
|
198
|
+
docstring=docstring,
|
|
199
|
+
imports=imports[:5],
|
|
200
|
+
signature=signature,
|
|
201
|
+
)
|
|
202
|
+
except Exception:
|
|
203
|
+
return None
|
|
204
|
+
|
|
205
|
+
def _extract_interface(
|
|
206
|
+
self,
|
|
207
|
+
node: Any,
|
|
208
|
+
lines: list[str],
|
|
209
|
+
file_path: str,
|
|
210
|
+
imports: list[str],
|
|
211
|
+
) -> CodeChunk | None:
|
|
212
|
+
"""Extract an interface as a chunk."""
|
|
213
|
+
try:
|
|
214
|
+
name = self._get_identifier(node)
|
|
215
|
+
start_line = node.start_point[0]
|
|
216
|
+
end_line = node.end_point[0]
|
|
217
|
+
content = "\n".join(lines[start_line : end_line + 1])
|
|
218
|
+
docstring = self._get_javadoc(node, lines)
|
|
219
|
+
signature = lines[start_line].strip()
|
|
220
|
+
|
|
221
|
+
return CodeChunk(
|
|
222
|
+
content=content,
|
|
223
|
+
chunk_type=ChunkType.CLASS, # Treat interface as class type
|
|
224
|
+
name=name,
|
|
225
|
+
start_line=start_line + 1,
|
|
226
|
+
end_line=end_line + 1,
|
|
227
|
+
language="java",
|
|
228
|
+
file_path=file_path,
|
|
229
|
+
docstring=docstring,
|
|
230
|
+
imports=imports[:5],
|
|
231
|
+
signature=signature,
|
|
232
|
+
)
|
|
233
|
+
except Exception:
|
|
234
|
+
return None
|
|
235
|
+
|
|
236
|
+
def _extract_enum(
|
|
237
|
+
self,
|
|
238
|
+
node: Any,
|
|
239
|
+
lines: list[str],
|
|
240
|
+
file_path: str,
|
|
241
|
+
imports: list[str],
|
|
242
|
+
) -> CodeChunk | None:
|
|
243
|
+
"""Extract an enum as a chunk."""
|
|
244
|
+
try:
|
|
245
|
+
name = self._get_identifier(node)
|
|
246
|
+
start_line = node.start_point[0]
|
|
247
|
+
end_line = node.end_point[0]
|
|
248
|
+
content = "\n".join(lines[start_line : end_line + 1])
|
|
249
|
+
docstring = self._get_javadoc(node, lines)
|
|
250
|
+
signature = lines[start_line].strip()
|
|
251
|
+
|
|
252
|
+
return CodeChunk(
|
|
253
|
+
content=content,
|
|
254
|
+
chunk_type=ChunkType.CLASS,
|
|
255
|
+
name=name,
|
|
256
|
+
start_line=start_line + 1,
|
|
257
|
+
end_line=end_line + 1,
|
|
258
|
+
language="java",
|
|
259
|
+
file_path=file_path,
|
|
260
|
+
docstring=docstring,
|
|
261
|
+
imports=imports[:5],
|
|
262
|
+
signature=signature,
|
|
263
|
+
)
|
|
264
|
+
except Exception:
|
|
265
|
+
return None
|
|
266
|
+
|
|
267
|
+
def _extract_method(
|
|
268
|
+
self,
|
|
269
|
+
node: Any,
|
|
270
|
+
lines: list[str],
|
|
271
|
+
file_path: str,
|
|
272
|
+
imports: list[str],
|
|
273
|
+
parent: str,
|
|
274
|
+
) -> CodeChunk | None:
|
|
275
|
+
"""Extract a method as a chunk."""
|
|
276
|
+
try:
|
|
277
|
+
name = self._get_identifier(node)
|
|
278
|
+
start_line = node.start_point[0]
|
|
279
|
+
end_line = node.end_point[0]
|
|
280
|
+
content = "\n".join(lines[start_line : end_line + 1])
|
|
281
|
+
docstring = self._get_javadoc(node, lines)
|
|
282
|
+
signature = lines[start_line].strip()
|
|
283
|
+
|
|
284
|
+
return CodeChunk(
|
|
285
|
+
content=content,
|
|
286
|
+
chunk_type=ChunkType.METHOD,
|
|
287
|
+
name=name,
|
|
288
|
+
start_line=start_line + 1,
|
|
289
|
+
end_line=end_line + 1,
|
|
290
|
+
language="java",
|
|
291
|
+
file_path=file_path,
|
|
292
|
+
parent=parent,
|
|
293
|
+
docstring=docstring,
|
|
294
|
+
imports=imports[:3],
|
|
295
|
+
signature=signature,
|
|
296
|
+
)
|
|
297
|
+
except Exception:
|
|
298
|
+
return None
|
|
299
|
+
|
|
300
|
+
def _extract_constructor(
|
|
301
|
+
self,
|
|
302
|
+
node: Any,
|
|
303
|
+
lines: list[str],
|
|
304
|
+
file_path: str,
|
|
305
|
+
imports: list[str],
|
|
306
|
+
parent: str,
|
|
307
|
+
) -> CodeChunk | None:
|
|
308
|
+
"""Extract a constructor as a chunk."""
|
|
309
|
+
try:
|
|
310
|
+
start_line = node.start_point[0]
|
|
311
|
+
end_line = node.end_point[0]
|
|
312
|
+
content = "\n".join(lines[start_line : end_line + 1])
|
|
313
|
+
docstring = self._get_javadoc(node, lines)
|
|
314
|
+
signature = lines[start_line].strip()
|
|
315
|
+
|
|
316
|
+
return CodeChunk(
|
|
317
|
+
content=content,
|
|
318
|
+
chunk_type=ChunkType.METHOD,
|
|
319
|
+
name=f"{parent}", # Constructor has same name as class
|
|
320
|
+
start_line=start_line + 1,
|
|
321
|
+
end_line=end_line + 1,
|
|
322
|
+
language="java",
|
|
323
|
+
file_path=file_path,
|
|
324
|
+
parent=parent,
|
|
325
|
+
docstring=docstring,
|
|
326
|
+
imports=imports[:3],
|
|
327
|
+
signature=signature,
|
|
328
|
+
)
|
|
329
|
+
except Exception:
|
|
330
|
+
return None
|
|
331
|
+
|
|
332
|
+
def _create_module_chunk(self, file_path: str, content: str) -> CodeChunk:
|
|
333
|
+
"""Create a module-level chunk for the entire file."""
|
|
334
|
+
name = file_path.split("/")[-1] if "/" in file_path else file_path
|
|
335
|
+
return CodeChunk(
|
|
336
|
+
content=content,
|
|
337
|
+
chunk_type=ChunkType.MODULE,
|
|
338
|
+
name=name,
|
|
339
|
+
start_line=1,
|
|
340
|
+
end_line=content.count("\n") + 1,
|
|
341
|
+
language="java",
|
|
342
|
+
file_path=file_path,
|
|
343
|
+
)
|