nexus-dev 3.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nexus_dev/__init__.py +4 -0
- nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev/agents/__init__.py +20 -0
- nexus_dev/agents/agent_config.py +97 -0
- nexus_dev/agents/agent_executor.py +197 -0
- nexus_dev/agents/agent_manager.py +104 -0
- nexus_dev/agents/prompt_factory.py +91 -0
- nexus_dev/chunkers/__init__.py +168 -0
- nexus_dev/chunkers/base.py +202 -0
- nexus_dev/chunkers/docs_chunker.py +291 -0
- nexus_dev/chunkers/java_chunker.py +343 -0
- nexus_dev/chunkers/javascript_chunker.py +312 -0
- nexus_dev/chunkers/python_chunker.py +308 -0
- nexus_dev/cli.py +2017 -0
- nexus_dev/config.py +261 -0
- nexus_dev/database.py +569 -0
- nexus_dev/embeddings.py +703 -0
- nexus_dev/gateway/__init__.py +10 -0
- nexus_dev/gateway/connection_manager.py +348 -0
- nexus_dev/github_importer.py +247 -0
- nexus_dev/mcp_client.py +281 -0
- nexus_dev/mcp_config.py +184 -0
- nexus_dev/schemas/mcp_config_schema.json +166 -0
- nexus_dev/server.py +1866 -0
- nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/__init__.py +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/api_designer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/code_reviewer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/debug_detective.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/doc_writer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/performance_optimizer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/refactor_architect.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/security_auditor.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/agent_templates/test_engineer.yaml +26 -0
- nexus_dev-3.3.1.data/data/nexus_dev/templates/pre-commit-hook +56 -0
- nexus_dev-3.3.1.dist-info/METADATA +668 -0
- nexus_dev-3.3.1.dist-info/RECORD +48 -0
- nexus_dev-3.3.1.dist-info/WHEEL +4 -0
- nexus_dev-3.3.1.dist-info/entry_points.txt +14 -0
- nexus_dev-3.3.1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
"""Generate structured prompts using XML tags."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from .agent_config import AgentConfig
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class PromptFactory:
    """Assemble XML-tagged system prompts for an agent.

    The generated prompt is split into clearly delimited sections:
    - ``<role_definition>``: display name, role, objective and tone
    - ``<backstory>``: the agent's background text
    - ``<nexus_memory>``: RAG snippets (only when any are supplied)
    - ``<available_tools>``: tool names (only when any are supplied)
    - ``<instructions>``: fixed RAG usage policy and workflow
    """

    @staticmethod
    def _memory_section(context_items: list[str]) -> str:
        """Render the ``<nexus_memory>`` section, or ``""`` when there are no snippets."""
        if not context_items:
            return ""

        # One bullet per snippet, in the order received.
        bullets = "\n".join(f"- {snippet}" for snippet in context_items)
        return f"""
<nexus_memory>
Project context from RAG (use this to inform your responses):
{bullets}
</nexus_memory>
"""

    @staticmethod
    def _tools_section(available_tools: list[str] | None) -> str:
        """Render the ``<available_tools>`` section, or ``""`` when no tools are given."""
        if not available_tools:
            return ""

        tool_names = ", ".join(available_tools)
        return f"""
<available_tools>
You can use these tools: {tool_names}
</available_tools>
"""

    @staticmethod
    def build(
        agent: AgentConfig,
        context_items: list[str],
        available_tools: list[str] | None = None,
    ) -> str:
        """Build the complete system prompt.

        Args:
            agent: Agent configuration; ``display_name`` and the ``profile``
                fields ``role``, ``goal``, ``tone`` and ``backstory`` are read.
            context_items: RAG search results (text snippets). An empty list
                omits the memory section entirely.
            available_tools: Tool names the agent can use. ``None`` or an
                empty list omits the tools section entirely.

        Returns:
            Formatted system prompt with XML structure.
        """
        profile = agent.profile
        memory_section = PromptFactory._memory_section(context_items)
        tools_section = PromptFactory._tools_section(available_tools)

        # Both optional sections carry their own leading/trailing newlines, so
        # they can be dropped in back-to-back (or vanish) without extra glue.
        return f"""<role_definition>
You are {agent.display_name}.
ROLE: {profile.role}
OBJECTIVE: {profile.goal}
TONE: {profile.tone}
</role_definition>

<backstory>
{profile.backstory}
</backstory>
{memory_section}{tools_section}
<instructions>
CRITICAL RAG USAGE POLICY:
- You MUST use search_knowledge, search_code, search_docs, or search_lessons
BEFORE answering ANY question about the project.
- Do NOT rely on your internal knowledge or the <nexus_memory> context alone
when the user asks about specific implementations, configurations, or docs.
- If your first search yields no results, try:
1. Broadening your search query
2. Searching different content types (code vs docs vs lessons)
3. Breaking down the question into smaller searchable parts
- Only after exhausting RAG searches should you answer based on general
knowledge, and you must acknowledge that you couldn't find project-specific
information.

WORKFLOW:
1. Analyze the user's request carefully.
2. If the request involves project-specific information, SEARCH FIRST using
RAG tools.
3. Use your retrieved context and <nexus_memory> to provide accurate,
project-specific responses.
4. If you need to perform actions, use the available tools.
5. Be concise but thorough.
</instructions>
"""
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
"""Chunker registry and utilities."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from .base import BaseChunker, ChunkType, CodeChunk, FallbackChunker
|
|
9
|
+
from .docs_chunker import DocumentationChunker
|
|
10
|
+
from .java_chunker import JavaChunker
|
|
11
|
+
from .javascript_chunker import JavaScriptChunker, TypeScriptChunker
|
|
12
|
+
from .python_chunker import PythonChunker
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
pass
|
|
16
|
+
|
|
17
|
+
# Names exported by ``from <this package> import *``: the registry defined
# below plus the chunker classes and data types imported above.
__all__ = [
    "BaseChunker",
    "ChunkType",
    "CodeChunk",
    "ChunkerRegistry",
    "DocumentationChunker",
    "FallbackChunker",
    "JavaChunker",
    "JavaScriptChunker",
    "PythonChunker",
    "TypeScriptChunker",
]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class ChunkerRegistry:
    """Registry mapping file extensions to chunker instances.

    Built-in chunkers are registered lazily, the first time a lookup method
    is called. Chunkers passed to `register()` before that point are kept:
    the built-ins never replace an explicit registration.

    Use `get_chunker()` to get a chunker for a specific file, or
    `chunk_file()` to directly chunk a file.
    """

    # Class-level state: one registry shared by every caller in the process.
    # Keys are lower-cased extensions including the leading dot.
    _chunkers: dict[str, BaseChunker] = {}
    _initialized: bool = False

    # Extension -> language identifier, used by get_language(). Kept as a
    # class constant so the table is built once rather than on every call.
    _EXT_TO_LANGUAGE: dict[str, str] = {
        ".py": "python",
        ".pyw": "python",
        ".js": "javascript",
        ".jsx": "javascript",
        ".mjs": "javascript",
        ".cjs": "javascript",
        ".ts": "typescript",
        ".tsx": "typescript",
        ".mts": "typescript",
        ".cts": "typescript",
        ".java": "java",
        ".md": "markdown",
        ".markdown": "markdown",
        ".rst": "rst",
        ".txt": "text",
    }

    @staticmethod
    def _normalize_extension(ext: str) -> str:
        """Lower-case *ext* and make sure it starts with a dot."""
        ext_lower = ext.lower()
        return ext_lower if ext_lower.startswith(".") else f".{ext_lower}"

    @staticmethod
    def _extension_of(file_path: str | Path) -> str:
        """Return the lower-cased final suffix of *file_path* ('' if none)."""
        return Path(file_path).suffix.lower()

    @classmethod
    def _ensure_initialized(cls) -> None:
        """Register the built-in chunkers once, on first use."""
        if not cls._initialized:
            cls._register_default_chunkers()
            cls._initialized = True

    @classmethod
    def _install_defaults(cls, chunkers: list[BaseChunker]) -> None:
        """Register *chunkers* without displacing earlier explicit registrations.

        Among *chunkers* themselves a later entry still wins over an earlier
        one for a shared extension, but anything already present in the
        registry when this method is called takes precedence over all of them.

        Args:
            chunkers: Default chunker instances, in registration order.
        """
        # Snapshot what callers registered before lazy initialisation ran.
        explicit = dict(cls._chunkers)
        for chunker in chunkers:
            cls.register(chunker)
        # Put the explicit registrations back on top of the defaults.
        cls._chunkers.update(explicit)

    @classmethod
    def _register_default_chunkers(cls) -> None:
        """Register all built-in chunkers."""
        cls._install_defaults(
            [
                PythonChunker(),
                JavaScriptChunker(),
                TypeScriptChunker(),
                JavaChunker(),
                DocumentationChunker(),
            ]
        )

    @classmethod
    def register(cls, chunker: BaseChunker) -> None:
        """Register a chunker for its supported extensions.

        Extensions are matched case-insensitively and a missing leading dot
        is added. A later registration for the same extension replaces the
        earlier one.

        Args:
            chunker: Chunker instance to register.
        """
        for ext in chunker.supported_extensions:
            cls._chunkers[cls._normalize_extension(ext)] = chunker

    @classmethod
    def get_chunker(cls, file_path: str | Path) -> BaseChunker | None:
        """Get the appropriate chunker for a file.

        Args:
            file_path: Path to the file.

        Returns:
            Chunker instance or None if no specific chunker matches.
        """
        cls._ensure_initialized()
        return cls._chunkers.get(cls._extension_of(file_path))

    @classmethod
    def chunk_file(cls, file_path: str | Path, content: str) -> list[CodeChunk]:
        """Chunk a file using the appropriate chunker.

        Files whose extension has no registered chunker are handled by a
        fresh `FallbackChunker`.

        Args:
            file_path: Path to the file.
            content: File content.

        Returns:
            List of code chunks.
        """
        cls._ensure_initialized()

        file_path_str = str(file_path)
        chunker = cls.get_chunker(file_path)
        if chunker is None:
            # Use fallback chunker for unknown types
            chunker = FallbackChunker()
        return chunker.chunk_file(file_path_str, content)

    @classmethod
    def get_supported_extensions(cls) -> list[str]:
        """Get list of all supported file extensions.

        Returns:
            List of supported extensions (lower-case, with leading dot).
        """
        cls._ensure_initialized()
        return list(cls._chunkers)

    @classmethod
    def is_supported(cls, file_path: str | Path) -> bool:
        """Check if a file type is supported.

        Args:
            file_path: Path to check.

        Returns:
            True if the file type has a registered chunker.
        """
        cls._ensure_initialized()
        return cls._extension_of(file_path) in cls._chunkers

    @classmethod
    def get_language(cls, file_path: str | Path) -> str:
        """Get the language identifier for a file.

        The answer comes from a fixed extension table and does not depend on
        which chunkers are registered.

        Args:
            file_path: Path to the file.

        Returns:
            Language identifier string, or "unknown" for unlisted extensions.
        """
        return cls._EXT_TO_LANGUAGE.get(cls._extension_of(file_path), "unknown")
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""Base classes for code chunkers."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from enum import Enum
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ChunkType(str, Enum):
    """Kinds of semantic unit a chunk can represent.

    The ``str`` mix-in means each member is also a plain string equal to
    its value.
    """

    # Code elements
    FUNCTION = "function"
    CLASS = "class"
    METHOD = "method"
    MODULE = "module"

    # Documentation elements
    DOCUMENTATION = "documentation"
    SECTION = "section"  # A section within a documentation file

    LESSON = "lesson"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class CodeChunk:
    """A single semantic piece of a source or documentation file.

    Attributes:
        content: The actual code/text content.
        chunk_type: Type of chunk (function, class, etc.).
        name: Name of the code element.
        start_line: Starting line number (1-indexed).
        end_line: Ending line number (1-indexed).
        language: Programming language identifier.
        file_path: Source file path.
        parent: Parent element name (e.g., class name for methods).
        docstring: Extracted docstring if available.
        imports: List of imports used in this chunk (for context).
        signature: Function/method signature.
    """

    # Required fields
    content: str
    chunk_type: ChunkType
    name: str
    start_line: int
    end_line: int
    language: str

    # Optional metadata
    file_path: str = ""
    parent: str | None = None
    docstring: str | None = None
    imports: list[str] = field(default_factory=list)
    signature: str | None = None

    def get_searchable_text(self) -> str:
        """Return the text to embed and search for this chunk.

        The result is, one per line: a ``#`` header (the signature if set,
        otherwise ``<chunk type>: <name>`` if a name is set), the docstring
        if set, and finally the raw content.
        """
        # The signature takes priority over the generic "type: name" header.
        header = None
        if self.signature:
            header = f"# {self.signature}"
        elif self.name:
            header = f"# {self.chunk_type.value}: {self.name}"

        # Drop whichever optional pieces are missing; content is always last.
        lead = [piece for piece in (header, self.docstring) if piece]
        return "\n".join([*lead, self.content])
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class BaseChunker(ABC):
    """Common interface every chunker implements.

    Adding a language means:
    1. Subclassing BaseChunker
    2. Providing the supported_extensions property
    3. Implementing chunk_file (chunk_content delegates to it by default)
    4. Registering an instance in ChunkerRegistry
    """

    @property
    @abstractmethod
    def supported_extensions(self) -> list[str]:
        """File extensions handled by this chunker.

        Returns:
            Extensions including the dot (e.g., ['.py', '.pyw']).
        """

    @property
    def language_name(self) -> str:
        """Human-readable language name derived from the class name.

        Returns:
            The class name with "Chunker" removed (e.g., 'Python' for
            PythonChunker).
        """
        class_name = self.__class__.__name__
        return class_name.replace("Chunker", "")

    @abstractmethod
    def chunk_file(self, file_path: str, content: str) -> list[CodeChunk]:
        """Split a file's content into semantic chunks.

        Args:
            file_path: Path to the file (for metadata).
            content: File content as string.

        Returns:
            List of extracted code chunks.
        """

    def chunk_content(self, content: str, file_name: str = "unknown") -> list[CodeChunk]:
        """Chunk a string that did not necessarily come from a file.

        Args:
            content: Code/text content.
            file_name: Filename for metadata.

        Returns:
            List of extracted code chunks, exactly as produced by
            chunk_file(file_name, content).
        """
        # Same work as chunk_file, with the arguments in the other order.
        chunks = self.chunk_file(file_name, content)
        return chunks
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
class FallbackChunker(BaseChunker):
    """Fallback chunker for unsupported file types.

    Uses simple character-based chunking with overlap: content longer than
    MAX_CHUNK_SIZE characters is cut into windows of at most that size, each
    window starting OVERLAP_SIZE characters before the previous one ended.
    """

    MAX_CHUNK_SIZE = 1500
    OVERLAP_SIZE = 200

    @property
    def supported_extensions(self) -> list[str]:
        return []  # Matches nothing, used as fallback

    @staticmethod
    def _line_span(text: str) -> int:
        """Return how many lines past its first line *text* reaches."""
        newlines = text.count("\n")
        # A trailing newline ends the last line; it does not begin another
        # one, so it must not push the end line forward.
        return newlines - 1 if text.endswith("\n") else newlines

    def chunk_file(self, file_path: str, content: str) -> list[CodeChunk]:
        """Chunk content by character count with overlap.

        Args:
            file_path: Path to the file. The part after the last "/" is used
                in chunk names.
            content: File content.

        Returns:
            List of text chunks: empty for blank content, a single chunk
            named after the file when the content fits in MAX_CHUNK_SIZE,
            otherwise overlapping chunks named "<file>:chunk_<index>". Line
            numbers are 1-indexed and inclusive.
        """
        if not content.strip():
            return []

        base_name = file_path.rsplit("/", 1)[-1]
        total = len(content)

        # For small files, return as single chunk
        if total <= self.MAX_CHUNK_SIZE:
            return [
                CodeChunk(
                    content=content,
                    chunk_type=ChunkType.MODULE,
                    name=base_name,
                    start_line=1,
                    end_line=1 + self._line_span(content),
                    language="unknown",
                    file_path=file_path,
                )
            ]

        # Split into overlapping chunks
        chunks: list[CodeChunk] = []
        start = 0
        start_line = 1  # line containing content[start], maintained incrementally

        while start < total:
            end = min(start + self.MAX_CHUNK_SIZE, total)

            # Try to break at a newline, but only if that keeps the chunk
            # longer than half the maximum size.
            if end < total:
                newline_pos = content.rfind("\n", start, end)
                if newline_pos > start + self.MAX_CHUNK_SIZE // 2:
                    end = newline_pos + 1

            chunk_text = content[start:end]
            chunks.append(
                CodeChunk(
                    content=chunk_text,
                    chunk_type=ChunkType.MODULE,
                    name=f"{base_name}:chunk_{len(chunks)}",
                    start_line=start_line,
                    end_line=start_line + self._line_span(chunk_text),
                    language="unknown",
                    file_path=file_path,
                )
            )

            if end >= total:
                break

            # Move start with overlap. The max() guarantees forward progress
            # even if a subclass sets OVERLAP_SIZE close to MAX_CHUNK_SIZE.
            next_start = max(end - self.OVERLAP_SIZE, start + 1)
            # Count only the newlines skipped over, instead of rescanning the
            # whole prefix of the file for every chunk.
            start_line += content.count("\n", start, next_start)
            start = next_start

        return chunks
|