nc1709-1.15.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nc1709/__init__.py +13 -0
- nc1709/agent/__init__.py +36 -0
- nc1709/agent/core.py +505 -0
- nc1709/agent/mcp_bridge.py +245 -0
- nc1709/agent/permissions.py +298 -0
- nc1709/agent/tools/__init__.py +21 -0
- nc1709/agent/tools/base.py +440 -0
- nc1709/agent/tools/bash_tool.py +367 -0
- nc1709/agent/tools/file_tools.py +454 -0
- nc1709/agent/tools/notebook_tools.py +516 -0
- nc1709/agent/tools/search_tools.py +322 -0
- nc1709/agent/tools/task_tool.py +284 -0
- nc1709/agent/tools/web_tools.py +555 -0
- nc1709/agents/__init__.py +17 -0
- nc1709/agents/auto_fix.py +506 -0
- nc1709/agents/test_generator.py +507 -0
- nc1709/checkpoints.py +372 -0
- nc1709/cli.py +3380 -0
- nc1709/cli_ui.py +1080 -0
- nc1709/cognitive/__init__.py +149 -0
- nc1709/cognitive/anticipation.py +594 -0
- nc1709/cognitive/context_engine.py +1046 -0
- nc1709/cognitive/council.py +824 -0
- nc1709/cognitive/learning.py +761 -0
- nc1709/cognitive/router.py +583 -0
- nc1709/cognitive/system.py +519 -0
- nc1709/config.py +155 -0
- nc1709/custom_commands.py +300 -0
- nc1709/executor.py +333 -0
- nc1709/file_controller.py +354 -0
- nc1709/git_integration.py +308 -0
- nc1709/github_integration.py +477 -0
- nc1709/image_input.py +446 -0
- nc1709/linting.py +519 -0
- nc1709/llm_adapter.py +667 -0
- nc1709/logger.py +192 -0
- nc1709/mcp/__init__.py +18 -0
- nc1709/mcp/client.py +370 -0
- nc1709/mcp/manager.py +407 -0
- nc1709/mcp/protocol.py +210 -0
- nc1709/mcp/server.py +473 -0
- nc1709/memory/__init__.py +20 -0
- nc1709/memory/embeddings.py +325 -0
- nc1709/memory/indexer.py +474 -0
- nc1709/memory/sessions.py +432 -0
- nc1709/memory/vector_store.py +451 -0
- nc1709/models/__init__.py +86 -0
- nc1709/models/detector.py +377 -0
- nc1709/models/formats.py +315 -0
- nc1709/models/manager.py +438 -0
- nc1709/models/registry.py +497 -0
- nc1709/performance/__init__.py +343 -0
- nc1709/performance/cache.py +705 -0
- nc1709/performance/pipeline.py +611 -0
- nc1709/performance/tiering.py +543 -0
- nc1709/plan_mode.py +362 -0
- nc1709/plugins/__init__.py +17 -0
- nc1709/plugins/agents/__init__.py +18 -0
- nc1709/plugins/agents/django_agent.py +912 -0
- nc1709/plugins/agents/docker_agent.py +623 -0
- nc1709/plugins/agents/fastapi_agent.py +887 -0
- nc1709/plugins/agents/git_agent.py +731 -0
- nc1709/plugins/agents/nextjs_agent.py +867 -0
- nc1709/plugins/base.py +359 -0
- nc1709/plugins/manager.py +411 -0
- nc1709/plugins/registry.py +337 -0
- nc1709/progress.py +443 -0
- nc1709/prompts/__init__.py +22 -0
- nc1709/prompts/agent_system.py +180 -0
- nc1709/prompts/task_prompts.py +340 -0
- nc1709/prompts/unified_prompt.py +133 -0
- nc1709/reasoning_engine.py +541 -0
- nc1709/remote_client.py +266 -0
- nc1709/shell_completions.py +349 -0
- nc1709/slash_commands.py +649 -0
- nc1709/task_classifier.py +408 -0
- nc1709/version_check.py +177 -0
- nc1709/web/__init__.py +8 -0
- nc1709/web/server.py +950 -0
- nc1709/web/templates/index.html +1127 -0
- nc1709-1.15.4.dist-info/METADATA +858 -0
- nc1709-1.15.4.dist-info/RECORD +86 -0
- nc1709-1.15.4.dist-info/WHEEL +5 -0
- nc1709-1.15.4.dist-info/entry_points.txt +2 -0
- nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
- nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/memory/embeddings.py

@@ -0,0 +1,325 @@
+"""
+Embedding Engine for NC1709
+Generates embeddings for code and text using sentence-transformers
+"""
+import hashlib
+from typing import List, Optional, Union
+from pathlib import Path
+
+# Lazy import to avoid slow startup
+_model = None
+_model_name = None
+
+
+def get_embedding_model(model_name: str = "all-MiniLM-L6-v2"):
+    """Get or create the embedding model (singleton pattern)
+
+    Args:
+        model_name: Name of the sentence-transformers model
+
+    Returns:
+        SentenceTransformer model
+    """
+    global _model, _model_name
+
+    if _model is None or _model_name != model_name:
+        try:
+            from sentence_transformers import SentenceTransformer
+            print(f"Loading embedding model: {model_name}...")
+            _model = SentenceTransformer(model_name)
+            _model_name = model_name
+            print("Embedding model loaded successfully")
+        except ImportError:
+            raise ImportError(
+                "sentence-transformers is required for embeddings. "
+                "Install with: pip install sentence-transformers"
+            )
+
+    return _model
+
+
+class EmbeddingEngine:
+    """Generates embeddings for text and code"""
+
+    # Recommended models for different use cases
+    MODELS = {
+        "default": "all-MiniLM-L6-v2",  # Fast, good general purpose (384 dims)
+        "code": "microsoft/codebert-base",  # Optimized for code (768 dims)
+        "large": "all-mpnet-base-v2",  # Higher quality (768 dims)
+    }
+
+    def __init__(
+        self,
+        model_name: Optional[str] = None,
+        cache_dir: Optional[str] = None
+    ):
+        """Initialize the embedding engine
+
+        Args:
+            model_name: Sentence-transformers model name
+            cache_dir: Directory to cache embeddings
+        """
+        self.model_name = model_name or self.MODELS["default"]
+        self._model = None  # Lazy loading
+
+        # Set up cache directory
+        if cache_dir:
+            self.cache_dir = Path(cache_dir).expanduser()
+            self.cache_dir.mkdir(parents=True, exist_ok=True)
+        else:
+            self.cache_dir = None
+
+        self._cache = {}
+
+    @property
+    def model(self):
+        """Lazy load the model"""
+        if self._model is None:
+            self._model = get_embedding_model(self.model_name)
+        return self._model
+
+    @property
+    def dimension(self) -> int:
+        """Get the embedding dimension"""
+        return self.model.get_sentence_embedding_dimension()
+
+    def embed(self, text: str, use_cache: bool = True) -> List[float]:
+        """Generate embedding for a single text
+
+        Args:
+            text: Text to embed
+            use_cache: Whether to use cached embeddings
+
+        Returns:
+            List of floats representing the embedding
+        """
+        # Check cache
+        if use_cache:
+            cache_key = self._get_cache_key(text)
+            if cache_key in self._cache:
+                return self._cache[cache_key]
+
+        # Generate embedding
+        embedding = self.model.encode(text, convert_to_numpy=True).tolist()
+
+        # Cache result
+        if use_cache:
+            self._cache[cache_key] = embedding
+
+        return embedding
+
+    def embed_batch(
+        self,
+        texts: List[str],
+        batch_size: int = 32,
+        show_progress: bool = False
+    ) -> List[List[float]]:
+        """Generate embeddings for multiple texts
+
+        Args:
+            texts: List of texts to embed
+            batch_size: Batch size for processing
+            show_progress: Whether to show progress bar
+
+        Returns:
+            List of embeddings
+        """
+        embeddings = self.model.encode(
+            texts,
+            batch_size=batch_size,
+            show_progress_bar=show_progress,
+            convert_to_numpy=True
+        )
+        return embeddings.tolist()
+
+    def embed_code(self, code: str, language: Optional[str] = None) -> List[float]:
+        """Generate embedding for code with optional language context
+
+        Args:
+            code: Source code to embed
+            language: Programming language (for context)
+
+        Returns:
+            Embedding vector
+        """
+        # Add language context if provided
+        if language:
+            text = f"[{language}] {code}"
+        else:
+            text = code
+
+        return self.embed(text)
+
+    def similarity(self, embedding1: List[float], embedding2: List[float]) -> float:
+        """Calculate cosine similarity between two embeddings
+
+        Args:
+            embedding1: First embedding
+            embedding2: Second embedding
+
+        Returns:
+            Similarity score (-1 to 1)
+        """
+        import numpy as np
+
+        a = np.array(embedding1)
+        b = np.array(embedding2)
+
+        return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
+
+    def _get_cache_key(self, text: str) -> str:
+        """Generate cache key for text
+
+        Args:
+            text: Text to hash
+
+        Returns:
+            Cache key
+        """
+        return hashlib.md5(text.encode()).hexdigest()
+
+    def clear_cache(self):
+        """Clear the embedding cache"""
+        self._cache = {}
+
+
+class CodeChunker:
+    """Splits code into meaningful chunks for embedding"""
+
+    # Default chunk settings
+    DEFAULT_CHUNK_SIZE = 512  # tokens
+    DEFAULT_OVERLAP = 64  # tokens
+
+    def __init__(
+        self,
+        chunk_size: int = DEFAULT_CHUNK_SIZE,
+        overlap: int = DEFAULT_OVERLAP
+    ):
+        """Initialize the code chunker
+
+        Args:
+            chunk_size: Maximum chunk size in approximate tokens
+            overlap: Overlap between chunks
+        """
+        self.chunk_size = chunk_size
+        self.overlap = overlap
+
+    def chunk_code(
+        self,
+        code: str,
+        language: Optional[str] = None
+    ) -> List[dict]:
+        """Split code into chunks
+
+        Args:
+            code: Source code to chunk
+            language: Programming language
+
+        Returns:
+            List of chunk dictionaries with content and metadata
+        """
+        chunks = []
+        lines = code.split('\n')
+
+        # Approximate tokens (rough estimate: 4 chars per token)
+        chars_per_chunk = self.chunk_size * 4
+        overlap_chars = self.overlap * 4
+
+        current_chunk = []
+        current_size = 0
+        chunk_start_line = 0
+
+        for i, line in enumerate(lines):
+            line_size = len(line) + 1  # +1 for newline
+
+            if current_size + line_size > chars_per_chunk and current_chunk:
+                # Save current chunk
+                chunk_content = '\n'.join(current_chunk)
+                chunks.append({
+                    'content': chunk_content,
+                    'start_line': chunk_start_line,
+                    'end_line': i - 1,
+                    'language': language
+                })
+
+                # Start new chunk with overlap
+                overlap_lines = []
+                overlap_size = 0
+                for prev_line in reversed(current_chunk):
+                    if overlap_size + len(prev_line) > overlap_chars:
+                        break
+                    overlap_lines.insert(0, prev_line)
+                    overlap_size += len(prev_line) + 1
+
+                current_chunk = overlap_lines
+                current_size = overlap_size
+                chunk_start_line = i - len(overlap_lines)
+
+            current_chunk.append(line)
+            current_size += line_size
+
+        # Don't forget the last chunk
+        if current_chunk:
+            chunk_content = '\n'.join(current_chunk)
+            if chunk_content.strip():  # Only add non-empty chunks
+                chunks.append({
+                    'content': chunk_content,
+                    'start_line': chunk_start_line,
+                    'end_line': len(lines) - 1,
+                    'language': language
+                })
+
+        return chunks
+
+    def chunk_by_functions(self, code: str, language: str) -> List[dict]:
+        """Split code by function/class definitions (language-aware)
+
+        Args:
+            code: Source code
+            language: Programming language
+
+        Returns:
+            List of chunks, one per function/class
+        """
+        # Simple regex-based function detection
+        import re
+
+        chunks = []
+        patterns = {
+            'python': r'^(def |class |async def )',
+            'javascript': r'^(function |const \w+ = |class |async function )',
+            'typescript': r'^(function |const \w+ = |class |async function |interface |type )',
+            'go': r'^func ',
+            'rust': r'^(fn |impl |struct |enum )',
+        }
+
+        pattern = patterns.get(language.lower(), patterns['python'])
+        lines = code.split('\n')
+
+        current_chunk = []
+        chunk_start = 0
+
+        for i, line in enumerate(lines):
+            if re.match(pattern, line.strip()) and current_chunk:
+                # Save previous chunk
+                chunks.append({
+                    'content': '\n'.join(current_chunk),
+                    'start_line': chunk_start,
+                    'end_line': i - 1,
+                    'language': language
+                })
+                current_chunk = []
+                chunk_start = i
+
+            current_chunk.append(line)
+
+        # Last chunk
+        if current_chunk:
+            chunks.append({
+                'content': '\n'.join(current_chunk),
+                'start_line': chunk_start,
+                'end_line': len(lines) - 1,
+                'language': language
+            })
+
+        return chunks
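
For context, a minimal usage sketch of the EmbeddingEngine class added in this file. The snippet is illustrative only and is not part of the package: it assumes the wheel is installed alongside sentence-transformers, and the sample strings are hypothetical.

from nc1709.memory.embeddings import EmbeddingEngine

# Defaults to all-MiniLM-L6-v2, which produces 384-dimensional vectors
engine = EmbeddingEngine()

# Text and code share one model; embed_code() only prepends a "[python]" marker
vec_text = engine.embed("compute the sum of two numbers")
vec_code = engine.embed_code("def add(a, b): return a + b", language="python")

print(engine.dimension)                       # 384 for the default model
print(engine.similarity(vec_text, vec_code))  # cosine similarity in [-1, 1]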
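
Similarly, a sketch of how CodeChunker output could feed embed_batch. The file name example.py and the chunk sizes are arbitrary assumptions for illustration:

from nc1709.memory.embeddings import CodeChunker, EmbeddingEngine

chunker = CodeChunker(chunk_size=256, overlap=32)  # ~1024 chars per chunk
with open("example.py") as f:                      # any local source file
    source = f.read()

# Each chunk dict carries content, start_line, end_line, and language
chunks = chunker.chunk_code(source, language="python")

engine = EmbeddingEngine()
vectors = engine.embed_batch([c["content"] for c in chunks], batch_size=16)
for chunk, vector in zip(chunks, vectors):
    print(chunk["start_line"], chunk["end_line"], len(vector))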