nc1709-1.15.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nc1709/__init__.py +13 -0
- nc1709/agent/__init__.py +36 -0
- nc1709/agent/core.py +505 -0
- nc1709/agent/mcp_bridge.py +245 -0
- nc1709/agent/permissions.py +298 -0
- nc1709/agent/tools/__init__.py +21 -0
- nc1709/agent/tools/base.py +440 -0
- nc1709/agent/tools/bash_tool.py +367 -0
- nc1709/agent/tools/file_tools.py +454 -0
- nc1709/agent/tools/notebook_tools.py +516 -0
- nc1709/agent/tools/search_tools.py +322 -0
- nc1709/agent/tools/task_tool.py +284 -0
- nc1709/agent/tools/web_tools.py +555 -0
- nc1709/agents/__init__.py +17 -0
- nc1709/agents/auto_fix.py +506 -0
- nc1709/agents/test_generator.py +507 -0
- nc1709/checkpoints.py +372 -0
- nc1709/cli.py +3380 -0
- nc1709/cli_ui.py +1080 -0
- nc1709/cognitive/__init__.py +149 -0
- nc1709/cognitive/anticipation.py +594 -0
- nc1709/cognitive/context_engine.py +1046 -0
- nc1709/cognitive/council.py +824 -0
- nc1709/cognitive/learning.py +761 -0
- nc1709/cognitive/router.py +583 -0
- nc1709/cognitive/system.py +519 -0
- nc1709/config.py +155 -0
- nc1709/custom_commands.py +300 -0
- nc1709/executor.py +333 -0
- nc1709/file_controller.py +354 -0
- nc1709/git_integration.py +308 -0
- nc1709/github_integration.py +477 -0
- nc1709/image_input.py +446 -0
- nc1709/linting.py +519 -0
- nc1709/llm_adapter.py +667 -0
- nc1709/logger.py +192 -0
- nc1709/mcp/__init__.py +18 -0
- nc1709/mcp/client.py +370 -0
- nc1709/mcp/manager.py +407 -0
- nc1709/mcp/protocol.py +210 -0
- nc1709/mcp/server.py +473 -0
- nc1709/memory/__init__.py +20 -0
- nc1709/memory/embeddings.py +325 -0
- nc1709/memory/indexer.py +474 -0
- nc1709/memory/sessions.py +432 -0
- nc1709/memory/vector_store.py +451 -0
- nc1709/models/__init__.py +86 -0
- nc1709/models/detector.py +377 -0
- nc1709/models/formats.py +315 -0
- nc1709/models/manager.py +438 -0
- nc1709/models/registry.py +497 -0
- nc1709/performance/__init__.py +343 -0
- nc1709/performance/cache.py +705 -0
- nc1709/performance/pipeline.py +611 -0
- nc1709/performance/tiering.py +543 -0
- nc1709/plan_mode.py +362 -0
- nc1709/plugins/__init__.py +17 -0
- nc1709/plugins/agents/__init__.py +18 -0
- nc1709/plugins/agents/django_agent.py +912 -0
- nc1709/plugins/agents/docker_agent.py +623 -0
- nc1709/plugins/agents/fastapi_agent.py +887 -0
- nc1709/plugins/agents/git_agent.py +731 -0
- nc1709/plugins/agents/nextjs_agent.py +867 -0
- nc1709/plugins/base.py +359 -0
- nc1709/plugins/manager.py +411 -0
- nc1709/plugins/registry.py +337 -0
- nc1709/progress.py +443 -0
- nc1709/prompts/__init__.py +22 -0
- nc1709/prompts/agent_system.py +180 -0
- nc1709/prompts/task_prompts.py +340 -0
- nc1709/prompts/unified_prompt.py +133 -0
- nc1709/reasoning_engine.py +541 -0
- nc1709/remote_client.py +266 -0
- nc1709/shell_completions.py +349 -0
- nc1709/slash_commands.py +649 -0
- nc1709/task_classifier.py +408 -0
- nc1709/version_check.py +177 -0
- nc1709/web/__init__.py +8 -0
- nc1709/web/server.py +950 -0
- nc1709/web/templates/index.html +1127 -0
- nc1709-1.15.4.dist-info/METADATA +858 -0
- nc1709-1.15.4.dist-info/RECORD +86 -0
- nc1709-1.15.4.dist-info/WHEEL +5 -0
- nc1709-1.15.4.dist-info/entry_points.txt +2 -0
- nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
- nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/cognitive/context_engine.py
@@ -0,0 +1,1046 @@
"""
Layer 2: Deep Context Engine

Provides semantic understanding of the codebase through:
- AST analysis and code graph building
- Call graph and dependency mapping
- Pattern detection and recognition
- Semantic search via embeddings (ChromaDB)
- Incremental indexing for large codebases

This layer answers: "What does NC1709 know about this codebase?"
"""

import os
import ast
import hashlib
import json
import logging
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Any, Tuple
from pathlib import Path
from enum import Enum
from datetime import datetime
import threading

logger = logging.getLogger(__name__)

class NodeType(Enum):
    """Types of nodes in the code graph"""
    MODULE = "module"
    CLASS = "class"
    FUNCTION = "function"
    METHOD = "method"
    VARIABLE = "variable"
    IMPORT = "import"
    CONSTANT = "constant"


@dataclass
class CodeNode:
    """A node in the code graph representing a code element"""
    id: str  # Unique identifier (file:line:name)
    name: str
    node_type: NodeType
    file_path: str
    line_start: int
    line_end: int
    docstring: Optional[str] = None
    signature: Optional[str] = None
    parent_id: Optional[str] = None
    children_ids: List[str] = field(default_factory=list)
    references: List[str] = field(default_factory=list)  # What this node references
    referenced_by: List[str] = field(default_factory=list)  # What references this node
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for serialization"""
        return {
            "id": self.id,
            "name": self.name,
            "node_type": self.node_type.value,
            "file_path": self.file_path,
            "line_start": self.line_start,
            "line_end": self.line_end,
            "docstring": self.docstring,
            "signature": self.signature,
            "parent_id": self.parent_id,
            "children_ids": self.children_ids,
            "references": self.references,
            "referenced_by": self.referenced_by,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CodeNode":
        """Create from dictionary"""
        return cls(
            id=data["id"],
            name=data["name"],
            node_type=NodeType(data["node_type"]),
            file_path=data["file_path"],
            line_start=data["line_start"],
            line_end=data["line_end"],
            docstring=data.get("docstring"),
            signature=data.get("signature"),
            parent_id=data.get("parent_id"),
            children_ids=data.get("children_ids", []),
            references=data.get("references", []),
            referenced_by=data.get("referenced_by", []),
            metadata=data.get("metadata", {}),
        )

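# Usage sketch (illustrative, not part of the packaged file): to_dict() and
# from_dict() are the JSON round-trip used later by DeepContextEngine's
# save_cache()/load_cache(); the enum is stored by value and restored via
# NodeType(value), so a round-tripped node compares equal.
#
#   node = CodeNode(id="m.py:1:m", name="m", node_type=NodeType.MODULE,
#                   file_path="m.py", line_start=1, line_end=10)
#   assert CodeNode.from_dict(node.to_dict()) == node
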
@dataclass
class CodePattern:
    """A detected pattern in the codebase"""
    pattern_type: str  # e.g., "singleton", "factory", "decorator", "error_handling"
    description: str
    file_paths: List[str]
    node_ids: List[str]
    confidence: float  # 0.0 to 1.0
    examples: List[str] = field(default_factory=list)
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class FileContext:
    """Context information for a single file"""
    file_path: str
    language: str
    size_bytes: int
    line_count: int
    last_modified: datetime
    content_hash: str
    imports: List[str] = field(default_factory=list)
    exports: List[str] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)
    node_ids: List[str] = field(default_factory=list)
    summary: Optional[str] = None


@dataclass
class ContextBudget:
    """Budget allocation for context in a request"""
    max_tokens: int = 8000
    file_context_tokens: int = 3000
    code_graph_tokens: int = 2000
    pattern_tokens: int = 1000
    history_tokens: int = 2000

    def remaining(self, used: int) -> int:
        """Calculate remaining tokens"""
        return max(0, self.max_tokens - used)

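# Usage sketch (illustrative, not part of the packaged file): remaining()
# clamps at zero, so callers can subtract freely without going negative.
#
#   budget = ContextBudget(max_tokens=8000)
#   budget.remaining(6500)   # -> 1500
#   budget.remaining(9000)   # -> 0
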
class CodeGraphBuilder(ast.NodeVisitor):
    """Builds a code graph from Python AST"""

    def __init__(self, file_path: str, source_code: str):
        self.file_path = file_path
        self.source_code = source_code
        self.source_lines = source_code.splitlines()
        self.nodes: Dict[str, CodeNode] = {}
        self.current_parent: Optional[str] = None
        self.imports: List[str] = []
        self.exports: List[str] = []

    def _make_id(self, name: str, line: int) -> str:
        """Create unique node ID"""
        return f"{self.file_path}:{line}:{name}"

    def _get_docstring(self, node: ast.AST) -> Optional[str]:
        """Extract docstring from node if present"""
        try:
            return ast.get_docstring(node)
        except Exception:
            return None

    def _get_signature(self, node: ast.FunctionDef) -> str:
        """Extract function signature"""
        args = []
        for arg in node.args.args:
            arg_str = arg.arg
            if arg.annotation:
                try:
                    arg_str += f": {ast.unparse(arg.annotation)}"
                except Exception:
                    pass
            args.append(arg_str)

        # Add *args and **kwargs
        if node.args.vararg:
            args.append(f"*{node.args.vararg.arg}")
        if node.args.kwarg:
            args.append(f"**{node.args.kwarg.arg}")

        returns = ""
        if node.returns:
            try:
                returns = f" -> {ast.unparse(node.returns)}"
            except Exception:
                pass

        return f"def {node.name}({', '.join(args)}){returns}"

    def visit_Module(self, node: ast.Module) -> None:
        """Visit module node"""
        module_name = Path(self.file_path).stem
        module_id = self._make_id(module_name, 1)

        self.nodes[module_id] = CodeNode(
            id=module_id,
            name=module_name,
            node_type=NodeType.MODULE,
            file_path=self.file_path,
            line_start=1,
            line_end=len(self.source_lines),
            docstring=self._get_docstring(node),
        )

        old_parent = self.current_parent
        self.current_parent = module_id
        self.generic_visit(node)
        self.current_parent = old_parent

    def visit_ClassDef(self, node: ast.ClassDef) -> None:
        """Visit class definition"""
        class_id = self._make_id(node.name, node.lineno)

        # Get base classes
        bases = []
        for base in node.bases:
            try:
                bases.append(ast.unparse(base))
            except Exception:
                pass

        self.nodes[class_id] = CodeNode(
            id=class_id,
            name=node.name,
            node_type=NodeType.CLASS,
            file_path=self.file_path,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            docstring=self._get_docstring(node),
            parent_id=self.current_parent,
            metadata={"bases": bases, "decorators": [d.id if hasattr(d, 'id') else str(d) for d in node.decorator_list]},
        )

        # Add to parent's children
        if self.current_parent and self.current_parent in self.nodes:
            self.nodes[self.current_parent].children_ids.append(class_id)

        # Track exports
        if not node.name.startswith('_'):
            self.exports.append(node.name)

        old_parent = self.current_parent
        self.current_parent = class_id
        self.generic_visit(node)
        self.current_parent = old_parent

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
        """Visit function/method definition"""
        func_id = self._make_id(node.name, node.lineno)

        # Determine if method or function
        parent_node = self.nodes.get(self.current_parent) if self.current_parent else None
        is_method = parent_node and parent_node.node_type == NodeType.CLASS

        self.nodes[func_id] = CodeNode(
            id=func_id,
            name=node.name,
            node_type=NodeType.METHOD if is_method else NodeType.FUNCTION,
            file_path=self.file_path,
            line_start=node.lineno,
            line_end=node.end_lineno or node.lineno,
            docstring=self._get_docstring(node),
            signature=self._get_signature(node),
            parent_id=self.current_parent,
            metadata={"decorators": [d.id if hasattr(d, 'id') else str(d) for d in node.decorator_list]},
        )

        # Add to parent's children
        if self.current_parent and self.current_parent in self.nodes:
            self.nodes[self.current_parent].children_ids.append(func_id)

        # Track exports
        if not node.name.startswith('_') and not is_method:
            self.exports.append(node.name)

        old_parent = self.current_parent
        self.current_parent = func_id
        self.generic_visit(node)
        self.current_parent = old_parent

    visit_AsyncFunctionDef = visit_FunctionDef

    def visit_Import(self, node: ast.Import) -> None:
        """Visit import statement"""
        for alias in node.names:
            self.imports.append(alias.name)
            import_id = self._make_id(f"import_{alias.name}", node.lineno)
            self.nodes[import_id] = CodeNode(
                id=import_id,
                name=alias.name,
                node_type=NodeType.IMPORT,
                file_path=self.file_path,
                line_start=node.lineno,
                line_end=node.lineno,
                parent_id=self.current_parent,
                metadata={"alias": alias.asname},
            )
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
        """Visit from ... import statement"""
        module = node.module or ""
        for alias in node.names:
            full_import = f"{module}.{alias.name}" if module else alias.name
            self.imports.append(full_import)
            import_id = self._make_id(f"import_{full_import}", node.lineno)
            self.nodes[import_id] = CodeNode(
                id=import_id,
                name=alias.name,
                node_type=NodeType.IMPORT,
                file_path=self.file_path,
                line_start=node.lineno,
                line_end=node.lineno,
                parent_id=self.current_parent,
                metadata={"module": module, "alias": alias.asname},
            )
        self.generic_visit(node)

    def build(self) -> Tuple[Dict[str, CodeNode], List[str], List[str]]:
        """Build the code graph and return nodes, imports, exports"""
        try:
            tree = ast.parse(self.source_code)
            self.visit(tree)
        except SyntaxError as e:
            logger.warning(f"Syntax error parsing {self.file_path}: {e}")
        except Exception as e:
            logger.warning(f"Error parsing {self.file_path}: {e}")

        return self.nodes, self.imports, self.exports

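# Usage sketch (illustrative, not part of the packaged file): the builder is
# driven directly from a source string; "example.py" is a placeholder path.
#
#   src = "def greet(name: str) -> str:\n    return 'hi ' + name\n"
#   nodes, imports, exports = CodeGraphBuilder("example.py", src).build()
#   # exports == ["greet"]; the greet node gets node_type FUNCTION and the
#   # reconstructed signature "def greet(name: str) -> str".
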
class PatternDetector:
    """Detects common code patterns in the codebase"""

    def __init__(self):
        self.patterns: List[CodePattern] = []

    def detect_patterns(self, nodes: Dict[str, CodeNode], file_contexts: Dict[str, FileContext]) -> List[CodePattern]:
        """Detect patterns across the codebase"""
        self.patterns = []

        # Detect singleton pattern
        self._detect_singleton(nodes)

        # Detect factory pattern
        self._detect_factory(nodes)

        # Detect decorator pattern
        self._detect_decorator_usage(nodes)

        # Detect error handling patterns
        self._detect_error_handling(nodes)

        # Detect MVC/MVP patterns
        self._detect_architecture_pattern(file_contexts)

        # Detect testing patterns
        self._detect_testing_pattern(nodes, file_contexts)

        return self.patterns

    def _detect_singleton(self, nodes: Dict[str, CodeNode]) -> None:
        """Detect singleton pattern"""
        for node_id, node in nodes.items():
            if node.node_type == NodeType.CLASS:
                # Check for __new__ method or _instance attribute
                has_instance = any(
                    "_instance" in child_id.lower() or "__new__" in child_id
                    for child_id in node.children_ids
                )
                if has_instance:
                    self.patterns.append(CodePattern(
                        pattern_type="singleton",
                        description=f"Singleton pattern detected in class {node.name}",
                        file_paths=[node.file_path],
                        node_ids=[node_id],
                        confidence=0.8,
                    ))

    def _detect_factory(self, nodes: Dict[str, CodeNode]) -> None:
        """Detect factory pattern"""
        for node_id, node in nodes.items():
            if node.node_type in (NodeType.FUNCTION, NodeType.METHOD):
                name_lower = node.name.lower()
                if any(kw in name_lower for kw in ["create", "build", "make", "factory", "get_instance"]):
                    self.patterns.append(CodePattern(
                        pattern_type="factory",
                        description=f"Factory pattern detected: {node.name}",
                        file_paths=[node.file_path],
                        node_ids=[node_id],
                        confidence=0.7,
                    ))

    def _detect_decorator_usage(self, nodes: Dict[str, CodeNode]) -> None:
        """Detect heavy decorator usage"""
        decorated_functions = []
        for node_id, node in nodes.items():
            if node.node_type in (NodeType.FUNCTION, NodeType.METHOD):
                decorators = node.metadata.get("decorators", [])
                if decorators:
                    decorated_functions.append((node_id, decorators))

        if len(decorated_functions) > 5:
            self.patterns.append(CodePattern(
                pattern_type="decorator_heavy",
                description=f"Heavy decorator usage detected ({len(decorated_functions)} decorated functions)",
                file_paths=list(set(nodes[nid].file_path for nid, _ in decorated_functions)),
                node_ids=[nid for nid, _ in decorated_functions[:10]],  # Limit examples
                confidence=0.9,
            ))

    def _detect_error_handling(self, nodes: Dict[str, CodeNode]) -> None:
        """Detect error handling patterns"""
        # This would need actual AST analysis for try/except blocks
        # Simplified version based on naming
        error_handlers = []
        for node_id, node in nodes.items():
            if node.node_type in (NodeType.FUNCTION, NodeType.METHOD):
                name_lower = node.name.lower()
                if any(kw in name_lower for kw in ["handle", "error", "exception", "catch"]):
                    error_handlers.append(node_id)

        if error_handlers:
            self.patterns.append(CodePattern(
                pattern_type="error_handling",
                description=f"Error handling pattern detected ({len(error_handlers)} handlers)",
                file_paths=list(set(nodes[nid].file_path for nid in error_handlers)),
                node_ids=error_handlers[:10],
                confidence=0.6,
            ))

    def _detect_architecture_pattern(self, file_contexts: Dict[str, FileContext]) -> None:
        """Detect architectural patterns like MVC"""
        files = list(file_contexts.keys())
        files_lower = [f.lower() for f in files]

        has_models = any("model" in f for f in files_lower)
        has_views = any("view" in f for f in files_lower)
        has_controllers = any("controller" in f or "handler" in f for f in files_lower)

        if has_models and has_views and has_controllers:
            self.patterns.append(CodePattern(
                pattern_type="mvc",
                description="MVC/MVP architectural pattern detected",
                file_paths=[f for f in files if any(k in f.lower() for k in ["model", "view", "controller", "handler"])],
                node_ids=[],
                confidence=0.75,
            ))

    def _detect_testing_pattern(self, nodes: Dict[str, CodeNode], file_contexts: Dict[str, FileContext]) -> None:
        """Detect testing patterns"""
        test_files = [f for f in file_contexts.keys() if "test" in f.lower()]
        test_functions = [nid for nid, n in nodes.items() if n.name.startswith("test_")]

        if test_files or test_functions:
            self.patterns.append(CodePattern(
                pattern_type="testing",
                description=f"Testing pattern detected ({len(test_files)} test files, {len(test_functions)} test functions)",
                file_paths=test_files[:10],
                node_ids=test_functions[:10],
                confidence=0.95,
            ))

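# Usage sketch (illustrative, not part of the packaged file): detection is
# purely heuristic (naming and file layout), so results carry a confidence
# score rather than a guarantee.
#
#   patterns = PatternDetector().detect_patterns(nodes, file_contexts={})
#   for p in patterns:
#       print(f"{p.pattern_type} ({p.confidence:.0%}): {p.description}")
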
class SemanticIndex:
    """Semantic search index using embeddings (ChromaDB optional)"""

    def __init__(self, index_path: Optional[Path] = None):
        self.index_path = index_path
        self._chroma_client = None
        self._collection = None
        self._fallback_index: Dict[str, Dict[str, Any]] = {}  # Simple keyword-based fallback
        self._lock = threading.Lock()

    def _init_chroma(self) -> bool:
        """Initialize ChromaDB if available"""
        if self._chroma_client is not None:
            return self._collection is not None

        try:
            import chromadb
            from chromadb.config import Settings

            persist_dir = str(self.index_path) if self.index_path else None
            if persist_dir:
                self._chroma_client = chromadb.Client(Settings(
                    persist_directory=persist_dir,
                    anonymized_telemetry=False
                ))
            else:
                self._chroma_client = chromadb.Client()

            self._collection = self._chroma_client.get_or_create_collection(
                name="nc1709_codebase",
                metadata={"hnsw:space": "cosine"}
            )
            logger.info("ChromaDB initialized for semantic search")
            return True
        except ImportError:
            logger.info("ChromaDB not available, using fallback keyword search")
            return False
        except Exception as e:
            logger.warning(f"Error initializing ChromaDB: {e}")
            return False

    def index_node(self, node: CodeNode, content: str) -> None:
        """Index a code node for semantic search"""
        with self._lock:
            # Create searchable text
            searchable = f"{node.name} {node.docstring or ''} {node.signature or ''}"

            if self._init_chroma() and self._collection:
                try:
                    self._collection.upsert(
                        ids=[node.id],
                        documents=[searchable],
                        metadatas=[{
                            "name": node.name,
                            "type": node.node_type.value,
                            "file": node.file_path,
                            "line": node.line_start,
                        }]
                    )
                except Exception as e:
                    logger.warning(f"Error indexing to ChromaDB: {e}")
                    self._fallback_index[node.id] = {
                        "text": searchable.lower(),
                        "node": node,
                    }
            else:
                # Fallback to simple keyword index
                self._fallback_index[node.id] = {
                    "text": searchable.lower(),
                    "node": node,
                }

    def search(self, query: str, limit: int = 10) -> List[Tuple[CodeNode, float]]:
        """Search for nodes matching query"""
        results = []

        with self._lock:
            if self._collection:
                try:
                    search_results = self._collection.query(
                        query_texts=[query],
                        n_results=limit
                    )
                    if search_results and search_results.get("ids"):
                        for i, node_id in enumerate(search_results["ids"][0]):
                            distance = search_results["distances"][0][i] if search_results.get("distances") else 0.5
                            score = 1.0 - distance  # Convert distance to similarity
                            # We'd need to fetch the actual node from storage
                            results.append((node_id, score))
                except Exception as e:
                    logger.warning(f"ChromaDB search error: {e}")

            # Fallback search
            if not results:
                query_lower = query.lower()
                query_terms = query_lower.split()

                for node_id, data in self._fallback_index.items():
                    text = data["text"]
                    # Simple scoring: count matching terms
                    matches = sum(1 for term in query_terms if term in text)
                    if matches > 0:
                        score = matches / len(query_terms)
                        results.append((data["node"], score))

            results.sort(key=lambda x: x[1], reverse=True)
            results = results[:limit]

        return results

    def clear(self) -> None:
        """Clear the index"""
        with self._lock:
            if self._collection:
                try:
                    self._chroma_client.delete_collection("nc1709_codebase")
                    self._collection = self._chroma_client.get_or_create_collection(
                        name="nc1709_codebase",
                        metadata={"hnsw:space": "cosine"}
                    )
                except Exception as e:
                    logger.warning(f"Error clearing ChromaDB: {e}")

            self._fallback_index.clear()

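# Usage sketch (illustrative, not part of the packaged file): with chromadb
# absent, index_node() falls back to the keyword index and search() returns
# (CodeNode, score) pairs scored by the fraction of query terms matched.
# Note the ChromaDB path above currently appends bare node-id strings, which
# is why downstream callers filter results with isinstance(node, CodeNode).
#
#   index = SemanticIndex()
#   for node in nodes.values():
#       index.index_node(node, content="")
#   for node, score in index.search("greet function", limit=5):
#       print(f"{score:.2f} {node.name}")
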
class DeepContextEngine:
    """
    Layer 2: Deep Context Engine

    Provides semantic understanding of the codebase through:
    - Code graph building and navigation
    - Pattern detection
    - Semantic search
    - Context budgeting for LLM requests
    """

    # File extensions to index
    SUPPORTED_EXTENSIONS = {
        ".py": "python",
        ".js": "javascript",
        ".ts": "typescript",
        ".jsx": "javascript",
        ".tsx": "typescript",
        ".go": "go",
        ".rs": "rust",
        ".java": "java",
        ".cpp": "cpp",
        ".c": "c",
        ".h": "c",
        ".hpp": "cpp",
        ".rb": "ruby",
        ".php": "php",
        ".swift": "swift",
        ".kt": "kotlin",
        ".scala": "scala",
        ".cs": "csharp",
    }

    # Directories to skip
    SKIP_DIRS = {
        "__pycache__", ".git", ".svn", ".hg", "node_modules",
        "venv", ".venv", "env", ".env", "dist", "build",
        ".idea", ".vscode", ".pytest_cache", ".mypy_cache",
        "eggs", "*.egg-info", ".tox", "htmlcov",
    }

    def __init__(self, project_root: Optional[Path] = None, cache_dir: Optional[Path] = None):
        self.project_root = project_root or Path.cwd()
        self.cache_dir = cache_dir or (self.project_root / ".nc1709" / "context_cache")

        # Core data structures
        self.code_graph: Dict[str, CodeNode] = {}
        self.file_contexts: Dict[str, FileContext] = {}
        self.patterns: List[CodePattern] = []

        # Components
        self.pattern_detector = PatternDetector()
        self.semantic_index = SemanticIndex(self.cache_dir / "semantic_index" if self.cache_dir else None)

        # State
        self._indexed = False
        self._lock = threading.Lock()
        self._file_hashes: Dict[str, str] = {}  # Track file changes

    def _should_skip_dir(self, dir_name: str) -> bool:
        """Check if directory should be skipped"""
        return dir_name in self.SKIP_DIRS or dir_name.startswith('.')

    def _get_file_hash(self, file_path: Path) -> str:
        """Get hash of file contents for change detection"""
        try:
            content = file_path.read_bytes()
            return hashlib.md5(content).hexdigest()
        except Exception:
            return ""

    def _get_language(self, file_path: Path) -> Optional[str]:
        """Get language from file extension"""
        return self.SUPPORTED_EXTENSIONS.get(file_path.suffix.lower())

    def index_file(self, file_path: Path, force: bool = False) -> Optional[FileContext]:
        """Index a single file"""
        str_path = str(file_path)

        # Check if file has changed
        current_hash = self._get_file_hash(file_path)
        if not force and str_path in self._file_hashes:
            if self._file_hashes[str_path] == current_hash:
                return self.file_contexts.get(str_path)

        language = self._get_language(file_path)
        if not language:
            return None

        try:
            content = file_path.read_text(encoding='utf-8', errors='ignore')
            lines = content.splitlines()

            # Build code graph for Python files
            nodes: Dict[str, CodeNode] = {}
            imports: List[str] = []
            exports: List[str] = []

            if language == "python":
                builder = CodeGraphBuilder(str_path, content)
                nodes, imports, exports = builder.build()

                # Add nodes to global graph
                with self._lock:
                    self.code_graph.update(nodes)

                # Index nodes for semantic search
                for node in nodes.values():
                    self.semantic_index.index_node(node, content)

            # Create file context
            file_context = FileContext(
                file_path=str_path,
                language=language,
                size_bytes=len(content.encode('utf-8')),
                line_count=len(lines),
                last_modified=datetime.fromtimestamp(file_path.stat().st_mtime),
                content_hash=current_hash,
                imports=imports,
                exports=exports,
                node_ids=list(nodes.keys()),
            )

            with self._lock:
                self.file_contexts[str_path] = file_context
                self._file_hashes[str_path] = current_hash

            return file_context

        except Exception as e:
            logger.warning(f"Error indexing {file_path}: {e}")
            return None

    def index_project(self, incremental: bool = True) -> Dict[str, Any]:
        """
        Index the entire project

        Args:
            incremental: If True, only index changed files

        Returns:
            Statistics about the indexing
        """
        stats = {
            "files_scanned": 0,
            "files_indexed": 0,
            "files_skipped": 0,
            "nodes_created": 0,
            "patterns_detected": 0,
            "errors": 0,
        }

        logger.info(f"Starting project indexing: {self.project_root}")

        for root, dirs, files in os.walk(self.project_root):
            # Filter out directories to skip
            dirs[:] = [d for d in dirs if not self._should_skip_dir(d)]

            for file_name in files:
                file_path = Path(root) / file_name
                stats["files_scanned"] += 1

                if self._get_language(file_path):
                    result = self.index_file(file_path, force=not incremental)
                    if result:
                        stats["files_indexed"] += 1
                        stats["nodes_created"] += len(result.node_ids)
                    else:
                        stats["errors"] += 1
                else:
                    stats["files_skipped"] += 1

        # Detect patterns
        self.patterns = self.pattern_detector.detect_patterns(self.code_graph, self.file_contexts)
        stats["patterns_detected"] = len(self.patterns)

        self._indexed = True
        logger.info(f"Indexing complete: {stats}")

        return stats

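    # Usage sketch (illustrative, not part of the packaged file): a second
    # pass with the default incremental=True hits the MD5 check in
    # index_file() and reuses the cached FileContext instead of re-parsing.
    #
    #   engine = DeepContextEngine(Path("/path/to/project"))  # placeholder path
    #   stats = engine.index_project()                  # first full pass
    #   stats = engine.index_project(incremental=True)  # unchanged files reuse cache
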
    def search_code(self, query: str, limit: int = 10) -> List[Tuple[CodeNode, float]]:
        """
        Search for code matching the query

        Args:
            query: Search query
            limit: Maximum results

        Returns:
            List of (CodeNode, score) tuples
        """
        return self.semantic_index.search(query, limit)

    def get_file_context(self, file_path: str) -> Optional[FileContext]:
        """Get context for a specific file"""
        return self.file_contexts.get(file_path)

    def get_node(self, node_id: str) -> Optional[CodeNode]:
        """Get a specific code node by ID"""
        return self.code_graph.get(node_id)

    def get_related_nodes(self, node_id: str, depth: int = 1) -> List[CodeNode]:
        """
        Get nodes related to the given node

        Args:
            node_id: Starting node ID
            depth: How many levels of relationships to follow

        Returns:
            List of related CodeNodes
        """
        if node_id not in self.code_graph:
            return []

        related = set()
        to_visit = [(node_id, 0)]
        visited = set()

        while to_visit:
            current_id, current_depth = to_visit.pop(0)

            if current_id in visited or current_depth > depth:
                continue

            visited.add(current_id)
            node = self.code_graph.get(current_id)

            if node and current_id != node_id:
                related.add(current_id)

            if node and current_depth < depth:
                # Add children
                for child_id in node.children_ids:
                    if child_id not in visited:
                        to_visit.append((child_id, current_depth + 1))

                # Add parent
                if node.parent_id and node.parent_id not in visited:
                    to_visit.append((node.parent_id, current_depth + 1))

                # Add references
                for ref_id in node.references:
                    if ref_id not in visited:
                        to_visit.append((ref_id, current_depth + 1))

        return [self.code_graph[nid] for nid in related if nid in self.code_graph]

    def get_dependencies(self, file_path: str) -> List[str]:
        """Get files that this file depends on"""
        context = self.file_contexts.get(file_path)
        if not context:
            return []

        dependencies = []
        for imp in context.imports:
            # Try to resolve import to a file in the project
            parts = imp.split('.')
            for i in range(len(parts), 0, -1):
                possible_path = self.project_root / '/'.join(parts[:i])
                if possible_path.with_suffix('.py').exists():
                    dependencies.append(str(possible_path.with_suffix('.py')))
                    break
                if (possible_path / '__init__.py').exists():
                    dependencies.append(str(possible_path / '__init__.py'))
                    break

        return dependencies

    def get_dependents(self, file_path: str) -> List[str]:
        """Get files that depend on this file"""
        module_name = Path(file_path).stem
        dependents = []

        for ctx_path, ctx in self.file_contexts.items():
            if ctx_path != file_path:
                for imp in ctx.imports:
                    if module_name in imp:
                        dependents.append(ctx_path)
                        break

        return dependents

    def build_context_for_task(
        self,
        task_description: str,
        target_files: Optional[List[str]] = None,
        budget: Optional[ContextBudget] = None
    ) -> Dict[str, Any]:
        """
        Build optimized context for a task

        Args:
            task_description: What the user is trying to do
            target_files: Specific files to include
            budget: Token budget allocation

        Returns:
            Context dict with relevant code, patterns, and metadata
        """
        budget = budget or ContextBudget()
        context = {
            "files": [],
            "nodes": [],
            "patterns": [],
            "dependencies": [],
            "summary": "",
            "tokens_used": 0,
        }

        # Search for relevant code
        search_results = self.search_code(task_description, limit=20)

        # Add target files first
        if target_files:
            for file_path in target_files:
                if file_path in self.file_contexts:
                    context["files"].append(self.file_contexts[file_path])
                    # Add file's dependencies
                    context["dependencies"].extend(self.get_dependencies(file_path))

        # Add relevant nodes from search
        for node, score in search_results:
            if isinstance(node, CodeNode):
                context["nodes"].append({
                    "node": node.to_dict(),
                    "relevance": score,
                })

        # Add relevant patterns
        for pattern in self.patterns:
            # Check if pattern is relevant to target files or search results
            if target_files:
                if any(tf in pattern.file_paths for tf in target_files):
                    context["patterns"].append({
                        "type": pattern.pattern_type,
                        "description": pattern.description,
                        "confidence": pattern.confidence,
                    })
            elif pattern.confidence > 0.7:
                context["patterns"].append({
                    "type": pattern.pattern_type,
                    "description": pattern.description,
                    "confidence": pattern.confidence,
                })

        # Generate summary
        context["summary"] = self._generate_context_summary(context)

        return context

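    # Usage sketch (illustrative, not part of the packaged file): target
    # files are included first with their resolved dependencies, then
    # semantic hits and patterns are layered on top. The budget argument is
    # accepted but not yet consumed here, so tokens_used stays 0.
    # ("src/client.py" below is a placeholder.)
    #
    #   ctx = engine.build_context_for_task(
    #       "add retry logic to the HTTP client",
    #       target_files=["src/client.py"],
    #   )
    #   print(ctx["summary"])  # e.g. "1 relevant files; 12 code elements"
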
    def _generate_context_summary(self, context: Dict[str, Any]) -> str:
        """Generate a brief summary of the context"""
        parts = []

        if context["files"]:
            parts.append(f"{len(context['files'])} relevant files")

        if context["nodes"]:
            parts.append(f"{len(context['nodes'])} code elements")

        if context["patterns"]:
            pattern_types = set(p["type"] for p in context["patterns"])
            parts.append(f"patterns detected: {', '.join(pattern_types)}")

        if context["dependencies"]:
            parts.append(f"{len(context['dependencies'])} dependencies")

        return "; ".join(parts) if parts else "No context available"

    def get_project_summary(self) -> Dict[str, Any]:
        """Get a summary of the indexed project"""
        if not self._indexed:
            return {"error": "Project not indexed. Call index_project() first."}

        # Count by type
        type_counts = {}
        for node in self.code_graph.values():
            type_name = node.node_type.value
            type_counts[type_name] = type_counts.get(type_name, 0) + 1

        # Language distribution
        lang_counts = {}
        total_lines = 0
        for ctx in self.file_contexts.values():
            lang_counts[ctx.language] = lang_counts.get(ctx.language, 0) + 1
            total_lines += ctx.line_count

        return {
            "project_root": str(self.project_root),
            "files_indexed": len(self.file_contexts),
            "total_lines": total_lines,
            "code_elements": type_counts,
            "languages": lang_counts,
            "patterns": [{"type": p.pattern_type, "description": p.description} for p in self.patterns],
        }

    def save_cache(self) -> None:
        """Save context cache to disk"""
        if not self.cache_dir:
            return

        try:
            self.cache_dir.mkdir(parents=True, exist_ok=True)

            # Save code graph
            graph_data = {nid: node.to_dict() for nid, node in self.code_graph.items()}
            with open(self.cache_dir / "code_graph.json", "w") as f:
                json.dump(graph_data, f)

            # Save file hashes
            with open(self.cache_dir / "file_hashes.json", "w") as f:
                json.dump(self._file_hashes, f)

            logger.info(f"Context cache saved to {self.cache_dir}")

        except Exception as e:
            logger.warning(f"Error saving context cache: {e}")

    def load_cache(self) -> bool:
        """Load context cache from disk"""
        if not self.cache_dir or not self.cache_dir.exists():
            return False

        try:
            # Load code graph
            graph_path = self.cache_dir / "code_graph.json"
            if graph_path.exists():
                with open(graph_path) as f:
                    graph_data = json.load(f)
                self.code_graph = {nid: CodeNode.from_dict(data) for nid, data in graph_data.items()}

            # Load file hashes
            hashes_path = self.cache_dir / "file_hashes.json"
            if hashes_path.exists():
                with open(hashes_path) as f:
                    self._file_hashes = json.load(f)

            logger.info(f"Context cache loaded from {self.cache_dir}")
            return True

        except Exception as e:
            logger.warning(f"Error loading context cache: {e}")
            return False

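# Usage sketch (illustrative, not part of the packaged file): an end-to-end
# pass that reuses the on-disk JSON cache between runs.
#
#   engine = DeepContextEngine(project_root=Path.cwd())
#   engine.load_cache()        # restore a prior graph if present
#   engine.index_project()     # incremental by default; hash-skips unchanged files
#   print(engine.get_project_summary()["languages"])
#   engine.save_cache()
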
# Convenience function for quick context building
def get_context_engine(project_root: Optional[Path] = None) -> DeepContextEngine:
    """Get or create a context engine instance"""
    return DeepContextEngine(project_root)


def quick_context(task: str, files: Optional[List[str]] = None) -> Dict[str, Any]:
    """Quickly build context for a task"""
    engine = get_context_engine()
    if not engine._indexed:
        engine.index_project()
    return engine.build_context_for_task(task, files)