nc1709-1.15.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. nc1709/__init__.py +13 -0
  2. nc1709/agent/__init__.py +36 -0
  3. nc1709/agent/core.py +505 -0
  4. nc1709/agent/mcp_bridge.py +245 -0
  5. nc1709/agent/permissions.py +298 -0
  6. nc1709/agent/tools/__init__.py +21 -0
  7. nc1709/agent/tools/base.py +440 -0
  8. nc1709/agent/tools/bash_tool.py +367 -0
  9. nc1709/agent/tools/file_tools.py +454 -0
  10. nc1709/agent/tools/notebook_tools.py +516 -0
  11. nc1709/agent/tools/search_tools.py +322 -0
  12. nc1709/agent/tools/task_tool.py +284 -0
  13. nc1709/agent/tools/web_tools.py +555 -0
  14. nc1709/agents/__init__.py +17 -0
  15. nc1709/agents/auto_fix.py +506 -0
  16. nc1709/agents/test_generator.py +507 -0
  17. nc1709/checkpoints.py +372 -0
  18. nc1709/cli.py +3380 -0
  19. nc1709/cli_ui.py +1080 -0
  20. nc1709/cognitive/__init__.py +149 -0
  21. nc1709/cognitive/anticipation.py +594 -0
  22. nc1709/cognitive/context_engine.py +1046 -0
  23. nc1709/cognitive/council.py +824 -0
  24. nc1709/cognitive/learning.py +761 -0
  25. nc1709/cognitive/router.py +583 -0
  26. nc1709/cognitive/system.py +519 -0
  27. nc1709/config.py +155 -0
  28. nc1709/custom_commands.py +300 -0
  29. nc1709/executor.py +333 -0
  30. nc1709/file_controller.py +354 -0
  31. nc1709/git_integration.py +308 -0
  32. nc1709/github_integration.py +477 -0
  33. nc1709/image_input.py +446 -0
  34. nc1709/linting.py +519 -0
  35. nc1709/llm_adapter.py +667 -0
  36. nc1709/logger.py +192 -0
  37. nc1709/mcp/__init__.py +18 -0
  38. nc1709/mcp/client.py +370 -0
  39. nc1709/mcp/manager.py +407 -0
  40. nc1709/mcp/protocol.py +210 -0
  41. nc1709/mcp/server.py +473 -0
  42. nc1709/memory/__init__.py +20 -0
  43. nc1709/memory/embeddings.py +325 -0
  44. nc1709/memory/indexer.py +474 -0
  45. nc1709/memory/sessions.py +432 -0
  46. nc1709/memory/vector_store.py +451 -0
  47. nc1709/models/__init__.py +86 -0
  48. nc1709/models/detector.py +377 -0
  49. nc1709/models/formats.py +315 -0
  50. nc1709/models/manager.py +438 -0
  51. nc1709/models/registry.py +497 -0
  52. nc1709/performance/__init__.py +343 -0
  53. nc1709/performance/cache.py +705 -0
  54. nc1709/performance/pipeline.py +611 -0
  55. nc1709/performance/tiering.py +543 -0
  56. nc1709/plan_mode.py +362 -0
  57. nc1709/plugins/__init__.py +17 -0
  58. nc1709/plugins/agents/__init__.py +18 -0
  59. nc1709/plugins/agents/django_agent.py +912 -0
  60. nc1709/plugins/agents/docker_agent.py +623 -0
  61. nc1709/plugins/agents/fastapi_agent.py +887 -0
  62. nc1709/plugins/agents/git_agent.py +731 -0
  63. nc1709/plugins/agents/nextjs_agent.py +867 -0
  64. nc1709/plugins/base.py +359 -0
  65. nc1709/plugins/manager.py +411 -0
  66. nc1709/plugins/registry.py +337 -0
  67. nc1709/progress.py +443 -0
  68. nc1709/prompts/__init__.py +22 -0
  69. nc1709/prompts/agent_system.py +180 -0
  70. nc1709/prompts/task_prompts.py +340 -0
  71. nc1709/prompts/unified_prompt.py +133 -0
  72. nc1709/reasoning_engine.py +541 -0
  73. nc1709/remote_client.py +266 -0
  74. nc1709/shell_completions.py +349 -0
  75. nc1709/slash_commands.py +649 -0
  76. nc1709/task_classifier.py +408 -0
  77. nc1709/version_check.py +177 -0
  78. nc1709/web/__init__.py +8 -0
  79. nc1709/web/server.py +950 -0
  80. nc1709/web/templates/index.html +1127 -0
  81. nc1709-1.15.4.dist-info/METADATA +858 -0
  82. nc1709-1.15.4.dist-info/RECORD +86 -0
  83. nc1709-1.15.4.dist-info/WHEEL +5 -0
  84. nc1709-1.15.4.dist-info/entry_points.txt +2 -0
  85. nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
  86. nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/cognitive/context_engine.py
@@ -0,0 +1,1046 @@
+ """
+ Layer 2: Deep Context Engine
+
+ Provides semantic understanding of the codebase through:
+ - AST analysis and code graph building
+ - Call graph and dependency mapping
+ - Pattern detection and recognition
+ - Semantic search via embeddings (ChromaDB)
+ - Incremental indexing for large codebases
+
+ This layer answers: "What does NC1709 know about this codebase?"
+ """
+
+ import os
+ import ast
+ import hashlib
+ import json
+ import logging
+ from dataclasses import dataclass, field
+ from typing import Dict, List, Optional, Set, Any, Tuple
+ from pathlib import Path
+ from enum import Enum
+ from datetime import datetime
+ import threading
+
+ logger = logging.getLogger(__name__)
+
+
+ class NodeType(Enum):
+     """Types of nodes in the code graph"""
+     MODULE = "module"
+     CLASS = "class"
+     FUNCTION = "function"
+     METHOD = "method"
+     VARIABLE = "variable"
+     IMPORT = "import"
+     CONSTANT = "constant"
+
+
+ @dataclass
+ class CodeNode:
+     """A node in the code graph representing a code element"""
+     id: str  # Unique identifier (file:line:name)
+     name: str
+     node_type: NodeType
+     file_path: str
+     line_start: int
+     line_end: int
+     docstring: Optional[str] = None
+     signature: Optional[str] = None
+     parent_id: Optional[str] = None
+     children_ids: List[str] = field(default_factory=list)
+     references: List[str] = field(default_factory=list)  # What this node references
+     referenced_by: List[str] = field(default_factory=list)  # What references this node
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+     def to_dict(self) -> Dict[str, Any]:
+         """Convert to dictionary for serialization"""
+         return {
+             "id": self.id,
+             "name": self.name,
+             "node_type": self.node_type.value,
+             "file_path": self.file_path,
+             "line_start": self.line_start,
+             "line_end": self.line_end,
+             "docstring": self.docstring,
+             "signature": self.signature,
+             "parent_id": self.parent_id,
+             "children_ids": self.children_ids,
+             "references": self.references,
+             "referenced_by": self.referenced_by,
+             "metadata": self.metadata,
+         }
+
+     @classmethod
+     def from_dict(cls, data: Dict[str, Any]) -> "CodeNode":
+         """Create from dictionary"""
+         return cls(
+             id=data["id"],
+             name=data["name"],
+             node_type=NodeType(data["node_type"]),
+             file_path=data["file_path"],
+             line_start=data["line_start"],
+             line_end=data["line_end"],
+             docstring=data.get("docstring"),
+             signature=data.get("signature"),
+             parent_id=data.get("parent_id"),
+             children_ids=data.get("children_ids", []),
+             references=data.get("references", []),
+             referenced_by=data.get("referenced_by", []),
+             metadata=data.get("metadata", {}),
+         )
+
+
+ @dataclass
+ class CodePattern:
+     """A detected pattern in the codebase"""
+     pattern_type: str  # e.g., "singleton", "factory", "decorator", "error_handling"
+     description: str
+     file_paths: List[str]
+     node_ids: List[str]
+     confidence: float  # 0.0 to 1.0
+     examples: List[str] = field(default_factory=list)
+     metadata: Dict[str, Any] = field(default_factory=dict)
+
+
+ @dataclass
+ class FileContext:
+     """Context information for a single file"""
+     file_path: str
+     language: str
+     size_bytes: int
+     line_count: int
+     last_modified: datetime
+     content_hash: str
+     imports: List[str] = field(default_factory=list)
+     exports: List[str] = field(default_factory=list)
+     dependencies: List[str] = field(default_factory=list)
+     node_ids: List[str] = field(default_factory=list)
+     summary: Optional[str] = None
+
+
+ @dataclass
+ class ContextBudget:
+     """Budget allocation for context in a request"""
+     max_tokens: int = 8000
+     file_context_tokens: int = 3000
+     code_graph_tokens: int = 2000
+     pattern_tokens: int = 1000
+     history_tokens: int = 2000
+
+     def remaining(self, used: int) -> int:
+         """Calculate remaining tokens"""
+         return max(0, self.max_tokens - used)
+
+
+ class CodeGraphBuilder(ast.NodeVisitor):
+     """Builds a code graph from Python AST"""
+
+     def __init__(self, file_path: str, source_code: str):
+         self.file_path = file_path
+         self.source_code = source_code
+         self.source_lines = source_code.splitlines()
+         self.nodes: Dict[str, CodeNode] = {}
+         self.current_parent: Optional[str] = None
+         self.imports: List[str] = []
+         self.exports: List[str] = []
+
+     def _make_id(self, name: str, line: int) -> str:
+         """Create unique node ID"""
+         return f"{self.file_path}:{line}:{name}"
+
+     def _get_docstring(self, node: ast.AST) -> Optional[str]:
+         """Extract docstring from node if present"""
+         try:
+             return ast.get_docstring(node)
+         except Exception:
+             return None
+
+     def _get_signature(self, node: ast.FunctionDef) -> str:
+         """Extract function signature"""
+         args = []
+         for arg in node.args.args:
+             arg_str = arg.arg
+             if arg.annotation:
+                 try:
+                     arg_str += f": {ast.unparse(arg.annotation)}"
+                 except Exception:
+                     pass
+             args.append(arg_str)
+
+         # Add *args and **kwargs
+         if node.args.vararg:
+             args.append(f"*{node.args.vararg.arg}")
+         if node.args.kwarg:
+             args.append(f"**{node.args.kwarg.arg}")
+
+         returns = ""
+         if node.returns:
+             try:
+                 returns = f" -> {ast.unparse(node.returns)}"
+             except Exception:
+                 pass
+
+         return f"def {node.name}({', '.join(args)}){returns}"
+
+     def visit_Module(self, node: ast.Module) -> None:
+         """Visit module node"""
+         module_name = Path(self.file_path).stem
+         module_id = self._make_id(module_name, 1)
+
+         self.nodes[module_id] = CodeNode(
+             id=module_id,
+             name=module_name,
+             node_type=NodeType.MODULE,
+             file_path=self.file_path,
+             line_start=1,
+             line_end=len(self.source_lines),
+             docstring=self._get_docstring(node),
+         )
+
+         old_parent = self.current_parent
+         self.current_parent = module_id
+         self.generic_visit(node)
+         self.current_parent = old_parent
+
+     def visit_ClassDef(self, node: ast.ClassDef) -> None:
+         """Visit class definition"""
+         class_id = self._make_id(node.name, node.lineno)
+
+         # Get base classes
+         bases = []
+         for base in node.bases:
+             try:
+                 bases.append(ast.unparse(base))
+             except Exception:
+                 pass
+
+         self.nodes[class_id] = CodeNode(
+             id=class_id,
+             name=node.name,
+             node_type=NodeType.CLASS,
+             file_path=self.file_path,
+             line_start=node.lineno,
+             line_end=node.end_lineno or node.lineno,
+             docstring=self._get_docstring(node),
+             parent_id=self.current_parent,
+             metadata={"bases": bases, "decorators": [d.id if hasattr(d, 'id') else str(d) for d in node.decorator_list]},
+         )
+
+         # Add to parent's children
+         if self.current_parent and self.current_parent in self.nodes:
+             self.nodes[self.current_parent].children_ids.append(class_id)
+
+         # Track exports
+         if not node.name.startswith('_'):
+             self.exports.append(node.name)
+
+         old_parent = self.current_parent
+         self.current_parent = class_id
+         self.generic_visit(node)
+         self.current_parent = old_parent
+
+     def visit_FunctionDef(self, node: ast.FunctionDef) -> None:
+         """Visit function/method definition"""
+         func_id = self._make_id(node.name, node.lineno)
+
+         # Determine if method or function
+         parent_node = self.nodes.get(self.current_parent) if self.current_parent else None
+         is_method = parent_node and parent_node.node_type == NodeType.CLASS
+
+         self.nodes[func_id] = CodeNode(
+             id=func_id,
+             name=node.name,
+             node_type=NodeType.METHOD if is_method else NodeType.FUNCTION,
+             file_path=self.file_path,
+             line_start=node.lineno,
+             line_end=node.end_lineno or node.lineno,
+             docstring=self._get_docstring(node),
+             signature=self._get_signature(node),
+             parent_id=self.current_parent,
+             metadata={"decorators": [d.id if hasattr(d, 'id') else str(d) for d in node.decorator_list]},
+         )
+
+         # Add to parent's children
+         if self.current_parent and self.current_parent in self.nodes:
+             self.nodes[self.current_parent].children_ids.append(func_id)
+
+         # Track exports
+         if not node.name.startswith('_') and not is_method:
+             self.exports.append(node.name)
+
+         old_parent = self.current_parent
+         self.current_parent = func_id
+         self.generic_visit(node)
+         self.current_parent = old_parent
+
+     visit_AsyncFunctionDef = visit_FunctionDef
+
+     def visit_Import(self, node: ast.Import) -> None:
+         """Visit import statement"""
+         for alias in node.names:
+             self.imports.append(alias.name)
+             import_id = self._make_id(f"import_{alias.name}", node.lineno)
+             self.nodes[import_id] = CodeNode(
+                 id=import_id,
+                 name=alias.name,
+                 node_type=NodeType.IMPORT,
+                 file_path=self.file_path,
+                 line_start=node.lineno,
+                 line_end=node.lineno,
+                 parent_id=self.current_parent,
+                 metadata={"alias": alias.asname},
+             )
+         self.generic_visit(node)
+
+     def visit_ImportFrom(self, node: ast.ImportFrom) -> None:
+         """Visit from ... import statement"""
+         module = node.module or ""
+         for alias in node.names:
+             full_import = f"{module}.{alias.name}" if module else alias.name
+             self.imports.append(full_import)
+             import_id = self._make_id(f"import_{full_import}", node.lineno)
+             self.nodes[import_id] = CodeNode(
+                 id=import_id,
+                 name=alias.name,
+                 node_type=NodeType.IMPORT,
+                 file_path=self.file_path,
+                 line_start=node.lineno,
+                 line_end=node.lineno,
+                 parent_id=self.current_parent,
+                 metadata={"module": module, "alias": alias.asname},
+             )
+         self.generic_visit(node)
+
+     def build(self) -> Tuple[Dict[str, CodeNode], List[str], List[str]]:
+         """Build the code graph and return nodes, imports, exports"""
+         try:
+             tree = ast.parse(self.source_code)
+             self.visit(tree)
+         except SyntaxError as e:
+             logger.warning(f"Syntax error parsing {self.file_path}: {e}")
+         except Exception as e:
+             logger.warning(f"Error parsing {self.file_path}: {e}")
+
+         return self.nodes, self.imports, self.exports
+
+
+ class PatternDetector:
+     """Detects common code patterns in the codebase"""
+
+     def __init__(self):
+         self.patterns: List[CodePattern] = []
+
+     def detect_patterns(self, nodes: Dict[str, CodeNode], file_contexts: Dict[str, FileContext]) -> List[CodePattern]:
+         """Detect patterns across the codebase"""
+         self.patterns = []
+
+         # Detect singleton pattern
+         self._detect_singleton(nodes)
+
+         # Detect factory pattern
+         self._detect_factory(nodes)
+
+         # Detect decorator pattern
+         self._detect_decorator_usage(nodes)
+
+         # Detect error handling patterns
+         self._detect_error_handling(nodes)
+
+         # Detect MVC/MVP patterns
+         self._detect_architecture_pattern(file_contexts)
+
+         # Detect testing patterns
+         self._detect_testing_pattern(nodes, file_contexts)
+
+         return self.patterns
+
+     def _detect_singleton(self, nodes: Dict[str, CodeNode]) -> None:
+         """Detect singleton pattern"""
+         for node_id, node in nodes.items():
+             if node.node_type == NodeType.CLASS:
+                 # Check for __new__ method or _instance attribute
+                 has_instance = any(
+                     "_instance" in child_id.lower() or "__new__" in child_id
+                     for child_id in node.children_ids
+                 )
+                 if has_instance:
+                     self.patterns.append(CodePattern(
+                         pattern_type="singleton",
+                         description=f"Singleton pattern detected in class {node.name}",
+                         file_paths=[node.file_path],
+                         node_ids=[node_id],
+                         confidence=0.8,
+                     ))
+
+     def _detect_factory(self, nodes: Dict[str, CodeNode]) -> None:
+         """Detect factory pattern"""
+         for node_id, node in nodes.items():
+             if node.node_type in (NodeType.FUNCTION, NodeType.METHOD):
+                 name_lower = node.name.lower()
+                 if any(kw in name_lower for kw in ["create", "build", "make", "factory", "get_instance"]):
+                     self.patterns.append(CodePattern(
+                         pattern_type="factory",
+                         description=f"Factory pattern detected: {node.name}",
+                         file_paths=[node.file_path],
+                         node_ids=[node_id],
+                         confidence=0.7,
+                     ))
+
+     def _detect_decorator_usage(self, nodes: Dict[str, CodeNode]) -> None:
+         """Detect heavy decorator usage"""
+         decorated_functions = []
+         for node_id, node in nodes.items():
+             if node.node_type in (NodeType.FUNCTION, NodeType.METHOD):
+                 decorators = node.metadata.get("decorators", [])
+                 if decorators:
+                     decorated_functions.append((node_id, decorators))
+
+         if len(decorated_functions) > 5:
+             self.patterns.append(CodePattern(
+                 pattern_type="decorator_heavy",
+                 description=f"Heavy decorator usage detected ({len(decorated_functions)} decorated functions)",
+                 file_paths=list(set(nodes[nid].file_path for nid, _ in decorated_functions)),
+                 node_ids=[nid for nid, _ in decorated_functions[:10]],  # Limit examples
+                 confidence=0.9,
+             ))
+
+     def _detect_error_handling(self, nodes: Dict[str, CodeNode]) -> None:
+         """Detect error handling patterns"""
+         # This would need actual AST analysis for try/except blocks
+         # Simplified version based on naming
+         error_handlers = []
+         for node_id, node in nodes.items():
+             if node.node_type in (NodeType.FUNCTION, NodeType.METHOD):
+                 name_lower = node.name.lower()
+                 if any(kw in name_lower for kw in ["handle", "error", "exception", "catch"]):
+                     error_handlers.append(node_id)
+
+         if error_handlers:
+             self.patterns.append(CodePattern(
+                 pattern_type="error_handling",
+                 description=f"Error handling pattern detected ({len(error_handlers)} handlers)",
+                 file_paths=list(set(nodes[nid].file_path for nid in error_handlers)),
+                 node_ids=error_handlers[:10],
+                 confidence=0.6,
+             ))
+
+     def _detect_architecture_pattern(self, file_contexts: Dict[str, FileContext]) -> None:
+         """Detect architectural patterns like MVC"""
+         files = list(file_contexts.keys())
+         files_lower = [f.lower() for f in files]
+
+         has_models = any("model" in f for f in files_lower)
+         has_views = any("view" in f for f in files_lower)
+         has_controllers = any("controller" in f or "handler" in f for f in files_lower)
+
+         if has_models and has_views and has_controllers:
+             self.patterns.append(CodePattern(
+                 pattern_type="mvc",
+                 description="MVC/MVP architectural pattern detected",
+                 file_paths=[f for f in files if any(k in f.lower() for k in ["model", "view", "controller", "handler"])],
+                 node_ids=[],
+                 confidence=0.75,
+             ))
+
+     def _detect_testing_pattern(self, nodes: Dict[str, CodeNode], file_contexts: Dict[str, FileContext]) -> None:
+         """Detect testing patterns"""
+         test_files = [f for f in file_contexts.keys() if "test" in f.lower()]
+         test_functions = [nid for nid, n in nodes.items() if n.name.startswith("test_")]
+
+         if test_files or test_functions:
+             self.patterns.append(CodePattern(
+                 pattern_type="testing",
+                 description=f"Testing pattern detected ({len(test_files)} test files, {len(test_functions)} test functions)",
+                 file_paths=test_files[:10],
+                 node_ids=test_functions[:10],
+                 confidence=0.95,
+             ))
+
+
+ class SemanticIndex:
+     """Semantic search index using embeddings (ChromaDB optional)"""
+
+     def __init__(self, index_path: Optional[Path] = None):
+         self.index_path = index_path
+         self._chroma_client = None
+         self._collection = None
+         self._fallback_index: Dict[str, Dict[str, Any]] = {}  # Simple keyword-based fallback
+         self._lock = threading.Lock()
+
+     def _init_chroma(self) -> bool:
+         """Initialize ChromaDB if available"""
+         if self._chroma_client is not None:
+             return self._collection is not None
+
+         try:
+             import chromadb
+             from chromadb.config import Settings
+
+             persist_dir = str(self.index_path) if self.index_path else None
+             if persist_dir:
+                 self._chroma_client = chromadb.Client(Settings(
+                     persist_directory=persist_dir,
+                     anonymized_telemetry=False
+                 ))
+             else:
+                 self._chroma_client = chromadb.Client()
+
+             self._collection = self._chroma_client.get_or_create_collection(
+                 name="nc1709_codebase",
+                 metadata={"hnsw:space": "cosine"}
+             )
+             logger.info("ChromaDB initialized for semantic search")
+             return True
+         except ImportError:
+             logger.info("ChromaDB not available, using fallback keyword search")
+             return False
+         except Exception as e:
+             logger.warning(f"Error initializing ChromaDB: {e}")
+             return False
+
+     def index_node(self, node: CodeNode, content: str) -> None:
+         """Index a code node for semantic search"""
+         with self._lock:
+             # Create searchable text
+             searchable = f"{node.name} {node.docstring or ''} {node.signature or ''}"
+
+             if self._init_chroma() and self._collection:
+                 try:
+                     self._collection.upsert(
+                         ids=[node.id],
+                         documents=[searchable],
+                         metadatas=[{
+                             "name": node.name,
+                             "type": node.node_type.value,
+                             "file": node.file_path,
+                             "line": node.line_start,
+                         }]
+                     )
+                 except Exception as e:
+                     logger.warning(f"Error indexing to ChromaDB: {e}")
+                     self._fallback_index[node.id] = {
+                         "text": searchable.lower(),
+                         "node": node,
+                     }
+             else:
+                 # Fallback to simple keyword index
+                 self._fallback_index[node.id] = {
+                     "text": searchable.lower(),
+                     "node": node,
+                 }
+
+     def search(self, query: str, limit: int = 10) -> List[Tuple[CodeNode, float]]:
+         """Search for nodes matching query"""
+         results = []
+
+         with self._lock:
+             if self._collection:
+                 try:
+                     search_results = self._collection.query(
+                         query_texts=[query],
+                         n_results=limit
+                     )
+                     if search_results and search_results.get("ids"):
+                         for i, node_id in enumerate(search_results["ids"][0]):
+                             distance = search_results["distances"][0][i] if search_results.get("distances") else 0.5
+                             score = 1.0 - distance  # Convert distance to similarity
+                             # We'd need to fetch the actual node from storage
+                             results.append((node_id, score))
+                 except Exception as e:
+                     logger.warning(f"ChromaDB search error: {e}")
+
+             # Fallback search
+             if not results:
+                 query_lower = query.lower()
+                 query_terms = query_lower.split()
+
+                 for node_id, data in self._fallback_index.items():
+                     text = data["text"]
+                     # Simple scoring: count matching terms
+                     matches = sum(1 for term in query_terms if term in text)
+                     if matches > 0:
+                         score = matches / len(query_terms)
+                         results.append((data["node"], score))
+
+             results.sort(key=lambda x: x[1], reverse=True)
+             results = results[:limit]
+
+         return results
+
+     def clear(self) -> None:
+         """Clear the index"""
+         with self._lock:
+             if self._collection:
+                 try:
+                     self._chroma_client.delete_collection("nc1709_codebase")
+                     self._collection = self._chroma_client.get_or_create_collection(
+                         name="nc1709_codebase",
+                         metadata={"hnsw:space": "cosine"}
+                     )
+                 except Exception as e:
+                     logger.warning(f"Error clearing ChromaDB: {e}")
+
+             self._fallback_index.clear()
+
+
+ class DeepContextEngine:
+     """
+     Layer 2: Deep Context Engine
+
+     Provides semantic understanding of the codebase through:
+     - Code graph building and navigation
+     - Pattern detection
+     - Semantic search
+     - Context budgeting for LLM requests
+     """
+
+     # File extensions to index
+     SUPPORTED_EXTENSIONS = {
+         ".py": "python",
+         ".js": "javascript",
+         ".ts": "typescript",
+         ".jsx": "javascript",
+         ".tsx": "typescript",
+         ".go": "go",
+         ".rs": "rust",
+         ".java": "java",
+         ".cpp": "cpp",
+         ".c": "c",
+         ".h": "c",
+         ".hpp": "cpp",
+         ".rb": "ruby",
+         ".php": "php",
+         ".swift": "swift",
+         ".kt": "kotlin",
+         ".scala": "scala",
+         ".cs": "csharp",
+     }
+
+     # Directories to skip
+     SKIP_DIRS = {
+         "__pycache__", ".git", ".svn", ".hg", "node_modules",
+         "venv", ".venv", "env", ".env", "dist", "build",
+         ".idea", ".vscode", ".pytest_cache", ".mypy_cache",
+         "eggs", "*.egg-info", ".tox", "htmlcov",
+     }
+
+     def __init__(self, project_root: Optional[Path] = None, cache_dir: Optional[Path] = None):
+         self.project_root = project_root or Path.cwd()
+         self.cache_dir = cache_dir or (self.project_root / ".nc1709" / "context_cache")
+
+         # Core data structures
+         self.code_graph: Dict[str, CodeNode] = {}
+         self.file_contexts: Dict[str, FileContext] = {}
+         self.patterns: List[CodePattern] = []
+
+         # Components
+         self.pattern_detector = PatternDetector()
+         self.semantic_index = SemanticIndex(self.cache_dir / "semantic_index" if self.cache_dir else None)
+
+         # State
+         self._indexed = False
+         self._lock = threading.Lock()
+         self._file_hashes: Dict[str, str] = {}  # Track file changes
+
+     def _should_skip_dir(self, dir_name: str) -> bool:
+         """Check if directory should be skipped"""
+         return dir_name in self.SKIP_DIRS or dir_name.startswith('.')
+
+     def _get_file_hash(self, file_path: Path) -> str:
+         """Get hash of file contents for change detection"""
+         try:
+             content = file_path.read_bytes()
+             return hashlib.md5(content).hexdigest()
+         except Exception:
+             return ""
+
+     def _get_language(self, file_path: Path) -> Optional[str]:
+         """Get language from file extension"""
+         return self.SUPPORTED_EXTENSIONS.get(file_path.suffix.lower())
+
+     def index_file(self, file_path: Path, force: bool = False) -> Optional[FileContext]:
+         """Index a single file"""
+         str_path = str(file_path)
+
+         # Check if file has changed
+         current_hash = self._get_file_hash(file_path)
+         if not force and str_path in self._file_hashes:
+             if self._file_hashes[str_path] == current_hash:
+                 return self.file_contexts.get(str_path)
+
+         language = self._get_language(file_path)
+         if not language:
+             return None
+
+         try:
+             content = file_path.read_text(encoding='utf-8', errors='ignore')
+             lines = content.splitlines()
+
+             # Build code graph for Python files
+             nodes: Dict[str, CodeNode] = {}
+             imports: List[str] = []
+             exports: List[str] = []
+
+             if language == "python":
+                 builder = CodeGraphBuilder(str_path, content)
+                 nodes, imports, exports = builder.build()
+
+                 # Add nodes to global graph
+                 with self._lock:
+                     self.code_graph.update(nodes)
+
+                 # Index nodes for semantic search
+                 for node in nodes.values():
+                     self.semantic_index.index_node(node, content)
+
+             # Create file context
+             file_context = FileContext(
+                 file_path=str_path,
+                 language=language,
+                 size_bytes=len(content.encode('utf-8')),
+                 line_count=len(lines),
+                 last_modified=datetime.fromtimestamp(file_path.stat().st_mtime),
+                 content_hash=current_hash,
+                 imports=imports,
+                 exports=exports,
+                 node_ids=list(nodes.keys()),
+             )
+
+             with self._lock:
+                 self.file_contexts[str_path] = file_context
+                 self._file_hashes[str_path] = current_hash
+
+             return file_context
+
+         except Exception as e:
+             logger.warning(f"Error indexing {file_path}: {e}")
+             return None
+
+     def index_project(self, incremental: bool = True) -> Dict[str, Any]:
+         """
+         Index the entire project
+
+         Args:
+             incremental: If True, only index changed files
+
+         Returns:
+             Statistics about the indexing
+         """
+         stats = {
+             "files_scanned": 0,
+             "files_indexed": 0,
+             "files_skipped": 0,
+             "nodes_created": 0,
+             "patterns_detected": 0,
+             "errors": 0,
+         }
+
+         logger.info(f"Starting project indexing: {self.project_root}")
+
+         for root, dirs, files in os.walk(self.project_root):
+             # Filter out directories to skip
+             dirs[:] = [d for d in dirs if not self._should_skip_dir(d)]
+
+             for file_name in files:
+                 file_path = Path(root) / file_name
+                 stats["files_scanned"] += 1
+
+                 if self._get_language(file_path):
+                     result = self.index_file(file_path, force=not incremental)
+                     if result:
+                         stats["files_indexed"] += 1
+                         stats["nodes_created"] += len(result.node_ids)
+                     else:
+                         stats["errors"] += 1
+                 else:
+                     stats["files_skipped"] += 1
+
+         # Detect patterns
+         self.patterns = self.pattern_detector.detect_patterns(self.code_graph, self.file_contexts)
+         stats["patterns_detected"] = len(self.patterns)
+
+         self._indexed = True
+         logger.info(f"Indexing complete: {stats}")
+
+         return stats
+
+     def search_code(self, query: str, limit: int = 10) -> List[Tuple[CodeNode, float]]:
+         """
+         Search for code matching the query
+
+         Args:
+             query: Search query
+             limit: Maximum results
+
+         Returns:
+             List of (CodeNode, score) tuples
+         """
+         return self.semantic_index.search(query, limit)
+
+     def get_file_context(self, file_path: str) -> Optional[FileContext]:
+         """Get context for a specific file"""
+         return self.file_contexts.get(file_path)
+
+     def get_node(self, node_id: str) -> Optional[CodeNode]:
+         """Get a specific code node by ID"""
+         return self.code_graph.get(node_id)
+
+     def get_related_nodes(self, node_id: str, depth: int = 1) -> List[CodeNode]:
+         """
+         Get nodes related to the given node
+
+         Args:
+             node_id: Starting node ID
+             depth: How many levels of relationships to follow
+
+         Returns:
+             List of related CodeNodes
+         """
+         if node_id not in self.code_graph:
+             return []
+
+         related = set()
+         to_visit = [(node_id, 0)]
+         visited = set()
+
+         while to_visit:
+             current_id, current_depth = to_visit.pop(0)
+
+             if current_id in visited or current_depth > depth:
+                 continue
+
+             visited.add(current_id)
+             node = self.code_graph.get(current_id)
+
+             if node and current_id != node_id:
+                 related.add(current_id)
+
+             if node and current_depth < depth:
+                 # Add children
+                 for child_id in node.children_ids:
+                     if child_id not in visited:
+                         to_visit.append((child_id, current_depth + 1))
+
+                 # Add parent
+                 if node.parent_id and node.parent_id not in visited:
+                     to_visit.append((node.parent_id, current_depth + 1))
+
+                 # Add references
+                 for ref_id in node.references:
+                     if ref_id not in visited:
+                         to_visit.append((ref_id, current_depth + 1))
+
+         return [self.code_graph[nid] for nid in related if nid in self.code_graph]
+
+     def get_dependencies(self, file_path: str) -> List[str]:
+         """Get files that this file depends on"""
+         context = self.file_contexts.get(file_path)
+         if not context:
+             return []
+
+         dependencies = []
+         for imp in context.imports:
+             # Try to resolve import to a file in the project
+             parts = imp.split('.')
+             for i in range(len(parts), 0, -1):
+                 possible_path = self.project_root / '/'.join(parts[:i])
+                 if possible_path.with_suffix('.py').exists():
+                     dependencies.append(str(possible_path.with_suffix('.py')))
+                     break
+                 if (possible_path / '__init__.py').exists():
+                     dependencies.append(str(possible_path / '__init__.py'))
+                     break
+
+         return dependencies
+
+     def get_dependents(self, file_path: str) -> List[str]:
+         """Get files that depend on this file"""
+         module_name = Path(file_path).stem
+         dependents = []
+
+         for ctx_path, ctx in self.file_contexts.items():
+             if ctx_path != file_path:
+                 for imp in ctx.imports:
+                     if module_name in imp:
+                         dependents.append(ctx_path)
+                         break
+
+         return dependents
+
+     def build_context_for_task(
+         self,
+         task_description: str,
+         target_files: Optional[List[str]] = None,
+         budget: Optional[ContextBudget] = None
+     ) -> Dict[str, Any]:
+         """
+         Build optimized context for a task
+
+         Args:
+             task_description: What the user is trying to do
+             target_files: Specific files to include
+             budget: Token budget allocation
+
+         Returns:
+             Context dict with relevant code, patterns, and metadata
+         """
+         budget = budget or ContextBudget()
+         context = {
+             "files": [],
+             "nodes": [],
+             "patterns": [],
+             "dependencies": [],
+             "summary": "",
+             "tokens_used": 0,
+         }
+
+         # Search for relevant code
+         search_results = self.search_code(task_description, limit=20)
+
+         # Add target files first
+         if target_files:
+             for file_path in target_files:
+                 if file_path in self.file_contexts:
+                     context["files"].append(self.file_contexts[file_path])
+                     # Add file's dependencies
+                     context["dependencies"].extend(self.get_dependencies(file_path))
+
+         # Add relevant nodes from search
+         for node, score in search_results:
+             if isinstance(node, CodeNode):
+                 context["nodes"].append({
+                     "node": node.to_dict(),
+                     "relevance": score,
+                 })
+
+         # Add relevant patterns
+         for pattern in self.patterns:
+             # Check if pattern is relevant to target files or search results
+             if target_files:
+                 if any(tf in pattern.file_paths for tf in target_files):
+                     context["patterns"].append({
+                         "type": pattern.pattern_type,
+                         "description": pattern.description,
+                         "confidence": pattern.confidence,
+                     })
+             elif pattern.confidence > 0.7:
+                 context["patterns"].append({
+                     "type": pattern.pattern_type,
+                     "description": pattern.description,
+                     "confidence": pattern.confidence,
+                 })
+
+         # Generate summary
+         context["summary"] = self._generate_context_summary(context)
+
+         return context
+
+     def _generate_context_summary(self, context: Dict[str, Any]) -> str:
+         """Generate a brief summary of the context"""
+         parts = []
+
+         if context["files"]:
+             parts.append(f"{len(context['files'])} relevant files")
+
+         if context["nodes"]:
+             parts.append(f"{len(context['nodes'])} code elements")
+
+         if context["patterns"]:
+             pattern_types = set(p["type"] for p in context["patterns"])
+             parts.append(f"patterns detected: {', '.join(pattern_types)}")
+
+         if context["dependencies"]:
+             parts.append(f"{len(context['dependencies'])} dependencies")
+
+         return "; ".join(parts) if parts else "No context available"
+
+     def get_project_summary(self) -> Dict[str, Any]:
+         """Get a summary of the indexed project"""
+         if not self._indexed:
+             return {"error": "Project not indexed. Call index_project() first."}
+
+         # Count by type
+         type_counts = {}
+         for node in self.code_graph.values():
+             type_name = node.node_type.value
+             type_counts[type_name] = type_counts.get(type_name, 0) + 1
+
+         # Language distribution
+         lang_counts = {}
+         total_lines = 0
+         for ctx in self.file_contexts.values():
+             lang_counts[ctx.language] = lang_counts.get(ctx.language, 0) + 1
+             total_lines += ctx.line_count
+
+         return {
+             "project_root": str(self.project_root),
+             "files_indexed": len(self.file_contexts),
+             "total_lines": total_lines,
+             "code_elements": type_counts,
+             "languages": lang_counts,
+             "patterns": [{"type": p.pattern_type, "description": p.description} for p in self.patterns],
+         }
+
+     def save_cache(self) -> None:
+         """Save context cache to disk"""
+         if not self.cache_dir:
+             return
+
+         try:
+             self.cache_dir.mkdir(parents=True, exist_ok=True)
+
+             # Save code graph
+             graph_data = {nid: node.to_dict() for nid, node in self.code_graph.items()}
+             with open(self.cache_dir / "code_graph.json", "w") as f:
+                 json.dump(graph_data, f)
+
+             # Save file hashes
+             with open(self.cache_dir / "file_hashes.json", "w") as f:
+                 json.dump(self._file_hashes, f)
+
+             logger.info(f"Context cache saved to {self.cache_dir}")
+
+         except Exception as e:
+             logger.warning(f"Error saving context cache: {e}")
+
+     def load_cache(self) -> bool:
+         """Load context cache from disk"""
+         if not self.cache_dir or not self.cache_dir.exists():
+             return False
+
+         try:
+             # Load code graph
+             graph_path = self.cache_dir / "code_graph.json"
+             if graph_path.exists():
+                 with open(graph_path) as f:
+                     graph_data = json.load(f)
+                 self.code_graph = {nid: CodeNode.from_dict(data) for nid, data in graph_data.items()}
+
+             # Load file hashes
+             hashes_path = self.cache_dir / "file_hashes.json"
+             if hashes_path.exists():
+                 with open(hashes_path) as f:
+                     self._file_hashes = json.load(f)
+
+             logger.info(f"Context cache loaded from {self.cache_dir}")
+             return True
+
+         except Exception as e:
+             logger.warning(f"Error loading context cache: {e}")
+             return False
+
+
+ # Convenience functions for quick context building
+ def get_context_engine(project_root: Optional[Path] = None) -> DeepContextEngine:
+     """Get or create a context engine instance"""
+     return DeepContextEngine(project_root)
+
+
+ def quick_context(task: str, files: Optional[List[str]] = None) -> Dict[str, Any]:
+     """Quickly build context for a task"""
+     engine = get_context_engine()
+     if not engine._indexed:
+         engine.index_project()
+     return engine.build_context_for_task(task, files)
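
Usage sketch (editorial note, not part of the published wheel): the module's main entry point is DeepContextEngine, with get_context_engine() and quick_context() as thin conveniences. A minimal sketch of driving the engine directly follows, assuming nc1709 1.15.4 is installed and run from a project root; the query and task strings are illustrative, while the import path and every attribute used come from the diff above.

from pathlib import Path
from nc1709.cognitive.context_engine import DeepContextEngine

# Cache defaults to <project_root>/.nc1709/context_cache when cache_dir is omitted
engine = DeepContextEngine(project_root=Path("."))

# incremental=True skips files whose MD5 content hash is unchanged since the last pass
stats = engine.index_project(incremental=True)
print(stats["files_indexed"], "files indexed,", stats["patterns_detected"], "patterns")

# ChromaDB-backed semantic search when the library is importable, keyword fallback otherwise.
# Note the fallback path yields CodeNode objects while the ChromaDB path yields raw string ids,
# hence the defensive getattr here.
for hit, score in engine.search_code("extract function signature", limit=5):
    print(round(score, 2), getattr(hit, "name", hit))

# Bundle relevant files, nodes, and detected patterns for an LLM request
context = engine.build_context_for_task("add retries to the indexer")
print(context["summary"])

# Persist the code graph and file hashes so the next run can be incremental
engine.save_cache()

quick_context(task, files) wraps the same flow in a single call: it constructs an engine, runs index_project() if nothing is indexed yet, and returns the same context dict.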