nc1709-1.15.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nc1709/__init__.py +13 -0
- nc1709/agent/__init__.py +36 -0
- nc1709/agent/core.py +505 -0
- nc1709/agent/mcp_bridge.py +245 -0
- nc1709/agent/permissions.py +298 -0
- nc1709/agent/tools/__init__.py +21 -0
- nc1709/agent/tools/base.py +440 -0
- nc1709/agent/tools/bash_tool.py +367 -0
- nc1709/agent/tools/file_tools.py +454 -0
- nc1709/agent/tools/notebook_tools.py +516 -0
- nc1709/agent/tools/search_tools.py +322 -0
- nc1709/agent/tools/task_tool.py +284 -0
- nc1709/agent/tools/web_tools.py +555 -0
- nc1709/agents/__init__.py +17 -0
- nc1709/agents/auto_fix.py +506 -0
- nc1709/agents/test_generator.py +507 -0
- nc1709/checkpoints.py +372 -0
- nc1709/cli.py +3380 -0
- nc1709/cli_ui.py +1080 -0
- nc1709/cognitive/__init__.py +149 -0
- nc1709/cognitive/anticipation.py +594 -0
- nc1709/cognitive/context_engine.py +1046 -0
- nc1709/cognitive/council.py +824 -0
- nc1709/cognitive/learning.py +761 -0
- nc1709/cognitive/router.py +583 -0
- nc1709/cognitive/system.py +519 -0
- nc1709/config.py +155 -0
- nc1709/custom_commands.py +300 -0
- nc1709/executor.py +333 -0
- nc1709/file_controller.py +354 -0
- nc1709/git_integration.py +308 -0
- nc1709/github_integration.py +477 -0
- nc1709/image_input.py +446 -0
- nc1709/linting.py +519 -0
- nc1709/llm_adapter.py +667 -0
- nc1709/logger.py +192 -0
- nc1709/mcp/__init__.py +18 -0
- nc1709/mcp/client.py +370 -0
- nc1709/mcp/manager.py +407 -0
- nc1709/mcp/protocol.py +210 -0
- nc1709/mcp/server.py +473 -0
- nc1709/memory/__init__.py +20 -0
- nc1709/memory/embeddings.py +325 -0
- nc1709/memory/indexer.py +474 -0
- nc1709/memory/sessions.py +432 -0
- nc1709/memory/vector_store.py +451 -0
- nc1709/models/__init__.py +86 -0
- nc1709/models/detector.py +377 -0
- nc1709/models/formats.py +315 -0
- nc1709/models/manager.py +438 -0
- nc1709/models/registry.py +497 -0
- nc1709/performance/__init__.py +343 -0
- nc1709/performance/cache.py +705 -0
- nc1709/performance/pipeline.py +611 -0
- nc1709/performance/tiering.py +543 -0
- nc1709/plan_mode.py +362 -0
- nc1709/plugins/__init__.py +17 -0
- nc1709/plugins/agents/__init__.py +18 -0
- nc1709/plugins/agents/django_agent.py +912 -0
- nc1709/plugins/agents/docker_agent.py +623 -0
- nc1709/plugins/agents/fastapi_agent.py +887 -0
- nc1709/plugins/agents/git_agent.py +731 -0
- nc1709/plugins/agents/nextjs_agent.py +867 -0
- nc1709/plugins/base.py +359 -0
- nc1709/plugins/manager.py +411 -0
- nc1709/plugins/registry.py +337 -0
- nc1709/progress.py +443 -0
- nc1709/prompts/__init__.py +22 -0
- nc1709/prompts/agent_system.py +180 -0
- nc1709/prompts/task_prompts.py +340 -0
- nc1709/prompts/unified_prompt.py +133 -0
- nc1709/reasoning_engine.py +541 -0
- nc1709/remote_client.py +266 -0
- nc1709/shell_completions.py +349 -0
- nc1709/slash_commands.py +649 -0
- nc1709/task_classifier.py +408 -0
- nc1709/version_check.py +177 -0
- nc1709/web/__init__.py +8 -0
- nc1709/web/server.py +950 -0
- nc1709/web/templates/index.html +1127 -0
- nc1709-1.15.4.dist-info/METADATA +858 -0
- nc1709-1.15.4.dist-info/RECORD +86 -0
- nc1709-1.15.4.dist-info/WHEEL +5 -0
- nc1709-1.15.4.dist-info/entry_points.txt +2 -0
- nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
- nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/cognitive/router.py +583 -0
@@ -0,0 +1,583 @@
"""
NC1709 Cognitive Architecture - Layer 1: Intelligent Router

Replaces keyword-based task classification with LLM-powered intent analysis.
Uses a fast model to understand user intent and route to the best model/agent.

Features:
- LLM-based intent detection (not keywords)
- 18 task categories for fine-grained routing
- Confidence scoring
- Multi-model routing with fallbacks
- Council activation for complex tasks
"""

import json
import asyncio
from dataclasses import dataclass, field
from enum import Enum
from typing import Optional, List, Dict, Any, TYPE_CHECKING
from datetime import datetime

if TYPE_CHECKING:
    from ..llm_adapter import LLMAdapter


class TaskCategory(Enum):
    """Expanded task categories for intelligent routing (18 vs old 5)"""

    # Complex reasoning tasks
    REASONING = "reasoning"
    DEBUGGING = "debugging"
    SECURITY = "security"
    PERFORMANCE = "performance"

    # Code-focused tasks
    CODE_GENERATION = "code_generation"
    CODE_MODIFICATION = "code_modification"
    CODE_REVIEW = "code_review"
    REFACTORING = "refactoring"
    TESTING = "testing"

    # Documentation & explanation
    EXPLANATION = "explanation"
    DOCUMENTATION = "documentation"

    # Operations
    DEVOPS = "devops"
    DATABASE = "database"
    FILE_OPERATIONS = "file_operations"
    COMMAND_EXECUTION = "command_execution"
    GIT_OPERATIONS = "git_operations"
    PROJECT_SETUP = "project_setup"

    # Quick tasks
    QUICK_ANSWER = "quick_answer"


@dataclass
class IntentAnalysis:
    """Result of analyzing user intent"""

    primary_category: TaskCategory
    secondary_categories: List[TaskCategory] = field(default_factory=list)
    confidence: float = 0.7  # 0.0 to 1.0
    complexity: float = 0.5  # 0.0 to 1.0 (how complex is this task)
    requires_context: bool = True  # Does this need codebase context?
    requires_execution: bool = False  # Does this need to run commands?
    requires_file_access: bool = True  # Does this need to read/write files?
    estimated_tokens: int = 1000  # Rough estimate of response size
    key_entities: List[str] = field(default_factory=list)  # Files, functions, classes mentioned
    user_goal: str = ""  # One-sentence summary of what user wants
    analysis_time_ms: float = 0  # How long analysis took


@dataclass
class RoutingDecision:
    """Final routing decision"""

    primary_model: str
    fallback_model: Optional[str] = None
    should_use_council: bool = False  # Use multi-agent for complex tasks
    context_budget: int = 4000  # How many tokens for context
    agents_to_involve: List[str] = field(default_factory=list)  # Which council agents
    confidence: float = 0.7
    reasoning: str = ""  # Why this routing was chosen
    intent: Optional[IntentAnalysis] = None  # The underlying analysis


class IntentAnalyzer:
    """
    Uses a fast model to analyze user intent before routing.
    This replaces the keyword-based TaskClassifier.
    """

    ANALYSIS_PROMPT = '''Analyze this user request and respond with JSON only.

User Request: {prompt}

Current Context:
- Working Directory: {cwd}
- Recent Files: {recent_files}
- Recent Actions: {recent_actions}

Analyze and respond with this exact JSON structure:
{{
    "primary_category": "<one of: reasoning, code_generation, code_modification, code_review, debugging, explanation, refactoring, testing, documentation, devops, database, security, performance, quick_answer, file_operations, command_execution, git_operations, project_setup>",
    "secondary_categories": ["<list of other relevant categories>"],
    "confidence": <0.0-1.0>,
    "complexity": <0.0-1.0>,
    "requires_context": <true/false>,
    "requires_execution": <true/false>,
    "requires_file_access": <true/false>,
    "estimated_tokens": <number>,
    "key_entities": ["<files, functions, classes mentioned>"],
    "user_goal": "<one sentence summary>"
}}

Respond with JSON only, no other text.'''

    def __init__(self, llm_adapter: Optional["LLMAdapter"] = None):
        self.llm = llm_adapter
        self.analysis_model = "ollama/qwen2.5:7b"  # Fast model for analysis
        self._cache: Dict[str, IntentAnalysis] = {}  # Simple cache
        self._cache_ttl = 300  # 5 minutes

    def set_llm_adapter(self, llm_adapter: "LLMAdapter") -> None:
        """Set the LLM adapter (for deferred initialization)"""
        self.llm = llm_adapter

    async def analyze(
        self,
        prompt: str,
        context: Optional[Dict[str, Any]] = None
    ) -> IntentAnalysis:
        """Analyze user intent using LLM"""

        start_time = datetime.now()
        context = context or {}

        # Check cache first (simple string hash)
        cache_key = f"{prompt[:100]}:{context.get('cwd', '')}"
        if cache_key in self._cache:
            cached = self._cache[cache_key]
            return cached

        # If no LLM available, use fallback
        if self.llm is None:
            return self._fallback_analysis(prompt)

        analysis_prompt = self.ANALYSIS_PROMPT.format(
            prompt=prompt,
            cwd=context.get("cwd", "unknown"),
            recent_files=str(context.get("recent_files", [])[:5]),
            recent_actions=str(context.get("recent_actions", [])[:3])
        )

        try:
            # Call fast model for analysis
            response = await self._call_llm(analysis_prompt)

            # Parse JSON response
            data = self._parse_json_response(response)

            elapsed_ms = (datetime.now() - start_time).total_seconds() * 1000

            result = IntentAnalysis(
                primary_category=TaskCategory(data.get("primary_category", "code_generation")),
                secondary_categories=[
                    TaskCategory(c) for c in data.get("secondary_categories", [])
                    if c in [e.value for e in TaskCategory]
                ],
                confidence=float(data.get("confidence", 0.7)),
                complexity=float(data.get("complexity", 0.5)),
                requires_context=bool(data.get("requires_context", True)),
                requires_execution=bool(data.get("requires_execution", False)),
                requires_file_access=bool(data.get("requires_file_access", True)),
                estimated_tokens=int(data.get("estimated_tokens", 1000)),
                key_entities=data.get("key_entities", []),
                user_goal=data.get("user_goal", prompt[:100]),
                analysis_time_ms=elapsed_ms
            )

            # Cache the result
            self._cache[cache_key] = result

            return result

        except Exception as e:
            # Fallback to keyword-based analysis
            return self._fallback_analysis(prompt)

    async def _call_llm(self, prompt: str) -> str:
        """Call the LLM for analysis"""
        if hasattr(self.llm, 'complete_async'):
            return await self.llm.complete_async(
                prompt,
                model=self.analysis_model,
                temperature=0.1,
                max_tokens=500
            )
        elif hasattr(self.llm, 'complete'):
            # Sync fallback
            return self.llm.complete(
                prompt,
                model=self.analysis_model,
                temperature=0.1,
                max_tokens=500
            )
        else:
            raise RuntimeError("LLM adapter has no complete method")

    def _parse_json_response(self, response: str) -> Dict[str, Any]:
        """Parse JSON from LLM response, handling various formats"""
        # Try direct JSON parse
        try:
            return json.loads(response)
        except json.JSONDecodeError:
            pass

        # Try extracting JSON from markdown code blocks
        import re
        json_match = re.search(r'```(?:json)?\s*([\s\S]*?)```', response)
        if json_match:
            try:
                return json.loads(json_match.group(1))
            except json.JSONDecodeError:
                pass

        # Try finding JSON object in text
        json_match = re.search(r'\{[\s\S]*\}', response)
        if json_match:
            try:
                return json.loads(json_match.group(0))
            except json.JSONDecodeError:
                pass

        # Return empty dict if all parsing fails
        return {}

    def _fallback_analysis(self, prompt: str) -> IntentAnalysis:
        """Fallback keyword-based analysis if LLM fails"""
        prompt_lower = prompt.lower()

        # Keyword patterns for each category
        category_keywords = {
            TaskCategory.EXPLANATION: ["explain", "what is", "what does", "how does", "why", "describe"],
            TaskCategory.DEBUGGING: ["bug", "error", "fix", "broken", "crash", "issue", "problem", "debug"],
            TaskCategory.CODE_GENERATION: ["write", "create", "implement", "build", "generate", "make", "add"],
            TaskCategory.CODE_MODIFICATION: ["change", "modify", "update", "edit", "alter"],
            TaskCategory.REFACTORING: ["refactor", "clean", "improve", "restructure", "reorganize"],
            TaskCategory.TESTING: ["test", "unittest", "pytest", "spec", "coverage"],
            TaskCategory.CODE_REVIEW: ["review", "check", "audit", "inspect"],
            TaskCategory.DOCUMENTATION: ["document", "docstring", "readme", "comment"],
            TaskCategory.SECURITY: ["security", "vulnerability", "auth", "password", "encrypt"],
            TaskCategory.PERFORMANCE: ["optimize", "performance", "speed", "slow", "memory", "efficient"],
            TaskCategory.GIT_OPERATIONS: ["git", "commit", "push", "pull", "branch", "merge"],
            TaskCategory.DEVOPS: ["docker", "deploy", "ci/cd", "kubernetes", "container"],
            TaskCategory.DATABASE: ["sql", "database", "query", "migration", "schema"],
            TaskCategory.FILE_OPERATIONS: ["file", "read", "write", "copy", "move", "delete"],
            TaskCategory.COMMAND_EXECUTION: ["run", "execute", "shell", "terminal", "command"],
            TaskCategory.PROJECT_SETUP: ["setup", "init", "scaffold", "bootstrap", "new project"],
            TaskCategory.QUICK_ANSWER: ["?", "what", "which", "where", "when"],
        }

        # Find matching category
        category = TaskCategory.CODE_GENERATION  # Default
        for cat, keywords in category_keywords.items():
            if any(kw in prompt_lower for kw in keywords):
                category = cat
                break

        # Estimate complexity based on prompt length and keywords
        complexity = min(len(prompt) / 500, 1.0)
        if any(kw in prompt_lower for kw in ["complex", "entire", "all", "whole", "complete"]):
            complexity = min(complexity + 0.3, 1.0)

        return IntentAnalysis(
            primary_category=category,
            secondary_categories=[],
            confidence=0.5,  # Lower confidence for fallback
            complexity=complexity,
            requires_context=True,
            requires_execution=any(kw in prompt_lower for kw in ["run", "execute", "test"]),
            requires_file_access=True,
            estimated_tokens=1000,
            key_entities=[],
            user_goal=prompt[:100]
        )

    def analyze_sync(
        self,
        prompt: str,
        context: Optional[Dict[str, Any]] = None
    ) -> IntentAnalysis:
        """Synchronous version of analyze"""
        try:
            loop = asyncio.get_event_loop()
            if loop.is_running():
                # We're already in an async context, use fallback
                return self._fallback_analysis(prompt)
            return loop.run_until_complete(self.analyze(prompt, context))
        except RuntimeError:
            # No event loop, create one
            return asyncio.run(self.analyze(prompt, context))


class IntelligentRouter:
    """
    Makes intelligent routing decisions based on intent analysis.
    Replaces simple task-to-model mapping with dynamic routing.
    """

    # Model assignments for different task types
    MODEL_MAPPING = {
        # Complex reasoning tasks → DeepSeek-R1
        TaskCategory.REASONING: "ollama/deepseek-r1:latest",
        TaskCategory.DEBUGGING: "ollama/deepseek-r1:latest",
        TaskCategory.SECURITY: "ollama/deepseek-r1:latest",
        TaskCategory.PERFORMANCE: "ollama/deepseek-r1:latest",

        # Code-heavy tasks → Qwen2.5-Coder
        TaskCategory.CODE_GENERATION: "ollama/qwen2.5-coder:32b",
        TaskCategory.CODE_MODIFICATION: "ollama/qwen2.5-coder:32b",
        TaskCategory.REFACTORING: "ollama/qwen2.5-coder:32b",
        TaskCategory.TESTING: "ollama/qwen2.5-coder:32b",

        # General tasks → Qwen2.5
        TaskCategory.EXPLANATION: "ollama/qwen2.5:32b",
        TaskCategory.DOCUMENTATION: "ollama/qwen2.5:32b",
        TaskCategory.CODE_REVIEW: "ollama/qwen2.5:32b",

        # Operations → Qwen2.5
        TaskCategory.DEVOPS: "ollama/qwen2.5:32b",
        TaskCategory.DATABASE: "ollama/qwen2.5:32b",
        TaskCategory.FILE_OPERATIONS: "ollama/qwen2.5:32b",
        TaskCategory.COMMAND_EXECUTION: "ollama/qwen2.5:32b",
        TaskCategory.GIT_OPERATIONS: "ollama/qwen2.5:32b",
        TaskCategory.PROJECT_SETUP: "ollama/qwen2.5:32b",

        # Quick tasks → Fast model
        TaskCategory.QUICK_ANSWER: "ollama/qwen2.5-coder:7b",
    }

    # Fallback chain for each model
    FALLBACK_CHAIN = {
        "ollama/deepseek-r1:latest": "ollama/qwen2.5:32b",
        "ollama/qwen2.5-coder:32b": "ollama/qwen2.5:32b",
        "ollama/qwen2.5:32b": "ollama/qwen2.5-coder:7b",
        "ollama/qwen2.5-coder:7b": None,
    }

    # Complexity threshold for using multi-agent council
    COUNCIL_THRESHOLD = 0.75

    # Categories that always benefit from council
    COUNCIL_CATEGORIES = {
        TaskCategory.REASONING,
        TaskCategory.SECURITY,
        TaskCategory.REFACTORING,
        TaskCategory.PERFORMANCE,
    }

    def __init__(self, intent_analyzer: Optional[IntentAnalyzer] = None):
        self.analyzer = intent_analyzer or IntentAnalyzer()
        self._routing_history: List[RoutingDecision] = []

    def set_analyzer(self, analyzer: IntentAnalyzer) -> None:
        """Set the intent analyzer"""
        self.analyzer = analyzer

    async def route(
        self,
        prompt: str,
        context: Optional[Dict[str, Any]] = None
    ) -> RoutingDecision:
        """Make routing decision for a prompt"""

        # Step 1: Analyze intent
        intent = await self.analyzer.analyze(prompt, context)

        # Step 2: Select primary model
        primary_model = self.MODEL_MAPPING.get(
            intent.primary_category,
            "ollama/qwen2.5:32b"
        )

        # Step 3: Determine if council should be used
        should_use_council = self._should_use_council(intent)

        # Step 4: Select which agents if using council
        agents = []
        if should_use_council:
            agents = self._select_agents(intent)

        # Step 5: Calculate context budget
        context_budget = self._calculate_context_budget(intent)

        # Step 6: Build decision
        decision = RoutingDecision(
            primary_model=primary_model,
            fallback_model=self.FALLBACK_CHAIN.get(primary_model),
            should_use_council=should_use_council,
            context_budget=context_budget,
            agents_to_involve=agents,
            confidence=intent.confidence,
            reasoning=self._build_reasoning(intent, primary_model, should_use_council),
            intent=intent
        )

        # Track history
        self._routing_history.append(decision)
        if len(self._routing_history) > 100:
            self._routing_history = self._routing_history[-50:]

        return decision

    def route_sync(
        self,
        prompt: str,
        context: Optional[Dict[str, Any]] = None
    ) -> RoutingDecision:
        """Synchronous version of route"""
        try:
            loop = asyncio.get_event_loop()
            if loop.is_running():
                # Use fallback analysis
                intent = self.analyzer._fallback_analysis(prompt)
                return self._build_decision_from_intent(intent)
            return loop.run_until_complete(self.route(prompt, context))
        except RuntimeError:
            return asyncio.run(self.route(prompt, context))

    def _build_decision_from_intent(self, intent: IntentAnalysis) -> RoutingDecision:
        """Build a routing decision from an intent analysis"""
        primary_model = self.MODEL_MAPPING.get(
            intent.primary_category,
            "ollama/qwen2.5:32b"
        )
        should_use_council = self._should_use_council(intent)
        agents = self._select_agents(intent) if should_use_council else []

        return RoutingDecision(
            primary_model=primary_model,
            fallback_model=self.FALLBACK_CHAIN.get(primary_model),
            should_use_council=should_use_council,
            context_budget=self._calculate_context_budget(intent),
            agents_to_involve=agents,
            confidence=intent.confidence,
            reasoning=self._build_reasoning(intent, primary_model, should_use_council),
            intent=intent
        )

    def _should_use_council(self, intent: IntentAnalysis) -> bool:
        """Determine if multi-agent council should be used"""
        return (
            intent.complexity >= self.COUNCIL_THRESHOLD or
            len(intent.secondary_categories) >= 3 or
            intent.primary_category in self.COUNCIL_CATEGORIES
        )

    def _select_agents(self, intent: IntentAnalysis) -> List[str]:
        """Select which council agents to involve"""
        agents = ["implementer"]  # Always include implementer

        category_to_agents = {
            TaskCategory.REASONING: ["architect"],
            TaskCategory.CODE_REVIEW: ["reviewer"],
            TaskCategory.SECURITY: ["security"],
            TaskCategory.PERFORMANCE: ["performance"],
            TaskCategory.DEBUGGING: ["debugger"],
            TaskCategory.REFACTORING: ["architect", "reviewer"],
            TaskCategory.CODE_GENERATION: ["reviewer"],
            TaskCategory.TESTING: ["reviewer"],
        }

        # Add agents for primary category
        if intent.primary_category in category_to_agents:
            agents.extend(category_to_agents[intent.primary_category])

        # Add agents for secondary categories
        for cat in intent.secondary_categories:
            if cat in category_to_agents:
                agents.extend(category_to_agents[cat])

        # Remove duplicates while preserving order
        seen = set()
        unique_agents = []
        for agent in agents:
            if agent not in seen:
                seen.add(agent)
                unique_agents.append(agent)

        return unique_agents[:4]  # Max 4 agents

    def _calculate_context_budget(self, intent: IntentAnalysis) -> int:
        """Calculate how many tokens to allocate for context"""
        base_budget = 4000

        # Increase for complex tasks
        if intent.complexity > 0.7:
            base_budget += 4000
        elif intent.complexity > 0.4:
            base_budget += 2000

        # Increase if context is required
        if intent.requires_context:
            base_budget += 2000

        # Adjust based on estimated response size
        if intent.estimated_tokens > 2000:
            base_budget += 2000

        # Cap at reasonable limit
        return min(base_budget, 16000)

    def _build_reasoning(
        self,
        intent: IntentAnalysis,
        model: str,
        use_council: bool
    ) -> str:
        """Build human-readable reasoning for the routing decision"""
        parts = [
            f"Task: {intent.user_goal[:50]}{'...' if len(intent.user_goal) > 50 else ''}",
            f"Category: {intent.primary_category.value}",
            f"Complexity: {intent.complexity:.0%}",
            f"Model: {model.split('/')[-1]}",
        ]

        if use_council:
            parts.append("Using multi-agent council")

        if intent.requires_execution:
            parts.append("Requires execution")

        return " | ".join(parts)

    def get_routing_stats(self) -> Dict[str, Any]:
        """Get statistics about routing decisions"""
        if not self._routing_history:
            return {"total_routes": 0}

        category_counts = {}
        model_counts = {}
        council_count = 0

        for decision in self._routing_history:
            if decision.intent:
                cat = decision.intent.primary_category.value
                category_counts[cat] = category_counts.get(cat, 0) + 1

            model = decision.primary_model.split("/")[-1]
            model_counts[model] = model_counts.get(model, 0) + 1

            if decision.should_use_council:
                council_count += 1

        return {
            "total_routes": len(self._routing_history),
            "category_distribution": category_counts,
            "model_distribution": model_counts,
            "council_percentage": council_count / len(self._routing_history) * 100,
            "avg_confidence": sum(d.confidence for d in self._routing_history) / len(self._routing_history)
        }


# Convenience function for quick routing
async def quick_route(prompt: str, llm_adapter: Optional["LLMAdapter"] = None) -> RoutingDecision:
    """Quick routing without setting up full router"""
    analyzer = IntentAnalyzer(llm_adapter)
    router = IntelligentRouter(analyzer)
    return await router.route(prompt)


# Export main classes
__all__ = [
    "TaskCategory",
    "IntentAnalysis",
    "RoutingDecision",
    "IntentAnalyzer",
    "IntelligentRouter",
    "quick_route",
]
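
Usage note (illustrative, not part of the package): the sketch below shows one way the routing layer in this file could be exercised after installing the wheel. It relies only on names visible in the diff above (quick_route, IntentAnalyzer, IntelligentRouter, RoutingDecision, get_routing_stats); the prompts, printed fields, and the choice to pass llm_adapter=None are assumptions made for demonstration. With no adapter, IntentAnalyzer drops to its keyword fallback, so the example runs without any Ollama models installed.

# Illustrative sketch only -- not shipped in the nc1709 wheel.
# Assumes the package above is installed. With llm_adapter=None the
# IntentAnalyzer uses its keyword fallback, so no Ollama model is needed.
import asyncio

from nc1709.cognitive.router import IntelligentRouter, IntentAnalyzer, quick_route


async def main() -> None:
    # One-off routing via the convenience helper at the bottom of router.py.
    decision = await quick_route("Fix the crash in the session handler when the token expires")
    print(decision.primary_model)       # keyword fallback classifies this as debugging -> deepseek-r1
    print(decision.should_use_council)  # council triggers on high complexity or certain categories
    print(decision.reasoning)           # "Task: ... | Category: debugging | Complexity: ... | Model: ..."

    # A long-lived router accumulates history for get_routing_stats().
    router = IntelligentRouter(IntentAnalyzer(llm_adapter=None))
    await router.route("Explain what TaskCategory is used for", context={"cwd": "/tmp"})
    await router.route("Refactor the entire caching layer for performance")
    print(router.get_routing_stats())


if __name__ == "__main__":
    asyncio.run(main())

With a real LLMAdapter wired in, the same calls would instead send ANALYSIS_PROMPT to the fast ollama/qwen2.5:7b model configured in IntentAnalyzer and build the IntentAnalysis from its JSON reply, falling back to the keyword path only on errors.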