nc1709-1.15.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nc1709/__init__.py +13 -0
- nc1709/agent/__init__.py +36 -0
- nc1709/agent/core.py +505 -0
- nc1709/agent/mcp_bridge.py +245 -0
- nc1709/agent/permissions.py +298 -0
- nc1709/agent/tools/__init__.py +21 -0
- nc1709/agent/tools/base.py +440 -0
- nc1709/agent/tools/bash_tool.py +367 -0
- nc1709/agent/tools/file_tools.py +454 -0
- nc1709/agent/tools/notebook_tools.py +516 -0
- nc1709/agent/tools/search_tools.py +322 -0
- nc1709/agent/tools/task_tool.py +284 -0
- nc1709/agent/tools/web_tools.py +555 -0
- nc1709/agents/__init__.py +17 -0
- nc1709/agents/auto_fix.py +506 -0
- nc1709/agents/test_generator.py +507 -0
- nc1709/checkpoints.py +372 -0
- nc1709/cli.py +3380 -0
- nc1709/cli_ui.py +1080 -0
- nc1709/cognitive/__init__.py +149 -0
- nc1709/cognitive/anticipation.py +594 -0
- nc1709/cognitive/context_engine.py +1046 -0
- nc1709/cognitive/council.py +824 -0
- nc1709/cognitive/learning.py +761 -0
- nc1709/cognitive/router.py +583 -0
- nc1709/cognitive/system.py +519 -0
- nc1709/config.py +155 -0
- nc1709/custom_commands.py +300 -0
- nc1709/executor.py +333 -0
- nc1709/file_controller.py +354 -0
- nc1709/git_integration.py +308 -0
- nc1709/github_integration.py +477 -0
- nc1709/image_input.py +446 -0
- nc1709/linting.py +519 -0
- nc1709/llm_adapter.py +667 -0
- nc1709/logger.py +192 -0
- nc1709/mcp/__init__.py +18 -0
- nc1709/mcp/client.py +370 -0
- nc1709/mcp/manager.py +407 -0
- nc1709/mcp/protocol.py +210 -0
- nc1709/mcp/server.py +473 -0
- nc1709/memory/__init__.py +20 -0
- nc1709/memory/embeddings.py +325 -0
- nc1709/memory/indexer.py +474 -0
- nc1709/memory/sessions.py +432 -0
- nc1709/memory/vector_store.py +451 -0
- nc1709/models/__init__.py +86 -0
- nc1709/models/detector.py +377 -0
- nc1709/models/formats.py +315 -0
- nc1709/models/manager.py +438 -0
- nc1709/models/registry.py +497 -0
- nc1709/performance/__init__.py +343 -0
- nc1709/performance/cache.py +705 -0
- nc1709/performance/pipeline.py +611 -0
- nc1709/performance/tiering.py +543 -0
- nc1709/plan_mode.py +362 -0
- nc1709/plugins/__init__.py +17 -0
- nc1709/plugins/agents/__init__.py +18 -0
- nc1709/plugins/agents/django_agent.py +912 -0
- nc1709/plugins/agents/docker_agent.py +623 -0
- nc1709/plugins/agents/fastapi_agent.py +887 -0
- nc1709/plugins/agents/git_agent.py +731 -0
- nc1709/plugins/agents/nextjs_agent.py +867 -0
- nc1709/plugins/base.py +359 -0
- nc1709/plugins/manager.py +411 -0
- nc1709/plugins/registry.py +337 -0
- nc1709/progress.py +443 -0
- nc1709/prompts/__init__.py +22 -0
- nc1709/prompts/agent_system.py +180 -0
- nc1709/prompts/task_prompts.py +340 -0
- nc1709/prompts/unified_prompt.py +133 -0
- nc1709/reasoning_engine.py +541 -0
- nc1709/remote_client.py +266 -0
- nc1709/shell_completions.py +349 -0
- nc1709/slash_commands.py +649 -0
- nc1709/task_classifier.py +408 -0
- nc1709/version_check.py +177 -0
- nc1709/web/__init__.py +8 -0
- nc1709/web/server.py +950 -0
- nc1709/web/templates/index.html +1127 -0
- nc1709-1.15.4.dist-info/METADATA +858 -0
- nc1709-1.15.4.dist-info/RECORD +86 -0
- nc1709-1.15.4.dist-info/WHEEL +5 -0
- nc1709-1.15.4.dist-info/entry_points.txt +2 -0
- nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
- nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/llm_adapter.py
ADDED
@@ -0,0 +1,667 @@
"""
LLM Adapter with Multi-Model Orchestration
Handles communication with local LLMs via Ollama using LiteLLM for routing
"""
import sys
import time
from typing import List, Dict, Any, Optional, Union
from enum import Enum

try:
    from litellm import completion, acompletion
    LITELLM_AVAILABLE = True
except ImportError:
    LITELLM_AVAILABLE = False
    print("Warning: litellm not installed. Install with: pip install litellm")

from .config import get_config

# Import cognitive router (Layer 1 of Cognitive Architecture)
try:
    from .cognitive.router import IntentAnalyzer, IntelligentRouter, RoutingDecision
    COGNITIVE_ROUTER_AVAILABLE = True
except ImportError:
    COGNITIVE_ROUTER_AVAILABLE = False


class TaskType(Enum):
    """Types of tasks for model routing"""
    REASONING = "reasoning"  # Complex logic, planning, math
    CODING = "coding"        # Code generation, debugging
    TOOLS = "tools"          # File ops, command execution
    GENERAL = "general"      # Q&A, explanations
    FAST = "fast"            # Quick responses


class TaskClassifier:
    """Classifies user requests to determine appropriate model"""

    # Keywords that indicate specific task types
    REASONING_KEYWORDS = [
        "prove", "reason", "explain why", "logic", "analyze", "plan",
        "design", "architecture", "strategy", "solve", "calculate",
        "mathematical", "theorem", "proof", "deduce"
    ]

    CODING_KEYWORDS = [
        "code", "function", "class", "debug", "refactor", "implement",
        "write a", "create a", "build a", "fix", "bug", "error",
        "optimize", "algorithm", "program", "script", "api"
    ]

    TOOLS_KEYWORDS = [
        "create file", "write file", "read file", "delete file",
        "run command", "execute", "install", "git", "docker",
        "npm", "pip", "deploy", "build", "compile"
    ]

    FAST_KEYWORDS = [
        "quick", "simple", "just", "only", "briefly", "short"
    ]

    @classmethod
    def classify(cls, prompt: str) -> TaskType:
        """Classify a prompt to determine task type

        Args:
            prompt: User's input prompt

        Returns:
            TaskType enum value
        """
        prompt_lower = prompt.lower()

        # Count keyword matches for each category
        reasoning_score = sum(1 for kw in cls.REASONING_KEYWORDS if kw in prompt_lower)
        coding_score = sum(1 for kw in cls.CODING_KEYWORDS if kw in prompt_lower)
        tools_score = sum(1 for kw in cls.TOOLS_KEYWORDS if kw in prompt_lower)
        fast_score = sum(1 for kw in cls.FAST_KEYWORDS if kw in prompt_lower)

        # Determine task type based on scores
        if fast_score > 0 and len(prompt.split()) < 20:
            return TaskType.FAST

        if tools_score > 0:
            return TaskType.TOOLS

        if coding_score > reasoning_score and coding_score > 0:
            return TaskType.CODING

        if reasoning_score > 0:
            return TaskType.REASONING

        return TaskType.GENERAL

    @classmethod
    def classify_with_context(cls, prompt: str, context: Optional[Dict[str, Any]] = None) -> TaskType:
        """Classify with additional context

        Args:
            prompt: User's input prompt
            context: Additional context (e.g., file paths, previous task type)

        Returns:
            TaskType enum value
        """
        # Basic classification
        task_type = cls.classify(prompt)

        # Refine based on context
        if context:
            # If we're in a coding session, prefer coding model
            if context.get("previous_task") == TaskType.CODING and task_type == TaskType.GENERAL:
                task_type = TaskType.CODING

            # If file paths are mentioned, likely tools/coding
            if context.get("file_paths") and task_type == TaskType.GENERAL:
                task_type = TaskType.CODING

        return task_type

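# Editorial usage sketch (not part of the packaged module): how the keyword
# classifier above scores a few made-up prompts, following the order of checks
# in classify(): FAST for short prompts, then TOOLS, CODING, REASONING, and
# GENERAL as the default.
#
#   TaskClassifier.classify("just give me a short answer")          # TaskType.FAST
#   TaskClassifier.classify("run command to install docker")        # TaskType.TOOLS
#   TaskClassifier.classify("refactor this function to fix a bug")  # TaskType.CODING
#   TaskClassifier.classify("prove the theorem and explain why")    # TaskType.REASONING
#   TaskClassifier.classify("who wrote Hamlet?")                    # TaskType.GENERAL
#
# classify_with_context() can promote GENERAL to CODING when the caller passes
# {"previous_task": TaskType.CODING} or a non-empty "file_paths" entry.
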
class LLMAdapter:
    """Adapter for interacting with LLMs via LiteLLM and Ollama"""

    def __init__(self, skip_health_check: bool = False):
        """Initialize the LLM adapter

        Args:
            skip_health_check: Skip Ollama connectivity check (useful for testing)
        """
        self.config = get_config()
        self.conversation_history: List[Dict[str, str]] = []
        self._ollama_available = False

        if not LITELLM_AVAILABLE:
            raise ImportError(
                "LiteLLM is required but not installed. "
                "Install with: pip install litellm"
            )

        if not skip_health_check:
            self._check_ollama_connection()

    def _check_ollama_connection(self) -> bool:
        """Check if Ollama is running and accessible

        Returns:
            True if Ollama is available
        """
        import urllib.request
        import urllib.error

        base_url = self.config.get("ollama.base_url", "http://localhost:11434")
        health_url = f"{base_url}/api/tags"

        try:
            with urllib.request.urlopen(health_url, timeout=5) as response:
                if response.status == 200:
                    self._ollama_available = True
                    return True
        except urllib.error.URLError as e:
            print(f"⚠️ Warning: Cannot connect to Ollama at {base_url}")
            print(f" Error: {e.reason}")
            print(f" Please ensure Ollama is running: ollama serve")
            self._ollama_available = False
        except Exception as e:
            print(f"⚠️ Warning: Ollama health check failed: {e}")
            self._ollama_available = False

        return self._ollama_available

    def is_ollama_available(self) -> bool:
        """Check if Ollama is available

        Returns:
            True if Ollama was reachable during init or last check
        """
        return self._ollama_available

    def _get_model_for_task(self, task_type: TaskType) -> str:
        """Get the appropriate model for a task type

        Args:
            task_type: Type of task

        Returns:
            Model identifier for Ollama
        """
        model_name = self.config.get_model_for_task(task_type.value)
        return f"ollama/{model_name}"

    def _build_messages(self, prompt: str, system_prompt: Optional[str] = None) -> List[Dict[str, str]]:
        """Build message list for the LLM

        Args:
            prompt: User's prompt
            system_prompt: Optional system prompt

        Returns:
            List of message dictionaries
        """
        messages = []

        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        # Add conversation history (last N messages)
        max_history = self.config.get("memory.conversation_history", 10)
        messages.extend(self.conversation_history[-max_history:])

        # Add current prompt
        messages.append({"role": "user", "content": prompt})

        return messages

    def complete(
        self,
        prompt: str,
        task_type: Optional[TaskType] = None,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        stream: bool = False
    ) -> str:
        """Get completion from LLM

        Args:
            prompt: User's prompt
            task_type: Type of task (auto-detected if None)
            system_prompt: Optional system prompt
            temperature: Sampling temperature (0.0 to 1.0)
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response

        Returns:
            LLM response text
        """
        # Auto-classify if task type not provided
        if task_type is None:
            task_type = TaskClassifier.classify(prompt)

        # Get appropriate model
        model = self._get_model_for_task(task_type)

        # Build messages
        messages = self._build_messages(prompt, system_prompt)

        # Get Ollama base URL and retry config
        api_base = self.config.get("ollama.base_url")
        max_retries = self.config.get("execution.max_retries", 3)

        last_error = None
        for attempt in range(max_retries):
            try:
                if stream:
                    return self._stream_completion(model, messages, temperature, max_tokens, api_base, prompt)
                else:
                    response = completion(
                        model=model,
                        messages=messages,
                        temperature=temperature,
                        max_tokens=max_tokens,
                        api_base=api_base
                    )

                    response_text = response.choices[0].message.content

                    # Update conversation history
                    self.conversation_history.append({"role": "user", "content": prompt})
                    self.conversation_history.append({"role": "assistant", "content": response_text})

                    return response_text

            except Exception as e:
                last_error = e
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt  # Exponential backoff: 1, 2, 4 seconds
                    print(f"⚠️ LLM request failed (attempt {attempt + 1}/{max_retries}): {e}")
                    print(f" Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                    continue

        raise RuntimeError(f"LLM completion failed after {max_retries} attempts: {last_error}")

    def _stream_completion(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: float,
        max_tokens: Optional[int],
        api_base: str,
        original_prompt: str
    ) -> str:
        """Stream completion from LLM and print chunks in real-time

        Args:
            model: Model identifier
            messages: Message list
            temperature: Sampling temperature
            max_tokens: Maximum tokens
            api_base: Ollama base URL
            original_prompt: Original user prompt for history

        Returns:
            Complete response text
        """
        response = completion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            api_base=api_base,
            stream=True
        )

        full_response = ""
        for chunk in response:
            if chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                full_response += content
                # Print chunk in real-time
                sys.stdout.write(content)
                sys.stdout.flush()

        # Print newline after streaming completes
        print()

        # Update conversation history after streaming completes
        self.conversation_history.append({"role": "user", "content": original_prompt})
        self.conversation_history.append({"role": "assistant", "content": full_response})

        return full_response

    def complete_with_tools(
        self,
        prompt: str,
        tools: List[Dict[str, Any]],
        task_type: Optional[TaskType] = None
    ) -> Dict[str, Any]:
        """Get completion with tool/function calling support

        Args:
            prompt: User's prompt
            tools: List of available tools (function definitions)
            task_type: Type of task (auto-detected if None)

        Returns:
            Response with potential tool calls
        """
        # Use tools-optimized model
        if task_type is None:
            task_type = TaskType.TOOLS

        model = self._get_model_for_task(task_type)
        messages = self._build_messages(prompt)
        api_base = self.config.get("ollama.base_url")

        try:
            response = completion(
                model=model,
                messages=messages,
                tools=tools,
                api_base=api_base
            )

            return {
                "content": response.choices[0].message.content,
                "tool_calls": getattr(response.choices[0].message, "tool_calls", None)
            }

        except Exception as e:
            raise RuntimeError(f"LLM completion with tools failed: {e}")

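    # Editorial sketch (not from the packaged source): complete_with_tools()
    # passes `tools` straight through to LiteLLM, which generally expects
    # OpenAI-style function definitions. Given an LLMAdapter instance
    # `adapter`, a hypothetical tool list could look like:
    #
    #   read_file_tool = {
    #       "type": "function",
    #       "function": {
    #           "name": "read_file",
    #           "description": "Read a file from disk",
    #           "parameters": {
    #               "type": "object",
    #               "properties": {"path": {"type": "string"}},
    #               "required": ["path"],
    #           },
    #       },
    #   }
    #   result = adapter.complete_with_tools("read the project README", tools=[read_file_tool])
    #   # result["tool_calls"] is None unless the model chose to call a tool.
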
    def chat(
        self,
        messages: List[Dict[str, str]],
        task_type: Optional[TaskType] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
    ) -> str:
        """Chat with the LLM using a message list (for agent use)

        This method is designed for the Agent class which manages its own
        conversation history and passes complete message lists.

        Args:
            messages: List of message dicts with 'role' and 'content' keys
            task_type: Type of task (defaults to TOOLS for agent use)
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate

        Returns:
            LLM response text
        """
        # Default to TOOLS task type for agent operations
        if task_type is None:
            task_type = TaskType.TOOLS

        # Get appropriate model
        model = self._get_model_for_task(task_type)
        api_base = self.config.get("ollama.base_url")
        max_retries = self.config.get("execution.max_retries", 3)

        last_error = None
        for attempt in range(max_retries):
            try:
                response = completion(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    api_base=api_base
                )

                return response.choices[0].message.content

            except Exception as e:
                last_error = e
                if attempt < max_retries - 1:
                    wait_time = 2 ** attempt
                    print(f"⚠️ LLM request failed (attempt {attempt + 1}/{max_retries}): {e}")
                    print(f" Retrying in {wait_time} seconds...")
                    time.sleep(wait_time)
                    continue

        raise RuntimeError(f"LLM chat failed after {max_retries} attempts: {last_error}")

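    # Editorial sketch (illustrative only): the agent layer keeps its own
    # history and calls chat() with a complete message list, for example:
    #
    #   adapter = LLMAdapter(skip_health_check=True)
    #   reply = adapter.chat(
    #       messages=[
    #           {"role": "system", "content": "You are a coding agent."},
    #           {"role": "user", "content": "Which files would you inspect first?"},
    #       ],
    #       task_type=TaskType.TOOLS,
    #   )
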
    def clear_history(self) -> None:
        """Clear conversation history"""
        self.conversation_history = []

    def get_model_info(self, task_type: TaskType) -> Dict[str, str]:
        """Get information about the model used for a task type

        Args:
            task_type: Type of task

        Returns:
            Dictionary with model information
        """
        model = self._get_model_for_task(task_type)
        return {
            "task_type": task_type.value,
            "model": model,
            "base_url": self.config.get("ollama.base_url")
        }

    # ==================== Cognitive Architecture Integration ====================

    def _init_cognitive_router(self) -> None:
        """Initialize the cognitive router (lazy initialization)"""
        if not hasattr(self, '_cognitive_router'):
            self._cognitive_router = None
            self._intent_analyzer = None

        if self._cognitive_router is None and COGNITIVE_ROUTER_AVAILABLE:
            self._intent_analyzer = IntentAnalyzer(self)
            self._cognitive_router = IntelligentRouter(self._intent_analyzer)

    @property
    def cognitive_router(self) -> Optional["IntelligentRouter"]:
        """Get the cognitive router (lazy initialization)"""
        self._init_cognitive_router()
        return self._cognitive_router

    @property
    def has_cognitive_routing(self) -> bool:
        """Check if cognitive routing is available"""
        return COGNITIVE_ROUTER_AVAILABLE

    def complete_with_routing(
        self,
        prompt: str,
        context: Optional[Dict[str, Any]] = None,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        stream: bool = False,
        use_cognitive: bool = True
    ) -> str:
        """Complete with intelligent routing from cognitive architecture

        This is the new primary method for completions that uses Layer 1
        of the cognitive architecture for intelligent model selection.

        Args:
            prompt: User's prompt
            context: Context for routing decisions (cwd, recent_files, etc.)
            system_prompt: Optional system prompt
            temperature: Sampling temperature
            max_tokens: Maximum tokens to generate
            stream: Whether to stream the response
            use_cognitive: Whether to use cognitive routing (falls back to basic if False)

        Returns:
            LLM response text
        """
        # Try cognitive routing first
        if use_cognitive and self.has_cognitive_routing:
            self._init_cognitive_router()

            # Get routing decision (sync version for now)
            decision = self._cognitive_router.route_sync(prompt, context)

            # Log routing decision
            print(f"🧭 {decision.reasoning}")

            # Use the model selected by cognitive router
            model = decision.primary_model

            # If council should be used, we'll handle that when Layer 3 is implemented
            if decision.should_use_council:
                print(f" 📋 Would use council with agents: {decision.agents_to_involve}")
                # For now, continue with standard completion
                # Council integration will come with Layer 3

            # Build messages
            messages = self._build_messages(prompt, system_prompt)
            api_base = self.config.get("ollama.base_url")

            try:
                if stream:
                    return self._stream_completion_with_model(
                        model, messages, temperature, max_tokens, api_base, prompt
                    )
                else:
                    response = completion(
                        model=model,
                        messages=messages,
                        temperature=temperature,
                        max_tokens=max_tokens,
                        api_base=api_base
                    )

                    response_text = response.choices[0].message.content

                    # Update conversation history
                    self.conversation_history.append({"role": "user", "content": prompt})
                    self.conversation_history.append({"role": "assistant", "content": response_text})

                    return response_text

            except Exception as e:
                # Fallback to decision's fallback model
                if decision.fallback_model:
                    print(f"⚠️ Primary model failed, trying fallback: {decision.fallback_model}")
                    return self._complete_with_fallback(
                        decision.fallback_model, messages, temperature,
                        max_tokens, api_base, prompt, stream
                    )
                raise

        # Fallback to basic routing
        return self.complete(
            prompt=prompt,
            task_type=None,  # Auto-classify
            system_prompt=system_prompt,
            temperature=temperature,
            max_tokens=max_tokens,
            stream=stream
        )

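    # Editorial sketch (hypothetical values): complete_with_routing() accepts
    # the context dict described in its docstring (cwd, recent_files, etc.):
    #
    #   adapter.complete_with_routing(
    #       "Refactor the database layer",
    #       context={"cwd": "/home/user/project", "recent_files": ["db.py"]},
    #   )
    #
    # If the selected model fails, the routing decision's fallback model is
    # tried; with use_cognitive=False (or when the router is unavailable) the
    # call degrades to complete() with keyword-based classification.
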
    def _stream_completion_with_model(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: float,
        max_tokens: Optional[int],
        api_base: str,
        original_prompt: str
    ) -> str:
        """Stream completion with specific model (for cognitive routing)"""
        response = completion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            api_base=api_base,
            stream=True
        )

        full_response = ""
        for chunk in response:
            if chunk.choices[0].delta.content:
                content = chunk.choices[0].delta.content
                full_response += content
                sys.stdout.write(content)
                sys.stdout.flush()

        print()  # Newline after streaming

        self.conversation_history.append({"role": "user", "content": original_prompt})
        self.conversation_history.append({"role": "assistant", "content": full_response})

        return full_response

    def _complete_with_fallback(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: float,
        max_tokens: Optional[int],
        api_base: str,
        original_prompt: str,
        stream: bool
    ) -> str:
        """Complete with a fallback model"""
        if stream:
            return self._stream_completion_with_model(
                model, messages, temperature, max_tokens, api_base, original_prompt
            )

        response = completion(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            api_base=api_base
        )

        response_text = response.choices[0].message.content
        self.conversation_history.append({"role": "user", "content": original_prompt})
        self.conversation_history.append({"role": "assistant", "content": response_text})

        return response_text

    def get_routing_stats(self) -> Dict[str, Any]:
        """Get statistics about cognitive routing decisions"""
        if not self.has_cognitive_routing or self._cognitive_router is None:
            return {"error": "Cognitive routing not available"}
        return self._cognitive_router.get_routing_stats()

    async def complete_async(
        self,
        prompt: str,
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None
    ) -> str:
        """Async completion for cognitive router use

        Args:
            prompt: The prompt
            model: Specific model to use
            temperature: Sampling temperature
            max_tokens: Maximum tokens

        Returns:
            Response text
        """
        if model is None:
            model = "ollama/qwen2.5:32b"

        messages = [{"role": "user", "content": prompt}]
        api_base = self.config.get("ollama.base_url")

        try:
            response = await acompletion(
                model=model,
                messages=messages,
                temperature=temperature,
                max_tokens=max_tokens,
                api_base=api_base
            )
            return response.choices[0].message.content
        except Exception as e:
            raise RuntimeError(f"Async completion failed: {e}")
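Usage sketch (editorial, not taken from the package): a minimal way to exercise the adapter above, assuming nc1709 is installed, an Ollama server is reachable at the configured base_url, and the configured models are pulled; prompts and expected outputs are illustrative.

    from nc1709.llm_adapter import LLMAdapter, TaskClassifier, TaskType

    # Skip the startup ping so construction works even when Ollama is offline.
    adapter = LLMAdapter(skip_health_check=True)

    # Keyword routing: complete() picks a model per task type when task_type is None.
    print(TaskClassifier.classify("implement a function to parse JSON"))  # TaskType.CODING

    # Which Ollama model is configured for reasoning tasks?
    print(adapter.get_model_info(TaskType.REASONING))

    # One-shot completion (requires a running Ollama server).
    reply = adapter.complete("briefly explain what this package does")
    print(reply)

    adapter.clear_history()  # drop the adapter-managed conversation history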