nc1709-1.15.4-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (86)
  1. nc1709/__init__.py +13 -0
  2. nc1709/agent/__init__.py +36 -0
  3. nc1709/agent/core.py +505 -0
  4. nc1709/agent/mcp_bridge.py +245 -0
  5. nc1709/agent/permissions.py +298 -0
  6. nc1709/agent/tools/__init__.py +21 -0
  7. nc1709/agent/tools/base.py +440 -0
  8. nc1709/agent/tools/bash_tool.py +367 -0
  9. nc1709/agent/tools/file_tools.py +454 -0
  10. nc1709/agent/tools/notebook_tools.py +516 -0
  11. nc1709/agent/tools/search_tools.py +322 -0
  12. nc1709/agent/tools/task_tool.py +284 -0
  13. nc1709/agent/tools/web_tools.py +555 -0
  14. nc1709/agents/__init__.py +17 -0
  15. nc1709/agents/auto_fix.py +506 -0
  16. nc1709/agents/test_generator.py +507 -0
  17. nc1709/checkpoints.py +372 -0
  18. nc1709/cli.py +3380 -0
  19. nc1709/cli_ui.py +1080 -0
  20. nc1709/cognitive/__init__.py +149 -0
  21. nc1709/cognitive/anticipation.py +594 -0
  22. nc1709/cognitive/context_engine.py +1046 -0
  23. nc1709/cognitive/council.py +824 -0
  24. nc1709/cognitive/learning.py +761 -0
  25. nc1709/cognitive/router.py +583 -0
  26. nc1709/cognitive/system.py +519 -0
  27. nc1709/config.py +155 -0
  28. nc1709/custom_commands.py +300 -0
  29. nc1709/executor.py +333 -0
  30. nc1709/file_controller.py +354 -0
  31. nc1709/git_integration.py +308 -0
  32. nc1709/github_integration.py +477 -0
  33. nc1709/image_input.py +446 -0
  34. nc1709/linting.py +519 -0
  35. nc1709/llm_adapter.py +667 -0
  36. nc1709/logger.py +192 -0
  37. nc1709/mcp/__init__.py +18 -0
  38. nc1709/mcp/client.py +370 -0
  39. nc1709/mcp/manager.py +407 -0
  40. nc1709/mcp/protocol.py +210 -0
  41. nc1709/mcp/server.py +473 -0
  42. nc1709/memory/__init__.py +20 -0
  43. nc1709/memory/embeddings.py +325 -0
  44. nc1709/memory/indexer.py +474 -0
  45. nc1709/memory/sessions.py +432 -0
  46. nc1709/memory/vector_store.py +451 -0
  47. nc1709/models/__init__.py +86 -0
  48. nc1709/models/detector.py +377 -0
  49. nc1709/models/formats.py +315 -0
  50. nc1709/models/manager.py +438 -0
  51. nc1709/models/registry.py +497 -0
  52. nc1709/performance/__init__.py +343 -0
  53. nc1709/performance/cache.py +705 -0
  54. nc1709/performance/pipeline.py +611 -0
  55. nc1709/performance/tiering.py +543 -0
  56. nc1709/plan_mode.py +362 -0
  57. nc1709/plugins/__init__.py +17 -0
  58. nc1709/plugins/agents/__init__.py +18 -0
  59. nc1709/plugins/agents/django_agent.py +912 -0
  60. nc1709/plugins/agents/docker_agent.py +623 -0
  61. nc1709/plugins/agents/fastapi_agent.py +887 -0
  62. nc1709/plugins/agents/git_agent.py +731 -0
  63. nc1709/plugins/agents/nextjs_agent.py +867 -0
  64. nc1709/plugins/base.py +359 -0
  65. nc1709/plugins/manager.py +411 -0
  66. nc1709/plugins/registry.py +337 -0
  67. nc1709/progress.py +443 -0
  68. nc1709/prompts/__init__.py +22 -0
  69. nc1709/prompts/agent_system.py +180 -0
  70. nc1709/prompts/task_prompts.py +340 -0
  71. nc1709/prompts/unified_prompt.py +133 -0
  72. nc1709/reasoning_engine.py +541 -0
  73. nc1709/remote_client.py +266 -0
  74. nc1709/shell_completions.py +349 -0
  75. nc1709/slash_commands.py +649 -0
  76. nc1709/task_classifier.py +408 -0
  77. nc1709/version_check.py +177 -0
  78. nc1709/web/__init__.py +8 -0
  79. nc1709/web/server.py +950 -0
  80. nc1709/web/templates/index.html +1127 -0
  81. nc1709-1.15.4.dist-info/METADATA +858 -0
  82. nc1709-1.15.4.dist-info/RECORD +86 -0
  83. nc1709-1.15.4.dist-info/WHEEL +5 -0
  84. nc1709-1.15.4.dist-info/entry_points.txt +2 -0
  85. nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
  86. nc1709-1.15.4.dist-info/top_level.txt +1 -0
nc1709/llm_adapter.py ADDED
@@ -0,0 +1,667 @@
+ """
+ LLM Adapter with Multi-Model Orchestration
+ Handles communication with local LLMs via Ollama using LiteLLM for routing
+ """
+ import sys
+ import time
+ from typing import List, Dict, Any, Optional, Union
+ from enum import Enum
+
+ try:
+     from litellm import completion, acompletion
+     LITELLM_AVAILABLE = True
+ except ImportError:
+     LITELLM_AVAILABLE = False
+     print("Warning: litellm not installed. Install with: pip install litellm")
+
+ from .config import get_config
+
+ # Import cognitive router (Layer 1 of Cognitive Architecture)
+ try:
+     from .cognitive.router import IntentAnalyzer, IntelligentRouter, RoutingDecision
+     COGNITIVE_ROUTER_AVAILABLE = True
+ except ImportError:
+     COGNITIVE_ROUTER_AVAILABLE = False
+
+
+ class TaskType(Enum):
+     """Types of tasks for model routing"""
+     REASONING = "reasoning"  # Complex logic, planning, math
+     CODING = "coding"  # Code generation, debugging
+     TOOLS = "tools"  # File ops, command execution
+     GENERAL = "general"  # Q&A, explanations
+     FAST = "fast"  # Quick responses
+
+
+ class TaskClassifier:
+     """Classifies user requests to determine appropriate model"""
+
+     # Keywords that indicate specific task types
+     REASONING_KEYWORDS = [
+         "prove", "reason", "explain why", "logic", "analyze", "plan",
+         "design", "architecture", "strategy", "solve", "calculate",
+         "mathematical", "theorem", "proof", "deduce"
+     ]
+
+     CODING_KEYWORDS = [
+         "code", "function", "class", "debug", "refactor", "implement",
+         "write a", "create a", "build a", "fix", "bug", "error",
+         "optimize", "algorithm", "program", "script", "api"
+     ]
+
+     TOOLS_KEYWORDS = [
+         "create file", "write file", "read file", "delete file",
+         "run command", "execute", "install", "git", "docker",
+         "npm", "pip", "deploy", "build", "compile"
+     ]
+
+     FAST_KEYWORDS = [
+         "quick", "simple", "just", "only", "briefly", "short"
+     ]
+
+     @classmethod
+     def classify(cls, prompt: str) -> TaskType:
+         """Classify a prompt to determine task type
+
+         Args:
+             prompt: User's input prompt
+
+         Returns:
+             TaskType enum value
+         """
+         prompt_lower = prompt.lower()
+
+         # Count keyword matches for each category
+         reasoning_score = sum(1 for kw in cls.REASONING_KEYWORDS if kw in prompt_lower)
+         coding_score = sum(1 for kw in cls.CODING_KEYWORDS if kw in prompt_lower)
+         tools_score = sum(1 for kw in cls.TOOLS_KEYWORDS if kw in prompt_lower)
+         fast_score = sum(1 for kw in cls.FAST_KEYWORDS if kw in prompt_lower)
+
+         # Determine task type based on scores
+         if fast_score > 0 and len(prompt.split()) < 20:
+             return TaskType.FAST
+
+         if tools_score > 0:
+             return TaskType.TOOLS
+
+         if coding_score > reasoning_score and coding_score > 0:
+             return TaskType.CODING
+
+         if reasoning_score > 0:
+             return TaskType.REASONING
+
+         return TaskType.GENERAL
+
+     @classmethod
+     def classify_with_context(cls, prompt: str, context: Optional[Dict[str, Any]] = None) -> TaskType:
+         """Classify with additional context
+
+         Args:
+             prompt: User's input prompt
+             context: Additional context (e.g., file paths, previous task type)
+
+         Returns:
+             TaskType enum value
+         """
+         # Basic classification
+         task_type = cls.classify(prompt)
+
+         # Refine based on context
+         if context:
+             # If we're in a coding session, prefer coding model
+             if context.get("previous_task") == TaskType.CODING and task_type == TaskType.GENERAL:
+                 task_type = TaskType.CODING
+
+             # If file paths are mentioned, likely tools/coding
+             if context.get("file_paths") and task_type == TaskType.GENERAL:
+                 task_type = TaskType.CODING
+
+         return task_type
+
+
+ class LLMAdapter:
+     """Adapter for interacting with LLMs via LiteLLM and Ollama"""
+
+     def __init__(self, skip_health_check: bool = False):
+         """Initialize the LLM adapter
+
+         Args:
+             skip_health_check: Skip Ollama connectivity check (useful for testing)
+         """
+         self.config = get_config()
+         self.conversation_history: List[Dict[str, str]] = []
+         self._ollama_available = False
+
+         if not LITELLM_AVAILABLE:
+             raise ImportError(
+                 "LiteLLM is required but not installed. "
+                 "Install with: pip install litellm"
+             )
+
+         if not skip_health_check:
+             self._check_ollama_connection()
+
+     def _check_ollama_connection(self) -> bool:
+         """Check if Ollama is running and accessible
+
+         Returns:
+             True if Ollama is available
+         """
+         import urllib.request
+         import urllib.error
+
+         base_url = self.config.get("ollama.base_url", "http://localhost:11434")
+         health_url = f"{base_url}/api/tags"
+
+         try:
+             with urllib.request.urlopen(health_url, timeout=5) as response:
+                 if response.status == 200:
+                     self._ollama_available = True
+                     return True
+         except urllib.error.URLError as e:
+             print(f"⚠️ Warning: Cannot connect to Ollama at {base_url}")
+             print(f" Error: {e.reason}")
+             print(f" Please ensure Ollama is running: ollama serve")
+             self._ollama_available = False
+         except Exception as e:
+             print(f"⚠️ Warning: Ollama health check failed: {e}")
+             self._ollama_available = False
+
+         return self._ollama_available
+
+     def is_ollama_available(self) -> bool:
+         """Check if Ollama is available
+
+         Returns:
+             True if Ollama was reachable during init or last check
+         """
+         return self._ollama_available
+
+     def _get_model_for_task(self, task_type: TaskType) -> str:
+         """Get the appropriate model for a task type
+
+         Args:
+             task_type: Type of task
+
+         Returns:
+             Model identifier for Ollama
+         """
+         model_name = self.config.get_model_for_task(task_type.value)
+         return f"ollama/{model_name}"
+
+     def _build_messages(self, prompt: str, system_prompt: Optional[str] = None) -> List[Dict[str, str]]:
+         """Build message list for the LLM
+
+         Args:
+             prompt: User's prompt
+             system_prompt: Optional system prompt
+
+         Returns:
+             List of message dictionaries
+         """
+         messages = []
+
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+
+         # Add conversation history (last N messages)
+         max_history = self.config.get("memory.conversation_history", 10)
+         messages.extend(self.conversation_history[-max_history:])
+
+         # Add current prompt
+         messages.append({"role": "user", "content": prompt})
+
+         return messages
+
+     def complete(
+         self,
+         prompt: str,
+         task_type: Optional[TaskType] = None,
+         system_prompt: Optional[str] = None,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         stream: bool = False
+     ) -> str:
+         """Get completion from LLM
+
+         Args:
+             prompt: User's prompt
+             task_type: Type of task (auto-detected if None)
+             system_prompt: Optional system prompt
+             temperature: Sampling temperature (0.0 to 1.0)
+             max_tokens: Maximum tokens to generate
+             stream: Whether to stream the response
+
+         Returns:
+             LLM response text
+         """
+         # Auto-classify if task type not provided
+         if task_type is None:
+             task_type = TaskClassifier.classify(prompt)
+
+         # Get appropriate model
+         model = self._get_model_for_task(task_type)
+
+         # Build messages
+         messages = self._build_messages(prompt, system_prompt)
+
+         # Get Ollama base URL and retry config
+         api_base = self.config.get("ollama.base_url")
+         max_retries = self.config.get("execution.max_retries", 3)
+
+         last_error = None
+         for attempt in range(max_retries):
+             try:
+                 if stream:
+                     return self._stream_completion(model, messages, temperature, max_tokens, api_base, prompt)
+                 else:
+                     response = completion(
+                         model=model,
+                         messages=messages,
+                         temperature=temperature,
+                         max_tokens=max_tokens,
+                         api_base=api_base
+                     )
+
+                     response_text = response.choices[0].message.content
+
+                     # Update conversation history
+                     self.conversation_history.append({"role": "user", "content": prompt})
+                     self.conversation_history.append({"role": "assistant", "content": response_text})
+
+                     return response_text
+
+             except Exception as e:
+                 last_error = e
+                 if attempt < max_retries - 1:
+                     wait_time = 2 ** attempt  # Exponential backoff: 1, 2, 4 seconds
+                     print(f"⚠️ LLM request failed (attempt {attempt + 1}/{max_retries}): {e}")
+                     print(f" Retrying in {wait_time} seconds...")
+                     time.sleep(wait_time)
+                     continue
+
+         raise RuntimeError(f"LLM completion failed after {max_retries} attempts: {last_error}")
+
+     def _stream_completion(
+         self,
+         model: str,
+         messages: List[Dict[str, str]],
+         temperature: float,
+         max_tokens: Optional[int],
+         api_base: str,
+         original_prompt: str
+     ) -> str:
+         """Stream completion from LLM and print chunks in real-time
+
+         Args:
+             model: Model identifier
+             messages: Message list
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens
+             api_base: Ollama base URL
+             original_prompt: Original user prompt for history
+
+         Returns:
+             Complete response text
+         """
+         response = completion(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             api_base=api_base,
+             stream=True
+         )
+
+         full_response = ""
+         for chunk in response:
+             if chunk.choices[0].delta.content:
+                 content = chunk.choices[0].delta.content
+                 full_response += content
+                 # Print chunk in real-time
+                 sys.stdout.write(content)
+                 sys.stdout.flush()
+
+         # Print newline after streaming completes
+         print()
+
+         # Update conversation history after streaming completes
+         self.conversation_history.append({"role": "user", "content": original_prompt})
+         self.conversation_history.append({"role": "assistant", "content": full_response})
+
+         return full_response
+
+     def complete_with_tools(
+         self,
+         prompt: str,
+         tools: List[Dict[str, Any]],
+         task_type: Optional[TaskType] = None
+     ) -> Dict[str, Any]:
+         """Get completion with tool/function calling support
+
+         Args:
+             prompt: User's prompt
+             tools: List of available tools (function definitions)
+             task_type: Type of task (auto-detected if None)
+
+         Returns:
+             Response with potential tool calls
+         """
+         # Use tools-optimized model
+         if task_type is None:
+             task_type = TaskType.TOOLS
+
+         model = self._get_model_for_task(task_type)
+         messages = self._build_messages(prompt)
+         api_base = self.config.get("ollama.base_url")
+
+         try:
+             response = completion(
+                 model=model,
+                 messages=messages,
+                 tools=tools,
+                 api_base=api_base
+             )
+
+             return {
+                 "content": response.choices[0].message.content,
+                 "tool_calls": getattr(response.choices[0].message, "tool_calls", None)
+             }
+
+         except Exception as e:
+             raise RuntimeError(f"LLM completion with tools failed: {e}")
+
+     def chat(
+         self,
+         messages: List[Dict[str, str]],
+         task_type: Optional[TaskType] = None,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+     ) -> str:
+         """Chat with the LLM using a message list (for agent use)
+
+         This method is designed for the Agent class which manages its own
+         conversation history and passes complete message lists.
+
+         Args:
+             messages: List of message dicts with 'role' and 'content' keys
+             task_type: Type of task (defaults to TOOLS for agent use)
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+
+         Returns:
+             LLM response text
+         """
+         # Default to TOOLS task type for agent operations
+         if task_type is None:
+             task_type = TaskType.TOOLS
+
+         # Get appropriate model
+         model = self._get_model_for_task(task_type)
+         api_base = self.config.get("ollama.base_url")
+         max_retries = self.config.get("execution.max_retries", 3)
+
+         last_error = None
+         for attempt in range(max_retries):
+             try:
+                 response = completion(
+                     model=model,
+                     messages=messages,
+                     temperature=temperature,
+                     max_tokens=max_tokens,
+                     api_base=api_base
+                 )
+
+                 return response.choices[0].message.content
+
+             except Exception as e:
+                 last_error = e
+                 if attempt < max_retries - 1:
+                     wait_time = 2 ** attempt
+                     print(f"⚠️ LLM request failed (attempt {attempt + 1}/{max_retries}): {e}")
+                     print(f" Retrying in {wait_time} seconds...")
+                     time.sleep(wait_time)
+                     continue
+
+         raise RuntimeError(f"LLM chat failed after {max_retries} attempts: {last_error}")
+
+     def clear_history(self) -> None:
+         """Clear conversation history"""
+         self.conversation_history = []
+
+     def get_model_info(self, task_type: TaskType) -> Dict[str, str]:
+         """Get information about the model used for a task type
+
+         Args:
+             task_type: Type of task
+
+         Returns:
+             Dictionary with model information
+         """
+         model = self._get_model_for_task(task_type)
+         return {
+             "task_type": task_type.value,
+             "model": model,
+             "base_url": self.config.get("ollama.base_url")
+         }
+
+     # ==================== Cognitive Architecture Integration ====================
+
+     def _init_cognitive_router(self) -> None:
+         """Initialize the cognitive router (lazy initialization)"""
+         if not hasattr(self, '_cognitive_router'):
+             self._cognitive_router = None
+             self._intent_analyzer = None
+
+         if self._cognitive_router is None and COGNITIVE_ROUTER_AVAILABLE:
+             self._intent_analyzer = IntentAnalyzer(self)
+             self._cognitive_router = IntelligentRouter(self._intent_analyzer)
+
+     @property
+     def cognitive_router(self) -> Optional["IntelligentRouter"]:
+         """Get the cognitive router (lazy initialization)"""
+         self._init_cognitive_router()
+         return self._cognitive_router
+
+     @property
+     def has_cognitive_routing(self) -> bool:
+         """Check if cognitive routing is available"""
+         return COGNITIVE_ROUTER_AVAILABLE
+
+     def complete_with_routing(
+         self,
+         prompt: str,
+         context: Optional[Dict[str, Any]] = None,
+         system_prompt: Optional[str] = None,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None,
+         stream: bool = False,
+         use_cognitive: bool = True
+     ) -> str:
+         """Complete with intelligent routing from cognitive architecture
+
+         This is the new primary method for completions that uses Layer 1
+         of the cognitive architecture for intelligent model selection.
+
+         Args:
+             prompt: User's prompt
+             context: Context for routing decisions (cwd, recent_files, etc.)
+             system_prompt: Optional system prompt
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens to generate
+             stream: Whether to stream the response
+             use_cognitive: Whether to use cognitive routing (falls back to basic if False)
+
+         Returns:
+             LLM response text
+         """
+         # Try cognitive routing first
+         if use_cognitive and self.has_cognitive_routing:
+             self._init_cognitive_router()
+
+             # Get routing decision (sync version for now)
+             decision = self._cognitive_router.route_sync(prompt, context)
+
+             # Log routing decision
+             print(f"🧭 {decision.reasoning}")
+
+             # Use the model selected by cognitive router
+             model = decision.primary_model
+
+             # If council should be used, we'll handle that when Layer 3 is implemented
+             if decision.should_use_council:
+                 print(f" 📋 Would use council with agents: {decision.agents_to_involve}")
+                 # For now, continue with standard completion
+                 # Council integration will come with Layer 3
+
+             # Build messages
+             messages = self._build_messages(prompt, system_prompt)
+             api_base = self.config.get("ollama.base_url")
+
+             try:
+                 if stream:
+                     return self._stream_completion_with_model(
+                         model, messages, temperature, max_tokens, api_base, prompt
+                     )
+                 else:
+                     response = completion(
+                         model=model,
+                         messages=messages,
+                         temperature=temperature,
+                         max_tokens=max_tokens,
+                         api_base=api_base
+                     )
+
+                     response_text = response.choices[0].message.content
+
+                     # Update conversation history
+                     self.conversation_history.append({"role": "user", "content": prompt})
+                     self.conversation_history.append({"role": "assistant", "content": response_text})
+
+                     return response_text
+
+             except Exception as e:
+                 # Fallback to decision's fallback model
+                 if decision.fallback_model:
+                     print(f"⚠️ Primary model failed, trying fallback: {decision.fallback_model}")
+                     return self._complete_with_fallback(
+                         decision.fallback_model, messages, temperature,
+                         max_tokens, api_base, prompt, stream
+                     )
+                 raise
+
+         # Fallback to basic routing
+         return self.complete(
+             prompt=prompt,
+             task_type=None,  # Auto-classify
+             system_prompt=system_prompt,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             stream=stream
+         )
+
+     def _stream_completion_with_model(
+         self,
+         model: str,
+         messages: List[Dict[str, str]],
+         temperature: float,
+         max_tokens: Optional[int],
+         api_base: str,
+         original_prompt: str
+     ) -> str:
+         """Stream completion with specific model (for cognitive routing)"""
+         response = completion(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             api_base=api_base,
+             stream=True
+         )
+
+         full_response = ""
+         for chunk in response:
+             if chunk.choices[0].delta.content:
+                 content = chunk.choices[0].delta.content
+                 full_response += content
+                 sys.stdout.write(content)
+                 sys.stdout.flush()
+
+         print()  # Newline after streaming
+
+         self.conversation_history.append({"role": "user", "content": original_prompt})
+         self.conversation_history.append({"role": "assistant", "content": full_response})
+
+         return full_response
+
+     def _complete_with_fallback(
+         self,
+         model: str,
+         messages: List[Dict[str, str]],
+         temperature: float,
+         max_tokens: Optional[int],
+         api_base: str,
+         original_prompt: str,
+         stream: bool
+     ) -> str:
+         """Complete with a fallback model"""
+         if stream:
+             return self._stream_completion_with_model(
+                 model, messages, temperature, max_tokens, api_base, original_prompt
+             )
+
+         response = completion(
+             model=model,
+             messages=messages,
+             temperature=temperature,
+             max_tokens=max_tokens,
+             api_base=api_base
+         )
+
+         response_text = response.choices[0].message.content
+         self.conversation_history.append({"role": "user", "content": original_prompt})
+         self.conversation_history.append({"role": "assistant", "content": response_text})
+
+         return response_text
+
+     def get_routing_stats(self) -> Dict[str, Any]:
+         """Get statistics about cognitive routing decisions"""
+         if not self.has_cognitive_routing or self._cognitive_router is None:
+             return {"error": "Cognitive routing not available"}
+         return self._cognitive_router.get_routing_stats()
+
+     async def complete_async(
+         self,
+         prompt: str,
+         model: Optional[str] = None,
+         temperature: float = 0.7,
+         max_tokens: Optional[int] = None
+     ) -> str:
+         """Async completion for cognitive router use
+
+         Args:
+             prompt: The prompt
+             model: Specific model to use
+             temperature: Sampling temperature
+             max_tokens: Maximum tokens
+
+         Returns:
+             Response text
+         """
+         if model is None:
+             model = "ollama/qwen2.5:32b"
+
+         messages = [{"role": "user", "content": prompt}]
+         api_base = self.config.get("ollama.base_url")
+
+         try:
+             response = await acompletion(
+                 model=model,
+                 messages=messages,
+                 temperature=temperature,
+                 max_tokens=max_tokens,
+                 api_base=api_base
+             )
+             return response.choices[0].message.content
+         except Exception as e:
+             raise RuntimeError(f"Async completion failed: {e}")
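For orientation, a minimal usage sketch of the adapter defined above. This is illustrative only and not part of the packaged code: it assumes a local Ollama server and that nc1709's configuration maps each task type to a model that has already been pulled; the prompt text and variable names are hypothetical.

# Illustrative sketch, not part of the package diff above.
from nc1709.llm_adapter import LLMAdapter, TaskClassifier

prompt = "Refactor this function and fix the off-by-one bug"
task = TaskClassifier.classify(prompt)        # keyword scoring -> TaskType.CODING for this prompt

adapter = LLMAdapter(skip_health_check=True)  # skip the Ollama /api/tags probe, e.g. in tests
print(adapter.get_model_info(task))           # {"task_type": "coding", "model": "ollama/...", ...}

# Blocking call; retries with exponential backoff are handled inside complete().
reply = adapter.complete(prompt, task_type=task, temperature=0.2)
adapter.clear_history()                       # drop the stored conversation turns

The keyword-based TaskClassifier is intentionally cheap; complete_with_routing() layers the optional cognitive router on top of it and falls back to this basic path whenever the router is unavailable.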