nc1709 1.15.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nc1709/__init__.py +13 -0
- nc1709/agent/__init__.py +36 -0
- nc1709/agent/core.py +505 -0
- nc1709/agent/mcp_bridge.py +245 -0
- nc1709/agent/permissions.py +298 -0
- nc1709/agent/tools/__init__.py +21 -0
- nc1709/agent/tools/base.py +440 -0
- nc1709/agent/tools/bash_tool.py +367 -0
- nc1709/agent/tools/file_tools.py +454 -0
- nc1709/agent/tools/notebook_tools.py +516 -0
- nc1709/agent/tools/search_tools.py +322 -0
- nc1709/agent/tools/task_tool.py +284 -0
- nc1709/agent/tools/web_tools.py +555 -0
- nc1709/agents/__init__.py +17 -0
- nc1709/agents/auto_fix.py +506 -0
- nc1709/agents/test_generator.py +507 -0
- nc1709/checkpoints.py +372 -0
- nc1709/cli.py +3380 -0
- nc1709/cli_ui.py +1080 -0
- nc1709/cognitive/__init__.py +149 -0
- nc1709/cognitive/anticipation.py +594 -0
- nc1709/cognitive/context_engine.py +1046 -0
- nc1709/cognitive/council.py +824 -0
- nc1709/cognitive/learning.py +761 -0
- nc1709/cognitive/router.py +583 -0
- nc1709/cognitive/system.py +519 -0
- nc1709/config.py +155 -0
- nc1709/custom_commands.py +300 -0
- nc1709/executor.py +333 -0
- nc1709/file_controller.py +354 -0
- nc1709/git_integration.py +308 -0
- nc1709/github_integration.py +477 -0
- nc1709/image_input.py +446 -0
- nc1709/linting.py +519 -0
- nc1709/llm_adapter.py +667 -0
- nc1709/logger.py +192 -0
- nc1709/mcp/__init__.py +18 -0
- nc1709/mcp/client.py +370 -0
- nc1709/mcp/manager.py +407 -0
- nc1709/mcp/protocol.py +210 -0
- nc1709/mcp/server.py +473 -0
- nc1709/memory/__init__.py +20 -0
- nc1709/memory/embeddings.py +325 -0
- nc1709/memory/indexer.py +474 -0
- nc1709/memory/sessions.py +432 -0
- nc1709/memory/vector_store.py +451 -0
- nc1709/models/__init__.py +86 -0
- nc1709/models/detector.py +377 -0
- nc1709/models/formats.py +315 -0
- nc1709/models/manager.py +438 -0
- nc1709/models/registry.py +497 -0
- nc1709/performance/__init__.py +343 -0
- nc1709/performance/cache.py +705 -0
- nc1709/performance/pipeline.py +611 -0
- nc1709/performance/tiering.py +543 -0
- nc1709/plan_mode.py +362 -0
- nc1709/plugins/__init__.py +17 -0
- nc1709/plugins/agents/__init__.py +18 -0
- nc1709/plugins/agents/django_agent.py +912 -0
- nc1709/plugins/agents/docker_agent.py +623 -0
- nc1709/plugins/agents/fastapi_agent.py +887 -0
- nc1709/plugins/agents/git_agent.py +731 -0
- nc1709/plugins/agents/nextjs_agent.py +867 -0
- nc1709/plugins/base.py +359 -0
- nc1709/plugins/manager.py +411 -0
- nc1709/plugins/registry.py +337 -0
- nc1709/progress.py +443 -0
- nc1709/prompts/__init__.py +22 -0
- nc1709/prompts/agent_system.py +180 -0
- nc1709/prompts/task_prompts.py +340 -0
- nc1709/prompts/unified_prompt.py +133 -0
- nc1709/reasoning_engine.py +541 -0
- nc1709/remote_client.py +266 -0
- nc1709/shell_completions.py +349 -0
- nc1709/slash_commands.py +649 -0
- nc1709/task_classifier.py +408 -0
- nc1709/version_check.py +177 -0
- nc1709/web/__init__.py +8 -0
- nc1709/web/server.py +950 -0
- nc1709/web/templates/index.html +1127 -0
- nc1709-1.15.4.dist-info/METADATA +858 -0
- nc1709-1.15.4.dist-info/RECORD +86 -0
- nc1709-1.15.4.dist-info/WHEEL +5 -0
- nc1709-1.15.4.dist-info/entry_points.txt +2 -0
- nc1709-1.15.4.dist-info/licenses/LICENSE +9 -0
- nc1709-1.15.4.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,543 @@
|
|
|
1
|
+
"""
|
|
2
|
+
NC1709 Performance - Smart Model Tiering
|
|
3
|
+
|
|
4
|
+
Routes requests to the smallest model capable of handling them.
|
|
5
|
+
Dramatically reduces latency for simple queries while maintaining
|
|
6
|
+
quality for complex tasks.
|
|
7
|
+
|
|
8
|
+
Tiers:
|
|
9
|
+
- Tier 1: Instant (3B) - Simple questions, quick answers (~300ms)
|
|
10
|
+
- Tier 2: Fast (7B) - Standard coding tasks (~1-2s)
|
|
11
|
+
- Tier 3: Smart (32B) - Complex tasks (~3-5s)
|
|
12
|
+
- Tier 4: Council - Multi-agent for very complex tasks (~10s+)
|
|
13
|
+
|
|
14
|
+
Integration with Model Registry:
|
|
15
|
+
- Uses nc1709.models to get model names and specs
|
|
16
|
+
- Falls back to hardcoded defaults if registry not available
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
import logging
|
|
20
|
+
from dataclasses import dataclass, field
|
|
21
|
+
from typing import Dict, List, Optional, Any, Tuple
|
|
22
|
+
from enum import Enum
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _get_model_from_registry(task: str, default: str) -> str:
|
|
28
|
+
"""
|
|
29
|
+
Get model name from registry, with fallback.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
task: Task name (instant, fast, coding, etc.)
|
|
33
|
+
default: Default model if registry unavailable
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
Model name in ollama/name format
|
|
37
|
+
"""
|
|
38
|
+
try:
|
|
39
|
+
from nc1709.models import get_best_model_for_task, get_model_spec
|
|
40
|
+
|
|
41
|
+
# Try to get best model for task
|
|
42
|
+
spec = get_best_model_for_task(task)
|
|
43
|
+
if spec:
|
|
44
|
+
return f"ollama/{spec.ollama_name}"
|
|
45
|
+
|
|
46
|
+
# Fallback to default
|
|
47
|
+
return default
|
|
48
|
+
except ImportError:
|
|
49
|
+
# Registry not available
|
|
50
|
+
return default
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class ModelTier(Enum):
    """Model tiers, ordered from fastest/smallest to slowest/most capable"""
    INSTANT = "instant"   # Tier 1: 3B models
    FAST = "fast"         # Tier 2: 7B models
    SMART = "smart"       # Tier 3: 32B models
    COUNCIL = "council"   # Tier 4: Multi-agent

    @property
    def level(self) -> int:
        """1-based rank of the tier (INSTANT=1 ... COUNCIL=4), for comparisons"""
        ranked_values = ("instant", "fast", "smart", "council")
        return ranked_values.index(self.value) + 1
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class TierConfig:
    """Settings describing one model tier"""
    # Model identifier used for requests routed to this tier
    model: str
    # Token limit configured for this tier
    max_tokens: int
    # Timeout configured for this tier, in seconds
    timeout_seconds: int
    # Relative cost (1.0 = baseline)
    cost_factor: float

    # --- What the tier is allowed to handle ---
    # Max complexity (0.0-1.0)
    supported_complexity: float
    # Task category names this tier supports
    supported_categories: List[str]
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class TieringDecision:
    """Outcome of a tier selection"""
    # Tier that was chosen and the model configured for it
    tier: ModelTier
    model: str
    # Human-readable explanation of the choice
    reasoning: str
    # Confidence score attached to the choice
    confidence: float
    # Optional tier/model recorded as a fallback for the choice
    fallback_tier: Optional[ModelTier] = None
    fallback_model: Optional[str] = None
    # Estimated latency for the chosen tier, in milliseconds
    estimated_latency_ms: int = 0
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass
class TieringStats:
    """Counters describing how the tiers have been used"""
    # Number of requests recorded per tier name
    tier_usage: Dict[str, int] = field(
        default_factory=lambda: dict.fromkeys(
            ("instant", "fast", "smart", "council"), 0
        )
    )
    # Number of successful requests recorded per tier name
    tier_success: Dict[str, int] = field(
        default_factory=lambda: dict.fromkeys(
            ("instant", "fast", "smart", "council"), 0
        )
    )
    escalations: int = 0  # Times we had to escalate to higher tier
    total_latency_saved_ms: float = 0

    def record_usage(self, tier: ModelTier, success: bool = True):
        """Count one request for ``tier``, and one success when ``success`` is true"""
        tier_name = tier.value
        self.tier_usage[tier_name] += 1
        if not success:
            return
        self.tier_success[tier_name] += 1

    def record_escalation(self):
        """Count one escalation to a higher tier"""
        self.escalations += 1

    def to_dict(self) -> Dict[str, Any]:
        """Return the counters as a plain dict, plus per-tier success rates in percent"""
        success_rates = {}
        for tier_name, used in self.tier_usage.items():
            if used > 0:
                success_rates[tier_name] = self.tier_success[tier_name] / used * 100
            else:
                # No requests recorded for this tier yet
                success_rates[tier_name] = 0

        return {
            "tier_usage": self.tier_usage,
            "tier_success": self.tier_success,
            "escalations": self.escalations,
            "total_latency_saved_ms": round(self.total_latency_saved_ms, 2),
            "success_rates": success_rates,
        }
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Default tier configurations
|
|
139
|
+
# Models are retrieved from the registry when available
|
|
140
|
+
def _build_default_tiers() -> Dict[ModelTier, TierConfig]:
    """
    Assemble the default configuration for every tier.

    Model names for the INSTANT, FAST and SMART tiers are looked up through
    _get_model_from_registry with a hardcoded default; COUNCIL uses the
    special model name "council".
    """
    tiers: Dict[ModelTier, TierConfig] = {}

    tiers[ModelTier.INSTANT] = TierConfig(
        model=_get_model_from_registry("instant", "ollama/qwen2.5:3b"),
        max_tokens=500,
        timeout_seconds=10,
        cost_factor=0.1,
        supported_complexity=0.3,
        supported_categories=[
            "quick_answer",
            "explanation",  # Simple explanations
        ],
    )

    tiers[ModelTier.FAST] = TierConfig(
        model=_get_model_from_registry("fast", "ollama/qwen2.5-coder:7b"),
        max_tokens=2000,
        timeout_seconds=30,
        cost_factor=0.3,
        supported_complexity=0.5,
        supported_categories=[
            "quick_answer",
            "explanation",
            "code_generation",  # Simple code
            "code_modification",  # Simple edits
            "documentation",
            "file_operations",
            "git_operations",
        ],
    )

    tiers[ModelTier.SMART] = TierConfig(
        model=_get_model_from_registry("coding", "ollama/qwen2.5-coder:32b"),
        max_tokens=4000,
        timeout_seconds=120,
        cost_factor=1.0,
        supported_complexity=0.8,
        # All categories
        supported_categories=[
            "quick_answer",
            "explanation",
            "code_generation",
            "code_modification",
            "code_review",
            "refactoring",
            "testing",
            "documentation",
            "debugging",
            "devops",
            "database",
            "file_operations",
            "git_operations",
            "project_setup",
            "command_execution",
        ],
    )

    tiers[ModelTier.COUNCIL] = TierConfig(
        model="council",  # Special: triggers multi-agent
        max_tokens=8000,
        timeout_seconds=300,
        cost_factor=3.0,
        supported_complexity=1.0,
        # Complex tasks
        supported_categories=[
            "reasoning",
            "security",
            "performance",
            "architecture",
        ],
    )

    return tiers


# Build tiers at module load time
DEFAULT_TIERS: Dict[ModelTier, TierConfig] = _build_default_tiers()
|
|
202
|
+
|
|
203
|
+
# Categories that always need higher tiers
HIGH_COMPLEXITY_CATEGORIES = {
    "reasoning", "security", "performance",
    "debugging",  # Complex debugging
}

# Categories that can use lower tiers
LOW_COMPLEXITY_CATEGORIES = {
    "quick_answer", "explanation", "documentation",
    "file_operations", "git_operations",
}

# Keywords suggesting complexity, matched as substrings of the lowercased prompt
COMPLEXITY_KEYWORDS = dict(
    high=[
        "architect",
        "design",
        "security",
        "vulnerability",
        "optimize",
        "performance",
        "complex",
        "refactor entire",
        "redesign",
        "distributed",
        "microservice",
        "concurrency",
        "race condition",
        "memory leak",
        "deadlock",
    ],
    medium=[
        "implement",
        "create",
        "build",
        "add feature",
        "integrate",
        "test",
        "debug",
        "fix bug",
        "refactor",
    ],
    low=[
        "explain",
        "what is",
        "how to",
        "simple",
        "quick",
        "list",
        "show",
        "display",
        "print",
        "hello world",
    ],
)
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
class TieredModelOrchestrator:
    """
    Intelligent model tier selection.

    Analyzes requests and routes to a model tier based on:
    - Task complexity
    - Task category
    - Prompt characteristics (keywords, word count)

    Usage statistics are collected in ``self.stats``.

    Usage:
        orchestrator = TieredModelOrchestrator()

        decision = orchestrator.select_tier(
            prompt="explain what a decorator is",
            category="explanation",
            complexity=0.3
        )

        print(f"Using {decision.model} ({decision.tier.value})")
    """

    def __init__(
        self,
        tiers: Optional[Dict[ModelTier, TierConfig]] = None,
        enable_escalation: bool = True,
        conservative: bool = False  # If True, prefer higher tiers
    ):
        """
        Args:
            tiers: Tier configurations; DEFAULT_TIERS is used when omitted
                (or when an empty mapping is passed)
            enable_escalation: When True, record_result counts failed
                requests as escalations
            conservative: When True, select_tier bumps INSTANT/FAST
                selections up by one tier
        """
        self.tiers = tiers or DEFAULT_TIERS
        self.enable_escalation = enable_escalation
        self.conservative = conservative
        self.stats = TieringStats()

        # Model availability cache
        self._available_models: Dict[str, bool] = {}

    @staticmethod
    def _tier_for_level(level: int) -> ModelTier:
        """
        Map a numeric level back to its ModelTier.

        ModelTier values are strings, so ``ModelTier(level)`` cannot be used
        for this lookup.

        Raises:
            ValueError: If no tier has the given level
        """
        for tier in ModelTier:
            if tier.level == level:
                return tier
        raise ValueError(f"No model tier with level {level}")

    def select_tier(
        self,
        prompt: str,
        category: Optional[str] = None,
        complexity: Optional[float] = None,
        context: Optional[Dict[str, Any]] = None,
        force_tier: Optional[ModelTier] = None
    ) -> TieringDecision:
        """
        Select the optimal model tier for a request.

        Args:
            prompt: User's prompt
            category: Task category from intent analysis
            complexity: Complexity score (0.0-1.0) from intent analysis;
                estimated from the prompt and category when omitted
            context: Additional context (currently not used by the selection)
            force_tier: Force a specific tier (for testing)

        Returns:
            TieringDecision with model and reasoning
        """
        # Force tier if specified
        if force_tier:
            config = self.tiers[force_tier]
            return TieringDecision(
                tier=force_tier,
                model=config.model,
                reasoning="Forced tier selection",
                confidence=1.0,
                estimated_latency_ms=self._estimate_latency(force_tier),
            )

        # Estimate complexity if not provided
        if complexity is None:
            complexity = self._estimate_complexity(prompt, category)

        # Determine minimum tier needed
        selected_tier = self._determine_tier(prompt, category, complexity)

        # Conservative mode: move INSTANT/FAST one tier up, never past SMART.
        # The lookup goes through the numeric level because ModelTier values
        # are strings; constructing ModelTier from an int raises ValueError.
        if self.conservative and selected_tier.level < ModelTier.SMART.level:
            bumped_level = min(selected_tier.level + 1, ModelTier.SMART.level)
            selected_tier = self._tier_for_level(bumped_level)

        config = self.tiers[selected_tier]

        # Determine fallback: every tier below COUNCIL falls back to SMART
        # (including SMART itself).
        fallback_tier = None
        fallback_model = None
        if selected_tier.level < ModelTier.COUNCIL.level:
            fallback_tier = ModelTier.SMART
            fallback_model = self.tiers[ModelTier.SMART].model

        decision = TieringDecision(
            tier=selected_tier,
            model=config.model,
            reasoning=self._generate_reasoning(selected_tier, category, complexity),
            confidence=self._calculate_confidence(selected_tier, complexity),
            fallback_tier=fallback_tier,
            fallback_model=fallback_model,
            estimated_latency_ms=self._estimate_latency(selected_tier),
        )

        logger.debug("Tier decision: %s (%s)", decision.tier.value, decision.reasoning)
        return decision

    def _determine_tier(
        self,
        prompt: str,
        category: Optional[str],
        complexity: float
    ) -> ModelTier:
        """
        Determine the appropriate tier.

        Rules are checked in order and the first match wins: high-complexity
        keywords, high-complexity categories, low-complexity categories,
        medium keywords, low keywords, then the raw complexity score.
        Keywords are matched as substrings of the lowercased prompt.
        """
        prompt_lower = prompt.lower()

        def mentions(level: str) -> bool:
            return any(keyword in prompt_lower for keyword in COMPLEXITY_KEYWORDS[level])

        # Check for high complexity keywords
        if mentions("high"):
            return ModelTier.COUNCIL if complexity > 0.8 else ModelTier.SMART

        # Check category requirements
        if category in HIGH_COMPLEXITY_CATEGORIES:
            return ModelTier.SMART if complexity < 0.8 else ModelTier.COUNCIL

        # Check for low complexity scenarios
        if category in LOW_COMPLEXITY_CATEGORIES and complexity < 0.4:
            # Check prompt length - very short prompts can use instant
            if len(prompt.split()) < 20:
                return ModelTier.INSTANT
            return ModelTier.FAST

        # Medium complexity keywords
        if mentions("medium"):
            return ModelTier.FAST if complexity < 0.6 else ModelTier.SMART

        # Low complexity keywords
        if mentions("low"):
            return ModelTier.INSTANT if complexity < 0.3 else ModelTier.FAST

        # Default based on complexity score
        if complexity < 0.3:
            return ModelTier.INSTANT
        if complexity < 0.5:
            return ModelTier.FAST
        if complexity < 0.8:
            return ModelTier.SMART
        return ModelTier.COUNCIL

    def _estimate_complexity(
        self,
        prompt: str,
        category: Optional[str]
    ) -> float:
        """
        Estimate complexity when not provided by intent analyzer.

        Starts at 0.5 and adjusts by matched keywords (+/-0.15 each),
        prompt word count (+/-0.2) and category (+/-0.2).

        Returns:
            Score clamped to the range 0.0-1.0
        """
        score = 0.5  # Default medium

        prompt_lower = prompt.lower()

        # Adjust by keywords: every matching keyword contributes
        for keyword in COMPLEXITY_KEYWORDS["high"]:
            if keyword in prompt_lower:
                score += 0.15

        for keyword in COMPLEXITY_KEYWORDS["low"]:
            if keyword in prompt_lower:
                score -= 0.15

        # Adjust by prompt length
        word_count = len(prompt.split())
        if word_count > 100:
            score += 0.2
        elif word_count < 10:
            score -= 0.2

        # Adjust by category
        if category in HIGH_COMPLEXITY_CATEGORIES:
            score += 0.2
        elif category in LOW_COMPLEXITY_CATEGORIES:
            score -= 0.2

        return max(0.0, min(1.0, score))

    def _generate_reasoning(
        self,
        tier: ModelTier,
        category: Optional[str],
        complexity: float
    ) -> str:
        """Generate human-readable reasoning, as "; "-joined fragments"""
        reasons = []

        if tier == ModelTier.INSTANT:
            reasons.append("Simple query")
            if complexity < 0.3:
                reasons.append(f"low complexity ({complexity:.2f})")
        elif tier == ModelTier.FAST:
            reasons.append("Standard task")
            if category:
                reasons.append(f"category: {category}")
        elif tier == ModelTier.SMART:
            reasons.append("Complex task")
            if complexity > 0.6:
                reasons.append(f"high complexity ({complexity:.2f})")
        else:  # COUNCIL
            reasons.append("Very complex task requiring multi-agent")
            if complexity > 0.8:
                reasons.append(f"very high complexity ({complexity:.2f})")

        return "; ".join(reasons)

    def _calculate_confidence(self, tier: ModelTier, complexity: float) -> float:
        """
        Calculate confidence in tier selection.

        Returns 0.9 when the complexity lies inside the tier's range (bounds
        inclusive), 0.75 when it is within 0.2 of the range midpoint, and
        0.6 otherwise.
        """
        # Higher confidence when complexity clearly matches tier
        tier_ranges = {
            ModelTier.INSTANT: (0.0, 0.3),
            ModelTier.FAST: (0.3, 0.5),
            ModelTier.SMART: (0.5, 0.8),
            ModelTier.COUNCIL: (0.8, 1.0),
        }

        low, high = tier_ranges[tier]
        if low <= complexity <= high:
            return 0.9  # High confidence
        if abs(complexity - (low + high) / 2) < 0.2:
            return 0.75  # Medium confidence
        return 0.6  # Lower confidence

    def _estimate_latency(self, tier: ModelTier) -> int:
        """Estimate latency in milliseconds (5000 for an unknown tier)"""
        latencies = {
            ModelTier.INSTANT: 300,
            ModelTier.FAST: 1500,
            ModelTier.SMART: 4000,
            ModelTier.COUNCIL: 15000,
        }
        return latencies.get(tier, 5000)

    def record_result(
        self,
        tier: ModelTier,
        success: bool,
        actual_latency_ms: Optional[int] = None
    ) -> None:
        """
        Record the result of using a tier.

        Args:
            tier: Tier that served the request
            success: Whether the request succeeded
            actual_latency_ms: Measured latency; when given (and non-zero)
                for a non-SMART tier, any time saved relative to the
                estimated SMART latency is added to the stats
        """
        self.stats.record_usage(tier, success)

        # NOTE(review): escalate() also increments the escalation counter, so
        # a caller that reports a failure here and then calls escalate()
        # counts the same event twice - confirm this is intended.
        if not success and self.enable_escalation:
            self.stats.record_escalation()

        # Track latency savings
        if actual_latency_ms and tier != ModelTier.SMART:
            # Compare to what SMART tier would have taken
            expected_smart = self._estimate_latency(ModelTier.SMART)
            if actual_latency_ms < expected_smart:
                self.stats.total_latency_saved_ms += expected_smart - actual_latency_ms

    def escalate(self, current_tier: ModelTier) -> Optional[TieringDecision]:
        """
        Escalate to next higher tier.

        Returns:
            Decision for the next tier up, or None when ``current_tier`` is
            already COUNCIL
        """
        if current_tier.level >= ModelTier.COUNCIL.level:
            return None  # Can't escalate further

        next_tier = self._tier_for_level(current_tier.level + 1)

        self.stats.record_escalation()
        config = self.tiers[next_tier]

        return TieringDecision(
            tier=next_tier,
            model=config.model,
            reasoning=f"Escalated from {current_tier.value}",
            confidence=0.8,
            estimated_latency_ms=self._estimate_latency(next_tier),
        )

    def get_stats(self) -> Dict[str, Any]:
        """Get tiering statistics"""
        return self.stats.to_dict()

    def get_tier_config(self, tier: ModelTier) -> TierConfig:
        """Get configuration for a tier"""
        return self.tiers[tier]
|
|
523
|
+
|
|
524
|
+
|
|
525
|
+
# Singleton instance
_orchestrator: Optional[TieredModelOrchestrator] = None


def get_orchestrator(**kwargs) -> TieredModelOrchestrator:
    """
    Get or create the global orchestrator.

    Args:
        **kwargs: Passed to TieredModelOrchestrator on the first call only.
            Once the global instance exists they are ignored and a warning
            is logged.

    Returns:
        The shared TieredModelOrchestrator instance
    """
    global _orchestrator
    if _orchestrator is None:
        _orchestrator = TieredModelOrchestrator(**kwargs)
    elif kwargs:
        # The singleton is already configured; surface the dropped arguments
        # instead of discarding them silently.
        logger.warning(
            "get_orchestrator() called with %s after the global orchestrator "
            "was created; arguments ignored",
            sorted(kwargs),
        )
    return _orchestrator
|
|
535
|
+
|
|
536
|
+
|
|
537
|
+
def quick_tier(
    prompt: str,
    category: Optional[str] = None,
    complexity: Optional[float] = None
) -> TieringDecision:
    """
    Select a tier using the global orchestrator.

    Args:
        prompt: User's prompt
        category: Optional task category
        complexity: Optional complexity score

    Returns:
        The TieringDecision produced by the global orchestrator's select_tier
    """
    orchestrator = get_orchestrator()
    decision = orchestrator.select_tier(prompt, category, complexity)
    return decision
|