code-context-control 2.28.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cli/__init__.py +1 -0
- cli/_hook_utils.py +99 -0
- cli/c3.py +6152 -0
- cli/commands/__init__.py +1 -0
- cli/commands/common.py +312 -0
- cli/commands/parser.py +286 -0
- cli/docs.html +3178 -0
- cli/edits.html +878 -0
- cli/hook_auto_snapshot.py +142 -0
- cli/hook_c3_signal.py +61 -0
- cli/hook_c3read.py +116 -0
- cli/hook_edit_ledger.py +213 -0
- cli/hook_edit_unlock.py +170 -0
- cli/hook_filter.py +130 -0
- cli/hook_ghost_files.py +238 -0
- cli/hook_pretool_enforce.py +334 -0
- cli/hook_read.py +200 -0
- cli/hook_session_stats.py +62 -0
- cli/hook_terse_advisor.py +190 -0
- cli/hub.html +3764 -0
- cli/hub_server.py +1619 -0
- cli/mcp_proxy.py +428 -0
- cli/mcp_server.py +660 -0
- cli/server.py +2985 -0
- cli/tools/__init__.py +4 -0
- cli/tools/_helpers.py +65 -0
- cli/tools/agent.py +1165 -0
- cli/tools/compress.py +215 -0
- cli/tools/delegate.py +1184 -0
- cli/tools/edit.py +313 -0
- cli/tools/edits.py +118 -0
- cli/tools/filter.py +285 -0
- cli/tools/impact.py +163 -0
- cli/tools/memory.py +469 -0
- cli/tools/read.py +224 -0
- cli/tools/search.py +337 -0
- cli/tools/session.py +95 -0
- cli/tools/shell.py +193 -0
- cli/tools/status.py +306 -0
- cli/tools/validate.py +310 -0
- cli/ui/api.js +36 -0
- cli/ui/app.js +207 -0
- cli/ui/components/chat.js +758 -0
- cli/ui/components/dashboard.js +689 -0
- cli/ui/components/edits.js +220 -0
- cli/ui/components/instructions.js +481 -0
- cli/ui/components/memory.js +626 -0
- cli/ui/components/sessions.js +606 -0
- cli/ui/components/settings.js +1404 -0
- cli/ui/components/sidebar.js +156 -0
- cli/ui/icons.js +51 -0
- cli/ui/shared.js +119 -0
- cli/ui/theme.js +22 -0
- cli/ui.html +168 -0
- cli/ui_legacy.html +6797 -0
- cli/ui_nano.html +503 -0
- code_context_control-2.28.0.dist-info/METADATA +248 -0
- code_context_control-2.28.0.dist-info/RECORD +150 -0
- code_context_control-2.28.0.dist-info/WHEEL +5 -0
- code_context_control-2.28.0.dist-info/entry_points.txt +4 -0
- code_context_control-2.28.0.dist-info/licenses/LICENSE +201 -0
- code_context_control-2.28.0.dist-info/top_level.txt +5 -0
- core/__init__.py +75 -0
- core/config.py +269 -0
- core/ide.py +188 -0
- oracle/__init__.py +1 -0
- oracle/config.py +75 -0
- oracle/oracle.html +3900 -0
- oracle/oracle_server.py +663 -0
- oracle/services/__init__.py +1 -0
- oracle/services/c3_bridge.py +210 -0
- oracle/services/chat_engine.py +1103 -0
- oracle/services/chat_store.py +155 -0
- oracle/services/cross_memory.py +154 -0
- oracle/services/federated_graph.py +463 -0
- oracle/services/health_checker.py +117 -0
- oracle/services/insight_engine.py +307 -0
- oracle/services/memory_reader.py +106 -0
- oracle/services/memory_writer.py +182 -0
- oracle/services/ollama_bridge.py +332 -0
- oracle/services/project_scanner.py +87 -0
- oracle/services/review_agent.py +206 -0
- services/__init__.py +1 -0
- services/activity_log.py +93 -0
- services/agent_base.py +124 -0
- services/agents.py +1529 -0
- services/auto_memory.py +407 -0
- services/bench/__init__.py +6 -0
- services/bench/external/__init__.py +29 -0
- services/bench/external/aider_polyglot.py +405 -0
- services/bench/external/swe_bench.py +485 -0
- services/benchmark_dashboard.py +596 -0
- services/claude_md.py +785 -0
- services/compressor.py +592 -0
- services/context_snapshot.py +356 -0
- services/conversation_store.py +870 -0
- services/doc_index.py +537 -0
- services/e2e_benchmark.py +2884 -0
- services/e2e_evaluator.py +396 -0
- services/e2e_tasks.py +743 -0
- services/edit_ledger.py +459 -0
- services/embedding_index.py +341 -0
- services/error_reporting.py +123 -0
- services/file_memory.py +734 -0
- services/hub_service.py +585 -0
- services/indexer.py +712 -0
- services/memory.py +318 -0
- services/memory_consolidator.py +538 -0
- services/memory_graph.py +382 -0
- services/memory_grounder.py +304 -0
- services/memory_scorer.py +246 -0
- services/metrics.py +86 -0
- services/notifications.py +209 -0
- services/ollama_client.py +201 -0
- services/output_filter.py +488 -0
- services/parser.py +1238 -0
- services/project_manager.py +579 -0
- services/protocol.py +306 -0
- services/proxy_state.py +152 -0
- services/retrieval_broker.py +129 -0
- services/router.py +414 -0
- services/runtime.py +326 -0
- services/session_benchmark.py +1945 -0
- services/session_manager.py +1026 -0
- services/session_preloader.py +251 -0
- services/text_index.py +90 -0
- services/tool_classifier.py +176 -0
- services/transcript_index.py +340 -0
- services/validation_cache.py +155 -0
- services/vector_store.py +299 -0
- services/version_tracker.py +271 -0
- services/watcher.py +192 -0
- tui/__init__.py +0 -0
- tui/backend.py +59 -0
- tui/main.py +145 -0
- tui/screens/__init__.py +1 -0
- tui/screens/benchmark_view.py +109 -0
- tui/screens/claudemd_view.py +46 -0
- tui/screens/compress_view.py +52 -0
- tui/screens/index_view.py +74 -0
- tui/screens/init_view.py +82 -0
- tui/screens/mcp_view.py +73 -0
- tui/screens/optimize_view.py +41 -0
- tui/screens/pipe_view.py +46 -0
- tui/screens/projects_view.py +355 -0
- tui/screens/search_view.py +55 -0
- tui/screens/session_view.py +143 -0
- tui/screens/stats.py +158 -0
- tui/screens/ui_view.py +54 -0
- tui/theme.tcss +335 -0
services/router.py
ADDED
|
@@ -0,0 +1,414 @@
|
|
|
1
|
+
"""Adaptive Multi-Model Router — Classifies queries and routes to appropriate local LLM.
|
|
2
|
+
|
|
3
|
+
Feature extraction + classification:
|
|
4
|
+
- log_summary → gemma3n:latest (temp 0.1) — large log/output summarization
|
|
5
|
+
- simple_qa → deepseek-r1:1.5b (temp 0.4) — short factual questions
|
|
6
|
+
- complex → llama3.2:3b (temp 0.5) — multi-step reasoning
|
|
7
|
+
- passthrough → Claude (no local routing)
|
|
8
|
+
"""
|
|
9
|
+
import re
|
|
10
|
+
import threading
|
|
11
|
+
import time
|
|
12
|
+
|
|
13
|
+
from core import count_tokens
|
|
14
|
+
from services.ollama_client import OllamaClient
|
|
15
|
+
|
|
16
|
+
# Route class definitions with model assignments.
#
# Each key is a route-class name; each value holds the default Ollama model
# for that class plus the generation settings handed to the model
# ("temperature", "max_tokens", "num_ctx") and a human-readable
# "description".  The descriptions of every entry except "classification" are
# interpolated into the classifier prompt built by ModelRouter._ai_classify.
# ModelRouter.__init__ looks for a "<class>_model" config key for every entry
# here and records it as a per-class model override.
ROUTE_CLASSES = {
    # Nano-tier model used to classify incoming queries.  ModelRouter reads
    # "default_model" from this entry; _ai_classify passes its own
    # max_tokens/num_ctx values rather than the ones listed here.
    "classification": {
        "default_model": "qwen2:0.5b",
        "temperature": 0.0,
        "max_tokens": 128,
        "num_ctx": 2048,
        "description": "Ultra-fast classification and feature extraction (Nano Tier)",
    },
    "log_summary": {
        "default_model": "gemma3n:latest",
        "temperature": 0.1,
        "max_tokens": 300,
        "num_ctx": 8192,
        "description": "Large log/output summarization",
    },
    "simple_qa": {
        "default_model": "deepseek-r1:1.5b",
        "temperature": 0.4,
        "max_tokens": 256,
        "num_ctx": 4096,
        "description": "Short factual questions",
    },
    "complex": {
        "default_model": "llama3.2:3b",
        "temperature": 0.5,
        "max_tokens": 512,
        "num_ctx": 8192,
        "description": "Multi-step reasoning",
    },
    # No local model: ModelRouter.route returns early for this class without
    # calling Ollama.  There is deliberately no "num_ctx" key here;
    # ModelRouter.classify reads it with .get(..., 4096).
    "passthrough": {
        "default_model": None,
        "temperature": None,
        "max_tokens": None,
        "description": "Route to Claude (no local model)",
    },
}
|
|
53
|
+
|
|
54
|
+
# ── Feature extraction patterns ──────────────────────────
# All four patterns are used by ModelRouter._extract_features.

# Stack-trace / failure markers: "Traceback", a "at pkg.func(" frame, a
# 'File "...", line N' frame, or the words "Exception", "Error:", "panic:",
# "FAIL".  Matching is case-insensitive and unanchored, so the words also
# match inside longer words (e.g. "failure").
_STACKTRACE_RE = re.compile(
    r'Traceback|at\s+\w+\.\w+\(|File\s+".*",\s+line\s+\d+'
    r'|Exception|Error:|panic:|FAIL',
    re.IGNORECASE,
)
# A line counts as "code" when it contains any bracket/punctuation character
# from the class below, or one of the listed keywords followed by whitespace.
_CODE_RE = re.compile(r'[{}\[\]();=<>]|def\s|class\s|function\s|import\s|const\s|let\s|var\s')
# File reference: a run of word/slash/backslash characters, a dot, a 1-5
# character extension, and an optional ":<line>" suffix.  The only group is
# non-capturing, so findall() yields the full matched text.
_FILE_REF_RE = re.compile(r'[\w/\\]+\.\w{1,5}(?::\d+)?')
# Question detector: the text ends with "?" (optionally followed by
# whitespace) or starts with a question word followed by whitespace.
# re.MULTILINE is not set, so ^ and $ anchor to the whole string searched.
_QUESTION_RE = re.compile(r'\?\s*$|^(what|how|why|when|where|which|can|does|is|are)\s', re.IGNORECASE)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _resolve_model_name(candidate: str, available: list[str]) -> str:
    """Map a configured model name onto one of the installed model names.

    Matching is case-insensitive and tried in three passes, each scanning
    ``available`` in order and returning the first hit:

    1. the full name (including any ``:tag``) is equal;
    2. the installed name equals the candidate's base name (the part before
       the first ``:``) or starts with ``"<base>:"``;
    3. the base name occurs anywhere inside the installed name.

    Args:
        candidate: Requested model name or alias; may be empty.
        available: Installed model names.

    Returns:
        The matching entry of ``available`` with its original casing, or
        ``""`` when the candidate is empty/blank or nothing matches.
    """
    wanted = candidate.strip().lower() if candidate else ""
    if not wanted:
        return ""

    family = wanted.partition(":")[0]
    tagged_prefix = family + ":"

    # Ordered from strictest to loosest; the first pass that finds a hit wins.
    passes = (
        lambda low: low == wanted,
        lambda low: low == family or low.startswith(tagged_prefix),
        lambda low: family in low,
    )
    for is_match in passes:
        hit = next((name for name in available if is_match(name.lower())), None)
        if hit is not None:
            return hit

    return ""
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _route_fallback_order(route_class: str) -> list[str]:
    """Return the conservative fallback model order for ``route_class``.

    Known classes ("simple_qa", "complex", "log_summary") each have their
    own preference list; any other value gets a generic two-model list.
    A new list is returned on every call.
    """
    per_class = (
        ("simple_qa", ("llama3.2:latest", "llama3.2:3b", "qwen3-coder-next:latest", "gemma3n:latest")),
        ("complex", ("llama3.2:latest", "qwen3-coder-next:latest", "gemma3n:latest")),
        ("log_summary", ("gemma3n:latest", "llama3.2:latest", "llama3.2:3b")),
    )
    for known_class, order in per_class:
        if route_class == known_class:
            return list(order)
    return ["llama3.2:latest", "gemma3n:latest"]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class ModelRouter:
|
|
102
|
+
"""Classifies input and routes to appropriate Ollama model."""
|
|
103
|
+
|
|
104
|
+
def __init__(self, config: dict | None = None):
|
|
105
|
+
self.config = config or {}
|
|
106
|
+
base_url = self.config.get("ollama_base_url", "http://localhost:11434")
|
|
107
|
+
self.ollama = OllamaClient(base_url)
|
|
108
|
+
self.log_threshold = self.config.get("router_log_threshold", 500)
|
|
109
|
+
self.simple_threshold = self.config.get("router_simple_threshold", 100)
|
|
110
|
+
self.allow_model_fallback = self.config.get("router_allow_model_fallback", True)
|
|
111
|
+
fb = self.config.get("router_fallback_models", [])
|
|
112
|
+
self.router_fallback_models = fb if isinstance(fb, list) else ([fb] if fb else [])
|
|
113
|
+
self.retry_on_empty = self.config.get("router_retry_on_empty", True)
|
|
114
|
+
self._lock = threading.Lock()
|
|
115
|
+
|
|
116
|
+
# Model overrides from config
|
|
117
|
+
self._model_overrides = {}
|
|
118
|
+
for cls_name in ROUTE_CLASSES:
|
|
119
|
+
config_key = f"{cls_name}_model"
|
|
120
|
+
if config_key in self.config:
|
|
121
|
+
self._model_overrides[cls_name] = self.config[config_key]
|
|
122
|
+
|
|
123
|
+
# Metrics
|
|
124
|
+
self.metrics = {
|
|
125
|
+
"total_routes": 0,
|
|
126
|
+
"by_class": {cls: 0 for cls in ROUTE_CLASSES},
|
|
127
|
+
"failures": 0,
|
|
128
|
+
"total_latency_ms": 0,
|
|
129
|
+
"avg_latency_ms": 0,
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
def classify(self, query: str, context: str = "") -> dict:
|
|
133
|
+
"""Classify the query into a route class using local AI (Nano Tier) or heuristics."""
|
|
134
|
+
full_text = query + "\n" + context if context else query
|
|
135
|
+
features = self._extract_features(full_text)
|
|
136
|
+
|
|
137
|
+
# 1. Try AI-powered classification (Nano Tier)
|
|
138
|
+
ai_class = None
|
|
139
|
+
nano_config = ROUTE_CLASSES["classification"]
|
|
140
|
+
if self.ollama.is_available() and self.ollama.has_model(nano_config["default_model"]):
|
|
141
|
+
ai_class = self._ai_classify(query, context)
|
|
142
|
+
|
|
143
|
+
# 2. Fallback to heuristic classification
|
|
144
|
+
route_class = ai_class or self._classify_features(features)
|
|
145
|
+
|
|
146
|
+
route_info = ROUTE_CLASSES[route_class]
|
|
147
|
+
model = self._model_overrides.get(route_class, route_info["default_model"])
|
|
148
|
+
|
|
149
|
+
return {
|
|
150
|
+
"route_class": route_class,
|
|
151
|
+
"features": features,
|
|
152
|
+
"model": model,
|
|
153
|
+
"temperature": route_info["temperature"],
|
|
154
|
+
"max_tokens": route_info["max_tokens"],
|
|
155
|
+
"num_ctx": route_info.get("num_ctx", 4096),
|
|
156
|
+
"description": route_info["description"],
|
|
157
|
+
"classification_source": "ai" if ai_class else "heuristic"
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
def _ai_classify(self, query: str, context: str = "") -> str | None:
|
|
161
|
+
"""Use Nano model to classify the query."""
|
|
162
|
+
nano_model = ROUTE_CLASSES["classification"]["default_model"]
|
|
163
|
+
|
|
164
|
+
# Build classification prompt
|
|
165
|
+
class_desc = "\n".join([f"- {k}: {v['description']}" for k, v in ROUTE_CLASSES.items() if k != "classification"])
|
|
166
|
+
system = (
|
|
167
|
+
"You are a routing classifier for a coding assistant. "
|
|
168
|
+
"Output ONLY the category name from this list:\n"
|
|
169
|
+
f"{class_desc}\n\n"
|
|
170
|
+
"Rules:\n"
|
|
171
|
+
"1. If it's a short question, use 'simple_qa'.\n"
|
|
172
|
+
"2. If it involves complex reasoning or bug analysis, use 'complex'.\n"
|
|
173
|
+
"3. If it's a large log or terminal output to summarize, use 'log_summary'.\n"
|
|
174
|
+
"4. If it's a direct code instruction better handled by the primary model, use 'passthrough'.\n"
|
|
175
|
+
"Output EXACTLY one word."
|
|
176
|
+
)
|
|
177
|
+
|
|
178
|
+
try:
|
|
179
|
+
# Use ultra-low max_tokens and num_ctx for speed
|
|
180
|
+
response = self.ollama.generate(
|
|
181
|
+
prompt=f"Input: {query[:500]}",
|
|
182
|
+
model=nano_model,
|
|
183
|
+
system=system,
|
|
184
|
+
temperature=0.0,
|
|
185
|
+
max_tokens=10,
|
|
186
|
+
num_ctx=1024
|
|
187
|
+
)
|
|
188
|
+
if not response:
|
|
189
|
+
return None
|
|
190
|
+
|
|
191
|
+
# Sanitize response
|
|
192
|
+
found = response.strip().lower()
|
|
193
|
+
for cls_name in ROUTE_CLASSES:
|
|
194
|
+
if cls_name in found:
|
|
195
|
+
return cls_name
|
|
196
|
+
return None
|
|
197
|
+
except Exception:
|
|
198
|
+
return None
|
|
199
|
+
|
|
200
|
+
def route(self, query: str, context: str = "",
|
|
201
|
+
force_class: str = "", stream: bool = False) -> dict:
|
|
202
|
+
"""Classify and execute routing to the appropriate model.
|
|
203
|
+
|
|
204
|
+
If force_class is set, skip classification and use that class.
|
|
205
|
+
Returns dict with: route_class, model, response, latency_ms, features
|
|
206
|
+
"""
|
|
207
|
+
if self.config.get("HYBRID_DISABLE_TIER2"):
|
|
208
|
+
return {
|
|
209
|
+
"route_class": "passthrough",
|
|
210
|
+
"model": None,
|
|
211
|
+
"response": None,
|
|
212
|
+
"latency_ms": 0,
|
|
213
|
+
"reason": "Tier 2 disabled",
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
# Classify
|
|
217
|
+
if force_class and force_class in ROUTE_CLASSES:
|
|
218
|
+
classification = {
|
|
219
|
+
"route_class": force_class,
|
|
220
|
+
"features": self._extract_features(query),
|
|
221
|
+
**ROUTE_CLASSES[force_class],
|
|
222
|
+
}
|
|
223
|
+
model = self._model_overrides.get(force_class, ROUTE_CLASSES[force_class]["default_model"])
|
|
224
|
+
classification["model"] = model
|
|
225
|
+
else:
|
|
226
|
+
classification = self.classify(query, context)
|
|
227
|
+
|
|
228
|
+
route_class = classification["route_class"]
|
|
229
|
+
model = classification.get("model")
|
|
230
|
+
|
|
231
|
+
# Passthrough — don't call any local model
|
|
232
|
+
if route_class == "passthrough" or model is None:
|
|
233
|
+
with self._lock:
|
|
234
|
+
self.metrics["total_routes"] += 1
|
|
235
|
+
self.metrics["by_class"]["passthrough"] += 1
|
|
236
|
+
return {
|
|
237
|
+
"route_class": "passthrough",
|
|
238
|
+
"model": None,
|
|
239
|
+
"response": None,
|
|
240
|
+
"latency_ms": 0,
|
|
241
|
+
"features": classification.get("features", {}),
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
available = self.ollama.list_models() or []
|
|
245
|
+
candidates = []
|
|
246
|
+
|
|
247
|
+
resolved_primary = _resolve_model_name(model, available)
|
|
248
|
+
if resolved_primary:
|
|
249
|
+
candidates.append(resolved_primary)
|
|
250
|
+
elif model and not available:
|
|
251
|
+
# If inventory lookup fails, still attempt requested model.
|
|
252
|
+
candidates.append(model)
|
|
253
|
+
|
|
254
|
+
if self.allow_model_fallback and available:
|
|
255
|
+
for cand in _route_fallback_order(route_class) + self.router_fallback_models + available:
|
|
256
|
+
resolved = _resolve_model_name(cand, available)
|
|
257
|
+
if resolved and resolved not in candidates:
|
|
258
|
+
candidates.append(resolved)
|
|
259
|
+
|
|
260
|
+
if model and model not in candidates:
|
|
261
|
+
candidates.append(model)
|
|
262
|
+
|
|
263
|
+
# Route to local model (with fallback attempts)
|
|
264
|
+
start = time.monotonic()
|
|
265
|
+
system = self._get_system_prompt(route_class)
|
|
266
|
+
response = None
|
|
267
|
+
used_model = model
|
|
268
|
+
for candidate in candidates:
|
|
269
|
+
used_model = candidate
|
|
270
|
+
response = self.ollama.generate(
|
|
271
|
+
prompt=query if not context else f"{query}\n\nContext:\n{context}",
|
|
272
|
+
model=candidate,
|
|
273
|
+
system=system,
|
|
274
|
+
temperature=classification.get("temperature", 0.3),
|
|
275
|
+
max_tokens=classification.get("max_tokens", 512),
|
|
276
|
+
num_ctx=classification.get("num_ctx", 4096),
|
|
277
|
+
stream=stream,
|
|
278
|
+
)
|
|
279
|
+
if response is not None:
|
|
280
|
+
break
|
|
281
|
+
if not self.retry_on_empty:
|
|
282
|
+
break
|
|
283
|
+
|
|
284
|
+
latency_ms = int((time.monotonic() - start) * 1000)
|
|
285
|
+
|
|
286
|
+
with self._lock:
|
|
287
|
+
self.metrics["total_routes"] += 1
|
|
288
|
+
self.metrics["by_class"][route_class] += 1
|
|
289
|
+
if response is None:
|
|
290
|
+
self.metrics["failures"] += 1
|
|
291
|
+
self.metrics["total_latency_ms"] += latency_ms
|
|
292
|
+
total = self.metrics["total_routes"]
|
|
293
|
+
self.metrics["avg_latency_ms"] = self.metrics["total_latency_ms"] // max(total, 1)
|
|
294
|
+
|
|
295
|
+
return {
|
|
296
|
+
"route_class": route_class,
|
|
297
|
+
"model": used_model,
|
|
298
|
+
"response": response,
|
|
299
|
+
"latency_ms": latency_ms,
|
|
300
|
+
"features": classification.get("features", {}),
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
def summarize(self, text: str, style: str = "concise", stream: bool = False) -> dict:
|
|
304
|
+
"""Summarize text using the appropriate model based on length.
|
|
305
|
+
|
|
306
|
+
style: 'concise' (1-3 lines), 'detailed' (5-10 lines), 'bullet' (bullet points)
|
|
307
|
+
"""
|
|
308
|
+
tokens = count_tokens(text)
|
|
309
|
+
|
|
310
|
+
# Pick model based on text size
|
|
311
|
+
if tokens > self.log_threshold:
|
|
312
|
+
model = self._model_overrides.get("log_summary", "gemma3n:latest")
|
|
313
|
+
temp = 0.1
|
|
314
|
+
else:
|
|
315
|
+
model = self._model_overrides.get("simple_qa", "deepseek-r1:1.5b")
|
|
316
|
+
temp = 0.3
|
|
317
|
+
|
|
318
|
+
style_prompts = {
|
|
319
|
+
"concise": "Summarize in 1-3 lines. Be extremely terse.",
|
|
320
|
+
"detailed": "Summarize in 5-10 lines. Cover key points.",
|
|
321
|
+
"bullet": "Summarize as 3-7 bullet points.",
|
|
322
|
+
}
|
|
323
|
+
system = f"You are a summarizer. {style_prompts.get(style, style_prompts['concise'])}"
|
|
324
|
+
|
|
325
|
+
start = time.monotonic()
|
|
326
|
+
response = self.ollama.generate(
|
|
327
|
+
prompt=f"Summarize:\n\n{text[:4000]}",
|
|
328
|
+
model=model,
|
|
329
|
+
system=system,
|
|
330
|
+
temperature=temp,
|
|
331
|
+
max_tokens=300,
|
|
332
|
+
stream=stream,
|
|
333
|
+
)
|
|
334
|
+
latency_ms = int((time.monotonic() - start) * 1000)
|
|
335
|
+
|
|
336
|
+
return {
|
|
337
|
+
"summary": response,
|
|
338
|
+
"model": model,
|
|
339
|
+
"style": style,
|
|
340
|
+
"input_tokens": tokens,
|
|
341
|
+
"latency_ms": latency_ms,
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
def get_metrics(self) -> dict:
|
|
345
|
+
"""Return routing metrics."""
|
|
346
|
+
with self._lock:
|
|
347
|
+
return dict(self.metrics)
|
|
348
|
+
|
|
349
|
+
# ── Feature extraction ───────────────────────────────
|
|
350
|
+
|
|
351
|
+
def _extract_features(self, text: str) -> dict:
|
|
352
|
+
"""Extract classification features from input text."""
|
|
353
|
+
tokens = count_tokens(text)
|
|
354
|
+
lines = text.splitlines()
|
|
355
|
+
code_lines = sum(1 for line in lines if _CODE_RE.search(line))
|
|
356
|
+
total_lines = max(len(lines), 1)
|
|
357
|
+
|
|
358
|
+
return {
|
|
359
|
+
"input_tokens": tokens,
|
|
360
|
+
"code_ratio": round(code_lines / total_lines, 2),
|
|
361
|
+
"has_stacktrace": bool(_STACKTRACE_RE.search(text)),
|
|
362
|
+
"file_count": len(set(_FILE_REF_RE.findall(text))),
|
|
363
|
+
"is_question": bool(_QUESTION_RE.search(text.strip()[:200])),
|
|
364
|
+
"line_count": total_lines,
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
def _classify_features(self, features: dict) -> str:
|
|
368
|
+
"""Classify based on extracted features."""
|
|
369
|
+
tokens = features["input_tokens"]
|
|
370
|
+
code_ratio = features["code_ratio"]
|
|
371
|
+
has_stacktrace = features["has_stacktrace"]
|
|
372
|
+
is_question = features["is_question"]
|
|
373
|
+
|
|
374
|
+
# Large output with low code ratio → log summary
|
|
375
|
+
if tokens > self.log_threshold and code_ratio < 0.3:
|
|
376
|
+
return "log_summary"
|
|
377
|
+
|
|
378
|
+
# Has stacktrace → likely needs detailed analysis
|
|
379
|
+
if has_stacktrace and tokens > 200:
|
|
380
|
+
return "complex"
|
|
381
|
+
|
|
382
|
+
# Short question → simple QA
|
|
383
|
+
if is_question and tokens < self.simple_threshold:
|
|
384
|
+
return "simple_qa"
|
|
385
|
+
|
|
386
|
+
# Short, code-heavy → passthrough to Claude
|
|
387
|
+
if code_ratio > 0.5:
|
|
388
|
+
return "passthrough"
|
|
389
|
+
|
|
390
|
+
# Medium complexity
|
|
391
|
+
if tokens > self.simple_threshold:
|
|
392
|
+
return "complex"
|
|
393
|
+
|
|
394
|
+
# Default: let Claude handle it
|
|
395
|
+
return "passthrough"
|
|
396
|
+
|
|
397
|
+
def _get_system_prompt(self, route_class: str) -> str:
|
|
398
|
+
"""Get the system prompt for a route class."""
|
|
399
|
+
prompts = {
|
|
400
|
+
"log_summary": (
|
|
401
|
+
"You summarize logs and terminal output. Be concise. "
|
|
402
|
+
"Highlight errors, warnings, and key results. "
|
|
403
|
+
"Preserve file paths and line numbers from errors."
|
|
404
|
+
),
|
|
405
|
+
"simple_qa": (
|
|
406
|
+
"You answer short factual questions concisely. "
|
|
407
|
+
"Give direct answers without preamble."
|
|
408
|
+
),
|
|
409
|
+
"complex": (
|
|
410
|
+
"You analyze code and technical problems. "
|
|
411
|
+
"Think step by step. Be thorough but concise."
|
|
412
|
+
),
|
|
413
|
+
}
|
|
414
|
+
return prompts.get(route_class, "")
|